From 3431ac42de470a4bb73f1c6852a5ccffc07da7b1 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 21 Dec 2019 21:17:45 +0100 Subject: [PATCH 01/49] Fix typo --- spacy/tests/regression/test_issue4674.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/tests/regression/test_issue4674.py b/spacy/tests/regression/test_issue4674.py index 8d0c32eaa..8fa4f9259 100644 --- a/spacy/tests/regression/test_issue4674.py +++ b/spacy/tests/regression/test_issue4674.py @@ -6,7 +6,7 @@ from spacy.kb import KnowledgeBase from spacy.util import ensure_path from spacy.lang.en import English -from ..tests.util import make_tempdir +from ..util import make_tempdir def test_issue4674(): From fd4a7bd2b76081f31ad7e12a25f6ba028792f661 Mon Sep 17 00:00:00 2001 From: Al Johri Date: Sun, 29 Dec 2019 08:17:28 -0500 Subject: [PATCH 02/49] sign contributor agreement for AlJohri (#4839) [ci skip] --- .github/contributors/AlJohri.md | 106 ++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 .github/contributors/AlJohri.md diff --git a/.github/contributors/AlJohri.md b/.github/contributors/AlJohri.md new file mode 100644 index 000000000..4b2797ab0 --- /dev/null +++ b/.github/contributors/AlJohri.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. 
+ +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. 
This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. 
You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. 
+ +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Al Johri | +| Company name (if applicable) | N/A | +| Title or role (if applicable) | N/A | +| Date | December 27th, 2019 | +| GitHub username | AlJohri | +| Website (optional) | http://aljohri.com/ | From ef13e0c038c633d938891b6cedec898a9c4f3fff Mon Sep 17 00:00:00 2001 From: Ivan Echevarria Date: Sun, 29 Dec 2019 05:23:33 -0800 Subject: [PATCH 03/49] Add n_process to Language.pipe documentation (#4842) [ci skip] * Add n_process to documentation * Auto-format and add default [ci skip] Co-authored-by: Ines Montani --- .github/contributors/iechevarria.md | 106 ++++++++++++++++++++++++++++ spacy/errors.py | 7 +- website/docs/api/language.md | 7 +- 3 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 .github/contributors/iechevarria.md diff --git a/.github/contributors/iechevarria.md b/.github/contributors/iechevarria.md new file mode 100644 index 000000000..f0c05efc9 --- /dev/null +++ b/.github/contributors/iechevarria.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. 
For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. 
With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. 
Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. + +## Contributor Details + +| Field | Entry | +|------------------------------- | --------------------- | +| Name | Ivan Echevarria | +| Company name (if applicable) | | +| Title or role (if applicable) | | +| Date | 2019-12-24 | +| GitHub username | iechevarria | +| Website (optional) | https://echevarria.io | diff --git a/spacy/errors.py b/spacy/errors.py index ce35d706c..fd0f66cd9 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -78,10 +78,9 @@ class Warnings(object): W015 = ("As of v2.1.0, the use of keyword arguments to exclude fields from " "being serialized or deserialized is deprecated. Please use the " "`exclude` argument instead. For example: exclude=['{arg}'].") - W016 = ("The keyword argument `n_threads` on the is now deprecated, as " - "the v2.x models cannot release the global interpreter lock. " - "Future versions may introduce a `n_process` argument for " - "parallel inference via multiprocessing.") + W016 = ("The keyword argument `n_threads` is now deprecated. As of v2.2.2, " + "the argument `n_process` controls parallel inference via " + "multiprocessing.") W017 = ("Alias '{alias}' already exists in the Knowledge Base.") W018 = ("Entity '{entity}' already exists in the Knowledge Base - " "ignoring the duplicate entry.") diff --git a/website/docs/api/language.md b/website/docs/api/language.md index 6e7f6be3e..d548a1f64 100644 --- a/website/docs/api/language.md +++ b/website/docs/api/language.md @@ -77,9 +77,9 @@ more efficient than processing texts one-by-one. 
Early versions of spaCy used simple statistical models that could be efficiently multi-threaded, as we were able to entirely release Python's global interpreter lock. The multi-threading was controlled using the `n_threads` keyword argument -to the `.pipe` method. This keyword argument is now deprecated as of v2.1.0. -Future versions may introduce a `n_process` argument for parallel inference via -multiprocessing. +to the `.pipe` method. This keyword argument is now deprecated as of v2.1.0. A +new keyword argument, `n_process`, was introduced to control parallel inference +via multiprocessing in v2.2.2. @@ -98,6 +98,7 @@ multiprocessing. | `batch_size` | int | The number of texts to buffer. | | `disable` | list | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). | | `component_cfg` 2.1 | dict | Config parameters for specific pipeline components, keyed by component name. | +| `n_process` 2.2.2 | int | Number of processors to use, only supported in Python 3. Defaults to `1`. | | **YIELDS** | `Doc` | Documents in the order of the original text. | ## Language.update {#update tag="method"} From 1830a12578700a27c59f62cd39c38c99e8ac74eb Mon Sep 17 00:00:00 2001 From: Anastasiia Iurshina Date: Sun, 29 Dec 2019 14:24:13 +0100 Subject: [PATCH 04/49] Fixes typos (#4843) * Fixes typos * Fixes typo * Contributor agreement --- .github/contributors/iurshina.md | 106 +++++++++++++++++++++++++++++++ spacy/_align.pyx | 2 +- spacy/tokens/doc.pyx | 4 +- 3 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 .github/contributors/iurshina.md diff --git a/.github/contributors/iurshina.md b/.github/contributors/iurshina.md new file mode 100644 index 000000000..226813084 --- /dev/null +++ b/.github/contributors/iurshina.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). 
+The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. 
This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. 
You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. Please do NOT +mark both statements: + + * [ ] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. + +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Anastasiia Iurshina | +| Company name (if applicable) | | +| Title or role (if applicable) | | +| Date | 28.12.2019 | +| GitHub username | iurshina | +| Website (optional) | | diff --git a/spacy/_align.pyx b/spacy/_align.pyx index 8ae7cdf4e..6786ec7ba 100644 --- a/spacy/_align.pyx +++ b/spacy/_align.pyx @@ -30,7 +30,7 @@ S[:i] -> T[:j] (at D[i,j]) S[:i+1] -> T[:j] (at D[i+1,j]) S[:i] -> T[:j+1] (at D[i,j+1]) -Further, we now we can tranform: +Further, now we can transform: S[:i+1] -> S[:i] (DEL) for 1, T[:j+1] -> T[:j] (INS) for 1. 
S[i+1] -> T[j+1] (SUB) for 0 or 1 diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 716df1087..6bd982e35 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -994,9 +994,9 @@ cdef class Doc: order, and no span intersection is allowed. spans (Span[]): Spans to merge, in document order, with all span - intersections empty. Cannot be emty. + intersections empty. Cannot be empty. attributes (Dictionary[]): Attributes to assign to the merged tokens. By default, - must be the same lenghth as spans, emty dictionaries are allowed. + must be the same length as spans, empty dictionaries are allowed. attributes are inherited from the syntactic root of the span. RETURNS (Token): The first newly merged token. """ From db9257559c0642262a46d7acb7855e1e23b50e56 Mon Sep 17 00:00:00 2001 From: Anastasiia Iurshina Date: Sun, 29 Dec 2019 14:25:05 +0100 Subject: [PATCH 05/49] Adds script shebang (#4846) --- bin/spacy | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/spacy b/bin/spacy index 29d9a80e5..11359669c 100644 --- a/bin/spacy +++ b/bin/spacy @@ -1 +1,2 @@ +#! 
/bin/sh python -m spacy "$@" From 1aa2d4dac9ef414b3388743c40cc65e4880f115a Mon Sep 17 00:00:00 2001 From: Al Johri Date: Wed, 1 Jan 2020 07:15:05 -0500 Subject: [PATCH 06/49] stop rendering mathjax by default in displacy (#4840) * stop rendering mathjax by default in displacy * Replace f-string and add comment Co-authored-by: Ines Montani --- spacy/displacy/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index d2ef21dbd..c17b80aef 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -55,9 +55,10 @@ def render( html = RENDER_WRAPPER(html) if jupyter or (jupyter is None and is_in_jupyter()): # return HTML rendered by IPython display() + # See #4840 for details on span wrapper to disable mathjax from IPython.core.display import display, HTML - return display(HTML(html)) + return display(HTML('<span class="tex2jax_ignore">{}</span>'.format(html))) return html From 400257a8029f8c1c51fd8f3283760e3fe492ceda Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 4 Jan 2020 01:52:18 +0100 Subject: [PATCH 07/49] Update index.md [ci skip] --- website/docs/usage/index.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/website/docs/usage/index.md b/website/docs/usage/index.md index 441297813..17fd8fa7b 100644 --- a/website/docs/usage/index.md +++ b/website/docs/usage/index.md @@ -124,9 +124,8 @@ interface for GPU arrays. spaCy can be installed on GPU by specifying `spacy[cuda]`, `spacy[cuda90]`, `spacy[cuda91]`, `spacy[cuda92]` or `spacy[cuda100]`. If you know your cuda version, using the more explicit specifier allows cupy to be installed via -wheel, saving some compilation time. The specifiers should install two -libraries: [`cupy`](https://cupy.chainer.org) and -[`thinc_gpu_ops`](https://github.com/explosion/thinc_gpu_ops). +wheel, saving some compilation time. The specifiers should install +[`cupy`](https://cupy.chainer.org). 
```bash $ pip install -U spacy[cuda92] From 53929138d73a6f5ed15403a8381be122efb03edb Mon Sep 17 00:00:00 2001 From: Geoffrey Gordon Ashbrook Date: Mon, 6 Jan 2020 06:37:42 -0500 Subject: [PATCH 08/49] remove extra word typo (#4875) "let you find you" --- website/docs/usage/rule-based-matching.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index 3aa2e417c..cae4f074a 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -9,7 +9,7 @@ menu: --- Compared to using regular expressions on raw text, spaCy's rule-based matcher -engines and components not only let you find you the words and phrases you're +engines and components not only let you find the words and phrases you're looking for – they also give you access to the tokens within the document and their relationships. This means you can easily access and analyze the surrounding tokens, merge spans into single tokens or add entries to the named From a1b22e90cd78d9b5c07f0ba63fec4eb028675b54 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Mon, 6 Jan 2020 14:57:34 +0100 Subject: [PATCH 09/49] serialize ENT_ID (#4852) * expand serialization test for custom token attribute * add failing test for issue 4849 * define ENT_ID as attr and use in doc serialization * fix few typos --- spacy/attrs.pxd | 1 + spacy/attrs.pyx | 1 + spacy/language.py | 8 ++--- spacy/symbols.pxd | 1 + spacy/symbols.pyx | 1 + spacy/tests/regression/test_issue4849.py | 36 +++++++++++++++++++ .../test_serialize_extension_attrs.py | 10 +++++- spacy/tokens/doc.pyx | 6 ++-- spacy/tokens/span.pyx | 2 +- spacy/tokens/token.pxd | 4 +++ 10 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 spacy/tests/regression/test_issue4849.py diff --git a/spacy/attrs.pxd b/spacy/attrs.pxd index d9aca078c..4cff4a415 100644 --- a/spacy/attrs.pxd +++ b/spacy/attrs.pxd @@ -91,3 +91,4 @@ cdef enum 
attr_id_t: LANG ENT_KB_ID = symbols.ENT_KB_ID + ENT_ID = symbols.ENT_ID diff --git a/spacy/attrs.pyx b/spacy/attrs.pyx index 6d1c18eb9..51eb5c35b 100644 --- a/spacy/attrs.pyx +++ b/spacy/attrs.pyx @@ -84,6 +84,7 @@ IDS = { "DEP": DEP, "ENT_IOB": ENT_IOB, "ENT_TYPE": ENT_TYPE, + "ENT_ID": ENT_ID, "ENT_KB_ID": ENT_KB_ID, "HEAD": HEAD, "SENT_START": SENT_START, diff --git a/spacy/language.py b/spacy/language.py index 266a1727d..5544b6341 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -780,7 +780,7 @@ class Language(object): pipes = ( [] - ) # contains functools.partial objects so that easily create multiprocess worker. + ) # contains functools.partial objects to easily create multiprocess worker. for name, proc in self.pipeline: if name in disable: continue @@ -837,7 +837,7 @@ class Language(object): texts, raw_texts = itertools.tee(texts) # for sending texts to worker texts_q = [mp.Queue() for _ in range(n_process)] - # for receiving byte encoded docs from worker + # for receiving byte-encoded docs from worker bytedocs_recv_ch, bytedocs_send_ch = zip( *[mp.Pipe(False) for _ in range(n_process)] ) @@ -847,7 +847,7 @@ class Language(object): # This is necessary to properly handle infinite length of texts. # (In this case, all data cannot be sent to the workers at once) sender = _Sender(batch_texts, texts_q, chunk_size=n_process) - # send twice so that make process busy + # send twice to make process busy sender.send() sender.send() @@ -859,7 +859,7 @@ class Language(object): proc.start() # Cycle channels not to break the order of docs. - # The received object is batch of byte encoded docs, so flatten them with chain.from_iterable. + # The received object is a batch of byte-encoded docs, so flatten them with chain.from_iterable. 
byte_docs = chain.from_iterable(recv.recv() for recv in cycle(bytedocs_recv_ch)) docs = (Doc(self.vocab).from_bytes(byte_doc) for byte_doc in byte_docs) try: diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd index 5922ee588..b6391af11 100644 --- a/spacy/symbols.pxd +++ b/spacy/symbols.pxd @@ -462,3 +462,4 @@ cdef enum symbol_t: acl ENT_KB_ID + ENT_ID diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx index b65ae9628..d82cf036d 100644 --- a/spacy/symbols.pyx +++ b/spacy/symbols.pyx @@ -86,6 +86,7 @@ IDS = { "DEP": DEP, "ENT_IOB": ENT_IOB, "ENT_TYPE": ENT_TYPE, + "ENT_ID": ENT_ID, "ENT_KB_ID": ENT_KB_ID, "HEAD": HEAD, "SENT_START": SENT_START, diff --git a/spacy/tests/regression/test_issue4849.py b/spacy/tests/regression/test_issue4849.py new file mode 100644 index 000000000..834219773 --- /dev/null +++ b/spacy/tests/regression/test_issue4849.py @@ -0,0 +1,36 @@ +# coding: utf8 +from __future__ import unicode_literals + +from spacy.lang.en import English +from spacy.pipeline import EntityRuler + + +def test_issue4849(): + nlp = English() + + ruler = EntityRuler( + nlp, patterns=[ + {"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'}, + {"label": "PERSON", "pattern": 'bernie sanders', "id": 'bernie-sanders'}, + ], + phrase_matcher_attr="LOWER" + ) + + nlp.add_pipe(ruler) + + text = """ + The left is starting to take aim at Democratic front-runner Joe Biden. + Sen. Bernie Sanders joined in her criticism: "There is no 'middle ground' when it comes to climate policy." 
+ """ + + # USING 1 PROCESS + count_ents = 0 + for doc in nlp.pipe([text], n_process=1): + count_ents += len([ent for ent in doc.ents if ent.ent_id > 0]) + assert(count_ents == 2) + + # USING 2 PROCESSES + count_ents = 0 + for doc in nlp.pipe([text], n_process=2): + count_ents += len([ent for ent in doc.ents if ent.ent_id > 0]) + assert (count_ents == 2) diff --git a/spacy/tests/serialize/test_serialize_extension_attrs.py b/spacy/tests/serialize/test_serialize_extension_attrs.py index 1881b7d0c..45c2e3909 100644 --- a/spacy/tests/serialize/test_serialize_extension_attrs.py +++ b/spacy/tests/serialize/test_serialize_extension_attrs.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals import pytest -from spacy.tokens import Doc +from spacy.tokens import Doc, Token from spacy.vocab import Vocab @@ -15,6 +15,10 @@ def doc_w_attrs(en_tokenizer): ) doc = en_tokenizer("This is a test.") doc._._test_attr = "test" + + Token.set_extension("_test_token", default="t0") + doc[1]._._test_token = "t1" + return doc @@ -25,3 +29,7 @@ def test_serialize_ext_attrs_from_bytes(doc_w_attrs): assert doc._._test_attr == "test" assert doc._._test_prop == len(doc.text) assert doc._._test_method("test") == "{}{}".format(len(doc.text), "test") + + assert doc[0]._._test_token == "t0" + assert doc[1]._._test_token == "t1" + assert doc[2]._._test_token == "t0" diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 6bd982e35..4aee21153 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -23,7 +23,7 @@ from ..lexeme cimport Lexeme, EMPTY_LEXEME from ..typedefs cimport attr_t, flags_t from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, CLUSTER from ..attrs cimport LENGTH, POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB -from ..attrs cimport ENT_TYPE, ENT_KB_ID, SENT_START, attr_id_t +from ..attrs cimport ENT_TYPE, ENT_ID, ENT_KB_ID, SENT_START, attr_id_t from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t from ..attrs import intify_attrs, IDS @@ -69,6 
+69,8 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil: return token.ent_iob elif feat_name == ENT_TYPE: return token.ent_type + elif feat_name == ENT_ID: + return token.ent_id elif feat_name == ENT_KB_ID: return token.ent_kb_id else: @@ -868,7 +870,7 @@ cdef class Doc: DOCS: https://spacy.io/api/doc#to_bytes """ - array_head = [LENGTH, SPACY, LEMMA, ENT_IOB, ENT_TYPE] # TODO: ENT_KB_ID ? + array_head = [LENGTH, SPACY, LEMMA, ENT_IOB, ENT_TYPE, ENT_ID] # TODO: ENT_KB_ID ? if self.is_tagged: array_head.extend([TAG, POS]) # If doc parsed add head and dep attribute diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 957e853ca..24857790b 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -212,7 +212,7 @@ cdef class Span: words = [t.text for t in self] spaces = [bool(t.whitespace_) for t in self] cdef Doc doc = Doc(self.doc.vocab, words=words, spaces=spaces) - array_head = [LENGTH, SPACY, LEMMA, ENT_IOB, ENT_TYPE, ENT_KB_ID] + array_head = [LENGTH, SPACY, LEMMA, ENT_IOB, ENT_TYPE, ENT_ID, ENT_KB_ID] if self.doc.is_tagged: array_head.append(TAG) # If doc parsed add head and dep attribute diff --git a/spacy/tokens/token.pxd b/spacy/tokens/token.pxd index ec5df3fac..cbca55c40 100644 --- a/spacy/tokens/token.pxd +++ b/spacy/tokens/token.pxd @@ -53,6 +53,8 @@ cdef class Token: return token.ent_iob elif feat_name == ENT_TYPE: return token.ent_type + elif feat_name == ENT_ID: + return token.ent_id elif feat_name == ENT_KB_ID: return token.ent_kb_id elif feat_name == SENT_START: @@ -81,6 +83,8 @@ cdef class Token: token.ent_iob = value elif feat_name == ENT_TYPE: token.ent_type = value + elif feat_name == ENT_ID: + token.ent_id = value elif feat_name == ENT_KB_ID: token.ent_kb_id = value elif feat_name == SENT_START: From de69bc65098169a3f7098353d790a21b06d01f04 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Mon, 6 Jan 2020 14:58:30 +0100 Subject: [PATCH 10/49] Fix and improve URL pattern (#4882) * match domains longer 
than `hostname.domain.tld` like `www.foo.co.uk` * expand allowed characters in domain names while only matching lowercase TLDs so that "this.That" isn't matched as a URL and can be split on the period as an infix (relevant for at least English, German, and Tatar) --- spacy/lang/tokenizer_exceptions.py | 27 ++++++++++++++++++++------- spacy/tests/tokenizer/test_urls.py | 18 +++++++++++------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py index 57771cca4..2c0fc9cf7 100644 --- a/spacy/lang/tokenizer_exceptions.py +++ b/spacy/lang/tokenizer_exceptions.py @@ -3,14 +3,18 @@ from __future__ import unicode_literals import re +from .char_classes import ALPHA_LOWER from ..symbols import ORTH, POS, TAG, LEMMA, SPACE # URL validation regex courtesy of: https://mathiasbynens.be/demo/url-regex -# A few minor mods to this regex to account for use cases represented in test_urls +# and https://gist.github.com/dperini/729294 (Diego Perini, MIT License) +# A few mods to this regex to account for use cases represented in test_urls URL_PATTERN = ( + # fmt: off r"^" - # protocol identifier (see: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml) + # protocol identifier (mods: make optional and expand schemes) + # (see: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml) r"(?:(?:[\w\+\-\.]{2,})://)?" # mailto:user or user:pass authentication r"(?:\S+(?::\S*)?@)?" @@ -31,18 +35,27 @@ URL_PATTERN = ( r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" r"|" - # host name - r"(?:(?:[a-z0-9\-]*)?[a-z0-9]+)" - # domain name - r"(?:\.(?:[a-z0-9])(?:[a-z0-9\-])*[a-z0-9])?" + # host & domain names + # mods: match is case-sensitive, so include [A-Z] + "(?:" + "(?:" + "[A-Za-z0-9\u00a1-\uffff]" + "[A-Za-z0-9\u00a1-\uffff_-]{0,62}" + ")?" + "[A-Za-z0-9\u00a1-\uffff]\." 
+ ")+" # TLD identifier - r"(?:\.(?:[a-z]{2,}))" + # mods: use ALPHA_LOWER instead of a wider range so that this doesn't match + # strings like "lower.Upper", which can be split on "." by infixes in some + # languages + r"(?:[" + ALPHA_LOWER + "]{2,63})" r")" # port number r"(?::\d{2,5})?" # resource path r"(?:[/?#]\S*)?" r"$" + # fmt: on ).strip() TOKEN_MATCH = re.compile(URL_PATTERN, re.UNICODE).match diff --git a/spacy/tests/tokenizer/test_urls.py b/spacy/tests/tokenizer/test_urls.py index 21e1819b7..ef99484ee 100644 --- a/spacy/tests/tokenizer/test_urls.py +++ b/spacy/tests/tokenizer/test_urls.py @@ -20,6 +20,7 @@ URLS_FULL = URLS_BASIC + [ # URL SHOULD_MATCH and SHOULD_NOT_MATCH patterns courtesy of https://mathiasbynens.be/demo/url-regex URLS_SHOULD_MATCH = [ "http://foo.com/blah_blah", + "http://BlahBlah.com/Blah_Blah", "http://foo.com/blah_blah/", "http://www.example.com/wpstyle/?p=364", "https://www.example.com/foo/?bar=baz&inga=42&quux", @@ -57,14 +58,17 @@ URLS_SHOULD_MATCH = [ ), "http://foo.com/blah_blah_(wikipedia)", "http://foo.com/blah_blah_(wikipedia)_(again)", - pytest.param("http://⌘.ws", marks=pytest.mark.xfail()), - pytest.param("http://⌘.ws/", marks=pytest.mark.xfail()), - pytest.param("http://☺.damowmow.com/", marks=pytest.mark.xfail()), - pytest.param("http://✪df.ws/123", marks=pytest.mark.xfail()), - pytest.param("http://➡.ws/䨹", marks=pytest.mark.xfail()), - pytest.param("http://مثال.إختبار", marks=pytest.mark.xfail()), + "http://www.foo.co.uk", + "http://www.foo.co.uk/", + "http://www.foo.co.uk/blah/blah", + "http://⌘.ws", + "http://⌘.ws/", + "http://☺.damowmow.com/", + "http://✪df.ws/123", + "http://➡.ws/䨹", + "http://مثال.إختبار", pytest.param("http://例子.测试", marks=pytest.mark.xfail()), - pytest.param("http://उदाहरण.परीक्षा", marks=pytest.mark.xfail()), + "http://उदाहरण.परीक्षा", ] URLS_SHOULD_NOT_MATCH = [ From d652ff215d1d6e32b25efa217370fa0c1ef9e0ba Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Mon, 6 Jan 2020 14:58:59 +0100 
Subject: [PATCH 11/49] Add trailing whitespace to multiline test text (#4877) --- spacy/tests/lang/tt/test_tokenizer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/spacy/tests/lang/tt/test_tokenizer.py b/spacy/tests/lang/tt/test_tokenizer.py index 66ef9c181..f6c68a401 100644 --- a/spacy/tests/lang/tt/test_tokenizer.py +++ b/spacy/tests/lang/tt/test_tokenizer.py @@ -44,15 +44,15 @@ TYPOS_IN_PUNC_TESTS = [ LONG_TEXTS_TESTS = [ ( - "Иң борынгы кешеләр суыклар һәм салкын кышлар булмый торган җылы" - "якларда яшәгәннәр, шуңа күрә аларга кием кирәк булмаган.Йөз" - "меңнәрчә еллар үткән, борынгы кешеләр акрынлап Европа һәм Азиянең" - "салкын илләрендә дә яши башлаганнар. Алар кырыс һәм салкын" + "Иң борынгы кешеләр суыклар һәм салкын кышлар булмый торган җылы " + "якларда яшәгәннәр, шуңа күрә аларга кием кирәк булмаган.Йөз " + "меңнәрчә еллар үткән, борынгы кешеләр акрынлап Европа һәм Азиянең " + "салкын илләрендә дә яши башлаганнар. Алар кырыс һәм салкын " "кышлардан саклану өчен кием-салым уйлап тапканнар - итәк.", - "Иң борынгы кешеләр суыклар һәм салкын кышлар булмый торган җылы" - "якларда яшәгәннәр , шуңа күрә аларга кием кирәк булмаган . Йөз" - "меңнәрчә еллар үткән , борынгы кешеләр акрынлап Европа һәм Азиянең" - "салкын илләрендә дә яши башлаганнар . Алар кырыс һәм салкын" + "Иң борынгы кешеләр суыклар һәм салкын кышлар булмый торган җылы " + "якларда яшәгәннәр , шуңа күрә аларга кием кирәк булмаган . Йөз " + "меңнәрчә еллар үткән , борынгы кешеләр акрынлап Европа һәм Азиянең " + "салкын илләрендә дә яши башлаганнар . 
Алар кырыс һәм салкын " "кышлардан саклану өчен кием-салым уйлап тапканнар - итәк .".split(), ) ] From 6e9b61b49dbd37ae1cb03c5216ea15a99277000f Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Mon, 6 Jan 2020 14:59:28 +0100 Subject: [PATCH 12/49] add warning in debug_data for punctuation in entities (#4853) --- spacy/cli/debug_data.py | 22 +++++++++++++++++++++- spacy/errors.py | 3 ++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index 5d044e617..4b12052c3 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -192,6 +192,7 @@ def debug_data( has_low_data_warning = False has_no_neg_warning = False has_ws_ents_error = False + has_punct_ents_warning = False msg.divider("Named Entity Recognition") msg.info( @@ -226,10 +227,16 @@ def debug_data( if gold_train_data["ws_ents"]: msg.fail( - "{} invalid whitespace entity spans".format(gold_train_data["ws_ents"]) + "{} invalid whitespace entity span(s)".format(gold_train_data["ws_ents"]) ) has_ws_ents_error = True + if gold_train_data["punct_ents"]: + msg.warn( + "{} entity span(s) with punctuation".format(gold_train_data["punct_ents"]) + ) + has_punct_ents_warning = True + for label in new_labels: if label_counts[label] <= NEW_LABEL_THRESHOLD: msg.warn( @@ -253,6 +260,8 @@ def debug_data( msg.good("Examples without occurrences available for all labels") if not has_ws_ents_error: msg.good("No entities consisting of or starting/ending with whitespace") + if not has_punct_ents_warning: + msg.good("No entities consisting of or starting/ending with punctuation") if has_low_data_warning: msg.text( @@ -273,6 +282,12 @@ def debug_data( "with whitespace characters are considered invalid." ) + if has_punct_ents_warning: + msg.text( + "Entity spans consisting of or starting/ending " + "with punctuation can not be trained with a noise level > 0." 
+ ) + if "textcat" in pipeline: msg.divider("Text Classification") labels = [label for label in gold_train_data["cats"]] @@ -547,6 +562,7 @@ def _compile_gold(train_docs, pipeline): "words": Counter(), "roots": Counter(), "ws_ents": 0, + "punct_ents": 0, "n_words": 0, "n_misaligned_words": 0, "n_sents": 0, @@ -568,6 +584,10 @@ def _compile_gold(train_docs, pipeline): if label.startswith(("B-", "U-", "L-")) and doc[i].is_space: # "Illegal" whitespace entity data["ws_ents"] += 1 + if label.startswith(("B-", "U-", "L-")) and doc[i].text in [".", "'", "!", "?", ","]: + # punctuation entity: could be replaced by whitespace when training with noise, + # so add a warning to alert the user to this unexpected side effect. + data["punct_ents"] += 1 if label.startswith(("B-", "U-")): combined_label = label.split("-")[1] data["ner"][combined_label] += 1 diff --git a/spacy/errors.py b/spacy/errors.py index fd0f66cd9..2f0a8a2ad 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -172,7 +172,8 @@ class Errors(object): "and satisfies the correct annotations specified in the GoldParse. " "For example, are all labels added to the model? If you're " "training a named entity recognizer, also make sure that none of " - "your annotated entity spans have leading or trailing whitespace. " + "your annotated entity spans have leading or trailing whitespace " + "or punctuation. " "You can also use the experimental `debug-data` command to " "validate your JSON-formatted training data. 
For details, run:\n" "python -m spacy debug-data --help") From 7b96a5e10f907c325f1d71dd23daafafe237d4d9 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Mon, 6 Jan 2020 14:59:50 +0100 Subject: [PATCH 13/49] Reduce mem usage in training Entity Linker (#4811) * move nlp processing for el pipe to batch training instead of preprocessing * adding dev eval back in, and limit in articles instead of entities * use pipe whenever possible * few more small doc changes * access dev data through generator * tqdm description * small fixes * update documentation --- bin/wiki_entity_linking/README.md | 28 +-- .../entity_linker_evaluation.py | 181 ++++++++---------- .../wikidata_train_entity_linker.py | 158 +++++++-------- .../wikipedia_processor.py | 61 +++--- spacy/pipeline/pipes.pyx | 2 +- 5 files changed, 200 insertions(+), 230 deletions(-) diff --git a/bin/wiki_entity_linking/README.md b/bin/wiki_entity_linking/README.md index 540878592..7460a455e 100644 --- a/bin/wiki_entity_linking/README.md +++ b/bin/wiki_entity_linking/README.md @@ -7,16 +7,16 @@ Run `wikipedia_pretrain_kb.py` * WikiData: get `latest-all.json.bz2` from https://dumps.wikimedia.org/wikidatawiki/entities/ * Wikipedia: get `enwiki-latest-pages-articles-multistream.xml.bz2` from https://dumps.wikimedia.org/enwiki/latest/ (or for any other language) * You can set the filtering parameters for KB construction: - * `max_per_alias`: (max) number of candidate entities in the KB per alias/synonym - * `min_freq`: threshold of number of times an entity should occur in the corpus to be included in the KB - * `min_pair`: threshold of number of times an entity+alias combination should occur in the corpus to be included in the KB + * `max_per_alias` (`-a`): (max) number of candidate entities in the KB per alias/synonym + * `min_freq` (`-f`): threshold of number of times an entity should occur in the corpus to be included in the KB + * `min_pair` (`-c`): threshold of number of times an entity+alias combination should 
occur in the corpus to be included in the KB * Further parameters to set: - * `descriptions_from_wikipedia`: whether to parse descriptions from Wikipedia (`True`) or Wikidata (`False`) - * `entity_vector_length`: length of the pre-trained entity description vectors - * `lang`: language for which to fetch Wikidata information (as the dump contains all languages) + * `descriptions_from_wikipedia` (`-wp`): whether to parse descriptions from Wikipedia (`True`) or Wikidata (`False`) + * `entity_vector_length` (`-v`): length of the pre-trained entity description vectors + * `lang` (`-la`): language for which to fetch Wikidata information (as the dump contains all languages) Quick testing and rerunning: -* When trying out the pipeline for a quick test, set `limit_prior`, `limit_train` and/or `limit_wd` to read only parts of the dumps instead of everything. +* When trying out the pipeline for a quick test, set `limit_prior` (`-lp`), `limit_train` (`-lt`) and/or `limit_wd` (`-lw`) to read only parts of the dumps instead of everything. * If you only want to (re)run certain parts of the pipeline, just remove the corresponding files and they will be recalculated or reparsed. 
@@ -24,11 +24,13 @@ Quick testing and rerunning: Run `wikidata_train_entity_linker.py` * This takes the **KB directory** produced by Step 1, and trains an **Entity Linking model** +* Specify the output directory (`-o`) in which the final, trained model will be saved * You can set the learning parameters for the EL training: - * `epochs`: number of training iterations - * `dropout`: dropout rate - * `lr`: learning rate - * `l2`: L2 regularization -* Specify the number of training and dev testing entities with `train_inst` and `dev_inst` respectively + * `epochs` (`-e`): number of training iterations + * `dropout` (`-p`): dropout rate + * `lr` (`-n`): learning rate + * `l2` (`-r`): L2 regularization +* Specify the number of training and dev testing articles with `train_articles` (`-t`) and `dev_articles` (`-d`) respectively + * If not specified, the full dataset will be processed - this may take a LONG time ! * Further parameters to set: - * `labels_discard`: NER label types to discard during training + * `labels_discard` (`-l`): NER label types to discard during training diff --git a/bin/wiki_entity_linking/entity_linker_evaluation.py b/bin/wiki_entity_linking/entity_linker_evaluation.py index 273ade0cd..2aeffbfc2 100644 --- a/bin/wiki_entity_linking/entity_linker_evaluation.py +++ b/bin/wiki_entity_linking/entity_linker_evaluation.py @@ -1,6 +1,8 @@ +# coding: utf-8 +from __future__ import unicode_literals + import logging import random - from tqdm import tqdm from collections import defaultdict @@ -92,133 +94,110 @@ class BaselineResults(object): self.random.update_metrics(ent_label, true_entity, random_candidate) -def measure_performance(dev_data, kb, el_pipe, baseline=True, context=True): - if baseline: - baseline_accuracies, counts = measure_baselines(dev_data, kb) - logger.info("Counts: {}".format({k: v for k, v in sorted(counts.items())})) - logger.info(baseline_accuracies.report_performance("random")) - 
logger.info(baseline_accuracies.report_performance("prior")) - logger.info(baseline_accuracies.report_performance("oracle")) +def measure_performance(dev_data, kb, el_pipe, baseline=True, context=True, dev_limit=None): + counts = dict() + baseline_results = BaselineResults() + context_results = EvaluationResults() + combo_results = EvaluationResults() - if context: - # using only context - el_pipe.cfg["incl_context"] = True - el_pipe.cfg["incl_prior"] = False - results = get_eval_results(dev_data, el_pipe) - logger.info(results.report_metrics("context only")) - - # measuring combined accuracy (prior + context) - el_pipe.cfg["incl_context"] = True - el_pipe.cfg["incl_prior"] = True - results = get_eval_results(dev_data, el_pipe) - logger.info(results.report_metrics("context and prior")) - - -def get_eval_results(data, el_pipe=None): - """ - Evaluate the ent.kb_id_ annotations against the gold standard. - Only evaluate entities that overlap between gold and NER, to isolate the performance of the NEL. - If the docs in the data require further processing with an entity linker, set el_pipe. 
- """ - docs = [] - golds = [] - for d, g in tqdm(data, leave=False): - if len(d) > 0: - golds.append(g) - if el_pipe is not None: - docs.append(el_pipe(d)) - else: - docs.append(d) - - results = EvaluationResults() - for doc, gold in zip(docs, golds): - try: - correct_entries_per_article = dict() + for doc, gold in tqdm(dev_data, total=dev_limit, leave=False, desc='Processing dev data'): + if len(doc) > 0: + correct_ents = dict() for entity, kb_dict in gold.links.items(): start, end = entity for gold_kb, value in kb_dict.items(): if value: # only evaluating on positive examples offset = _offset(start, end) - correct_entries_per_article[offset] = gold_kb + correct_ents[offset] = gold_kb - for ent in doc.ents: - ent_label = ent.label_ - pred_entity = ent.kb_id_ - start = ent.start_char - end = ent.end_char - offset = _offset(start, end) - gold_entity = correct_entries_per_article.get(offset, None) - # the gold annotations are not complete so we can't evaluate missing annotations as 'wrong' - if gold_entity is not None: - results.update_metrics(ent_label, gold_entity, pred_entity) + if baseline: + _add_baseline(baseline_results, counts, doc, correct_ents, kb) - except Exception as e: - logging.error("Error assessing accuracy " + str(e)) + if context: + # using only context + el_pipe.cfg["incl_context"] = True + el_pipe.cfg["incl_prior"] = False + _add_eval_result(context_results, doc, correct_ents, el_pipe) - return results + # measuring combined accuracy (prior + context) + el_pipe.cfg["incl_context"] = True + el_pipe.cfg["incl_prior"] = True + _add_eval_result(combo_results, doc, correct_ents, el_pipe) + + if baseline: + logger.info("Counts: {}".format({k: v for k, v in sorted(counts.items())})) + logger.info(baseline_results.report_performance("random")) + logger.info(baseline_results.report_performance("prior")) + logger.info(baseline_results.report_performance("oracle")) + + if context: + logger.info(context_results.report_metrics("context only")) + 
logger.info(combo_results.report_metrics("context and prior")) -def measure_baselines(data, kb): +def _add_eval_result(results, doc, correct_ents, el_pipe): """ - Measure 3 performance baselines: random selection, prior probabilities, and 'oracle' prediction for upper bound. + Evaluate the ent.kb_id_ annotations against the gold standard. Only evaluate entities that overlap between gold and NER, to isolate the performance of the NEL. - Also return a dictionary of counts by entity label. """ - counts_d = dict() - - baseline_results = BaselineResults() - - docs = [d for d, g in data if len(d) > 0] - golds = [g for d, g in data if len(d) > 0] - - for doc, gold in zip(docs, golds): - correct_entries_per_article = dict() - for entity, kb_dict in gold.links.items(): - start, end = entity - for gold_kb, value in kb_dict.items(): - # only evaluating on positive examples - if value: - offset = _offset(start, end) - correct_entries_per_article[offset] = gold_kb - + try: + doc = el_pipe(doc) for ent in doc.ents: ent_label = ent.label_ start = ent.start_char end = ent.end_char offset = _offset(start, end) - gold_entity = correct_entries_per_article.get(offset, None) - + gold_entity = correct_ents.get(offset, None) # the gold annotations are not complete so we can't evaluate missing annotations as 'wrong' if gold_entity is not None: - candidates = kb.get_candidates(ent.text) - oracle_candidate = "" - prior_candidate = "" - random_candidate = "" - if candidates: - scores = [] + pred_entity = ent.kb_id_ + results.update_metrics(ent_label, gold_entity, pred_entity) - for c in candidates: - scores.append(c.prior_prob) - if c.entity_ == gold_entity: - oracle_candidate = c.entity_ + except Exception as e: + logging.error("Error assessing accuracy " + str(e)) - best_index = scores.index(max(scores)) - prior_candidate = candidates[best_index].entity_ - random_candidate = random.choice(candidates).entity_ - current_count = counts_d.get(ent_label, 0) - counts_d[ent_label] = 
current_count+1 +def _add_baseline(baseline_results, counts, doc, correct_ents, kb): + """ + Measure 3 performance baselines: random selection, prior probabilities, and 'oracle' prediction for upper bound. + Only evaluate entities that overlap between gold and NER, to isolate the performance of the NEL. + """ + for ent in doc.ents: + ent_label = ent.label_ + start = ent.start_char + end = ent.end_char + offset = _offset(start, end) + gold_entity = correct_ents.get(offset, None) - baseline_results.update_baselines( - gold_entity, - ent_label, - random_candidate, - prior_candidate, - oracle_candidate, - ) + # the gold annotations are not complete so we can't evaluate missing annotations as 'wrong' + if gold_entity is not None: + candidates = kb.get_candidates(ent.text) + oracle_candidate = "" + prior_candidate = "" + random_candidate = "" + if candidates: + scores = [] - return baseline_results, counts_d + for c in candidates: + scores.append(c.prior_prob) + if c.entity_ == gold_entity: + oracle_candidate = c.entity_ + + best_index = scores.index(max(scores)) + prior_candidate = candidates[best_index].entity_ + random_candidate = random.choice(candidates).entity_ + + current_count = counts.get(ent_label, 0) + counts[ent_label] = current_count+1 + + baseline_results.update_baselines( + gold_entity, + ent_label, + random_candidate, + prior_candidate, + oracle_candidate, + ) def _offset(start, end): diff --git a/bin/wiki_entity_linking/wikidata_train_entity_linker.py b/bin/wiki_entity_linking/wikidata_train_entity_linker.py index 8635ae547..54f00fc6f 100644 --- a/bin/wiki_entity_linking/wikidata_train_entity_linker.py +++ b/bin/wiki_entity_linking/wikidata_train_entity_linker.py @@ -1,5 +1,5 @@ # coding: utf-8 -"""Script to take a previously created Knowledge Base and train an entity linking +"""Script that takes a previously created Knowledge Base and trains an entity linking pipeline. 
The provided KB directory should hold the kb, the original nlp object and its vocab used to create the KB, and a few auxiliary files such as the entity definitions, as created by the script `wikidata_create_kb`. @@ -14,6 +14,7 @@ import logging import spacy from pathlib import Path import plac +from tqdm import tqdm from bin.wiki_entity_linking import wikipedia_processor from bin.wiki_entity_linking import TRAINING_DATA_FILE, KB_MODEL_DIR, KB_FILE, LOG_FORMAT, OUTPUT_MODEL_DIR @@ -33,8 +34,8 @@ logger = logging.getLogger(__name__) dropout=("Dropout to prevent overfitting (default 0.5)", "option", "p", float), lr=("Learning rate (default 0.005)", "option", "n", float), l2=("L2 regularization", "option", "r", float), - train_inst=("# training instances (default 90% of all)", "option", "t", int), - dev_inst=("# test instances (default 10% of all)", "option", "d", int), + train_articles=("# training articles (default 90% of all)", "option", "t", int), + dev_articles=("# dev test articles (default 10% of all)", "option", "d", int), labels_discard=("NER labels to discard (default None)", "option", "l", str), ) def main( @@ -45,10 +46,13 @@ def main( dropout=0.5, lr=0.005, l2=1e-6, - train_inst=None, - dev_inst=None, + train_articles=None, + dev_articles=None, labels_discard=None ): + if not output_dir: + logger.warning("No output dir specified so no results will be written, are you sure about this ?") + logger.info("Creating Entity Linker with Wikipedia and WikiData") output_dir = Path(output_dir) if output_dir else dir_kb @@ -64,44 +68,33 @@ def main( # STEP 1 : load the NLP object logger.info("STEP 1a: Loading model from {}".format(nlp_dir)) nlp = spacy.load(nlp_dir) - logger.info("STEP 1b: Loading KB from {}".format(kb_path)) - kb = read_kb(nlp, kb_path) + logger.info("Original NLP pipeline has following pipeline components: {}".format(nlp.pipe_names)) # check that there is a NER component in the pipeline if "ner" not in nlp.pipe_names: raise ValueError("The `nlp` 
object should have a pretrained `ner` component.") - # STEP 2: read the training dataset previously created from WP - logger.info("STEP 2: Reading training dataset from {}".format(training_path)) + logger.info("STEP 1b: Loading KB from {}".format(kb_path)) + kb = read_kb(nlp, kb_path) + # STEP 2: read the training dataset previously created from WP + logger.info("STEP 2: Reading training & dev dataset from {}".format(training_path)) + train_indices, dev_indices = wikipedia_processor.read_training_indices(training_path) + logger.info("Training set has {} articles, limit set to roughly {} articles per epoch" + .format(len(train_indices), train_articles if train_articles else "all")) + logger.info("Dev set has {} articles, limit set to rougly {} articles for evaluation" + .format(len(dev_indices), dev_articles if dev_articles else "all")) + if dev_articles: + dev_indices = dev_indices[0:dev_articles] + + # STEP 3: create and train an entity linking pipe + logger.info("STEP 3: Creating and training an Entity Linking pipe for {} epochs".format(epochs)) if labels_discard: labels_discard = [x.strip() for x in labels_discard.split(",")] logger.info("Discarding {} NER types: {}".format(len(labels_discard), labels_discard)) else: labels_discard = [] - train_data = wikipedia_processor.read_training( - nlp=nlp, - entity_file_path=training_path, - dev=False, - limit=train_inst, - kb=kb, - labels_discard=labels_discard - ) - - # for testing, get all pos instances (independently of KB) - dev_data = wikipedia_processor.read_training( - nlp=nlp, - entity_file_path=training_path, - dev=True, - limit=dev_inst, - kb=None, - labels_discard=labels_discard - ) - - # STEP 3: create and train an entity linking pipe - logger.info("STEP 3: Creating and training an Entity Linking pipe") - el_pipe = nlp.create_pipe( name="entity_linker", config={"pretrained_vectors": nlp.vocab.vectors.name, "labels_discard": labels_discard} @@ -115,80 +108,65 @@ def main( optimizer.learn_rate = lr optimizer.L2 
= l2 - logger.info("Training on {} articles".format(len(train_data))) - logger.info("Dev testing on {} articles".format(len(dev_data))) - - # baseline performance on dev data logger.info("Dev Baseline Accuracies:") - measure_performance(dev_data, kb, el_pipe, baseline=True, context=False) + dev_data = wikipedia_processor.read_el_docs_golds(nlp=nlp, entity_file_path=training_path, + dev=True, line_ids=dev_indices, + kb=kb, labels_discard=labels_discard) + + measure_performance(dev_data, kb, el_pipe, baseline=True, context=False, dev_limit=len(dev_indices)) for itn in range(epochs): - random.shuffle(train_data) + random.shuffle(train_indices) losses = {} - batches = minibatch(train_data, size=compounding(4.0, 128.0, 1.001)) + batches = minibatch(train_indices, size=compounding(8.0, 128.0, 1.001)) batchnr = 0 + articles_processed = 0 - with nlp.disable_pipes(*other_pipes): + # we either process the whole training file, or just a part each epoch + bar_total = len(train_indices) + if train_articles: + bar_total = train_articles + + with tqdm(total=bar_total, leave=False, desc='Epoch ' + str(itn)) as pbar: for batch in batches: - try: - docs, golds = zip(*batch) - nlp.update( - docs=docs, - golds=golds, - sgd=optimizer, - drop=dropout, - losses=losses, - ) - batchnr += 1 - except Exception as e: - logger.error("Error updating batch:" + str(e)) + if not train_articles or articles_processed < train_articles: + with nlp.disable_pipes("entity_linker"): + train_batch = wikipedia_processor.read_el_docs_golds(nlp=nlp, entity_file_path=training_path, + dev=False, line_ids=batch, + kb=kb, labels_discard=labels_discard) + docs, golds = zip(*train_batch) + try: + with nlp.disable_pipes(*other_pipes): + nlp.update( + docs=docs, + golds=golds, + sgd=optimizer, + drop=dropout, + losses=losses, + ) + batchnr += 1 + articles_processed += len(docs) + pbar.update(len(docs)) + except Exception as e: + logger.error("Error updating batch:" + str(e)) if batchnr > 0: - logging.info("Epoch {}, 
train loss {}".format(itn, round(losses["entity_linker"] / batchnr, 2))) - measure_performance(dev_data, kb, el_pipe, baseline=False, context=True) - - # STEP 4: measure the performance of our trained pipe on an independent dev set - logger.info("STEP 4: Final performance measurement of Entity Linking pipe") - measure_performance(dev_data, kb, el_pipe) - - # STEP 5: apply the EL pipe on a toy example - logger.info("STEP 5: Applying Entity Linking to toy example") - run_el_toy_example(nlp=nlp) + logging.info("Epoch {} trained on {} articles, train loss {}" + .format(itn, articles_processed, round(losses["entity_linker"] / batchnr, 2))) + # re-read the dev_data (data is returned as a generator) + dev_data = wikipedia_processor.read_el_docs_golds(nlp=nlp, entity_file_path=training_path, + dev=True, line_ids=dev_indices, + kb=kb, labels_discard=labels_discard) + measure_performance(dev_data, kb, el_pipe, baseline=False, context=True, dev_limit=len(dev_indices)) if output_dir: - # STEP 6: write the NLP pipeline (now including an EL model) to file - logger.info("STEP 6: Writing trained NLP to {}".format(nlp_output_dir)) + # STEP 4: write the NLP pipeline (now including an EL model) to file + logger.info("Final NLP pipeline has following pipeline components: {}".format(nlp.pipe_names)) + logger.info("STEP 4: Writing trained NLP to {}".format(nlp_output_dir)) nlp.to_disk(nlp_output_dir) logger.info("Done!") -def check_kb(kb): - for mention in ("Bush", "Douglas Adams", "Homer", "Brazil", "China"): - candidates = kb.get_candidates(mention) - - logger.info("generating candidates for " + mention + " :") - for c in candidates: - logger.info(" ".join[ - str(c.prior_prob), - c.alias_, - "-->", - c.entity_ + " (freq=" + str(c.entity_freq) + ")" - ]) - - -def run_el_toy_example(nlp): - text = ( - "In The Hitchhiker's Guide to the Galaxy, written by Douglas Adams, " - "Douglas reminds us to always bring our towel, even in China or Brazil. 
" - "The main character in Doug's novel is the man Arthur Dent, " - "but Dougledydoug doesn't write about George Washington or Homer Simpson." - ) - doc = nlp(text) - logger.info(text) - for ent in doc.ents: - logger.info(" ".join(["ent", ent.text, ent.label_, ent.kb_id_])) - - if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format=LOG_FORMAT) plac.call(main) diff --git a/bin/wiki_entity_linking/wikipedia_processor.py b/bin/wiki_entity_linking/wikipedia_processor.py index 19df0cf10..315b1e916 100644 --- a/bin/wiki_entity_linking/wikipedia_processor.py +++ b/bin/wiki_entity_linking/wikipedia_processor.py @@ -6,9 +6,6 @@ import bz2 import logging import random import json -from tqdm import tqdm - -from functools import partial from spacy.gold import GoldParse from bin.wiki_entity_linking import wiki_io as io @@ -454,25 +451,40 @@ def _write_training_entities(outputfile, article_id, clean_text, entities): outputfile.write(line) -def read_training(nlp, entity_file_path, dev, limit, kb, labels_discard=None): - """ This method provides training examples that correspond to the entity annotations found by the nlp object. +def read_training_indices(entity_file_path): + """ This method creates two lists of indices into the training file: one with indices for the + training examples, and one for the dev examples.""" + train_indices = [] + dev_indices = [] + + with entity_file_path.open("r", encoding="utf8") as file: + for i, line in enumerate(file): + example = json.loads(line) + article_id = example["article_id"] + clean_text = example["clean_text"] + + if is_valid_article(clean_text): + if is_dev(article_id): + dev_indices.append(i) + else: + train_indices.append(i) + + return train_indices, dev_indices + + +def read_el_docs_golds(nlp, entity_file_path, dev, line_ids, kb, labels_discard=None): + """ This method provides training/dev examples that correspond to the entity annotations found by the nlp object. 
For training, it will include both positive and negative examples by using the candidate generator from the kb. For testing (kb=None), it will include all positive examples only.""" if not labels_discard: labels_discard = [] - data = [] - num_entities = 0 - get_gold_parse = partial( - _get_gold_parse, dev=dev, kb=kb, labels_discard=labels_discard - ) + texts = [] + entities_list = [] - logger.info( - "Reading {} data with limit {}".format("dev" if dev else "train", limit) - ) with entity_file_path.open("r", encoding="utf8") as file: - with tqdm(total=limit, leave=False) as pbar: - for i, line in enumerate(file): + for i, line in enumerate(file): + if i in line_ids: example = json.loads(line) article_id = example["article_id"] clean_text = example["clean_text"] @@ -481,16 +493,15 @@ def read_training(nlp, entity_file_path, dev, limit, kb, labels_discard=None): if dev != is_dev(article_id) or not is_valid_article(clean_text): continue - doc = nlp(clean_text) - gold = get_gold_parse(doc, entities) - if gold and len(gold.links) > 0: - data.append((doc, gold)) - num_entities += len(gold.links) - pbar.update(len(gold.links)) - if limit and num_entities >= limit: - break - logger.info("Read {} entities in {} articles".format(num_entities, len(data))) - return data + texts.append(clean_text) + entities_list.append(entities) + + docs = nlp.pipe(texts, batch_size=50) + + for doc, entities in zip(docs, entities_list): + gold = _get_gold_parse(doc, entities, dev=dev, kb=kb, labels_discard=labels_discard) + if gold and len(gold.links) > 0: + yield doc, gold def _get_gold_parse(doc, entities, dev, kb, labels_discard): diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index f57ea59d2..b51520777 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -1308,7 +1308,7 @@ class EntityLinker(Pipe): for i, doc in enumerate(docs): if len(doc) > 0: # Looping through each sentence and each entity - # This may go wrong if there are entities across sentences - 
because they might not get a KB ID + # This may go wrong if there are entities across sentences - which shouldn't happen normally. for sent in doc.sents: sent_doc = sent.as_doc() # currently, the context is the same for each entity in a sentence (should be refined) From aef83e80704fe20c68ac3e188ae245c819f04c07 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Wed, 8 Jan 2020 12:34:06 +0100 Subject: [PATCH 14/49] Mark most Hungarian tokenizer test cases as slow (#4883) * Mark most Hungarian tokenizer test cases as slow Mark most Hungarian tokenizer test cases as slow to reduce the runtime of the test suite in ordinary usage: * for normal tests: run default tests plus 10% of the detailed tests * for slow tests: run all tests * Rework to mark individual tests as slow --- spacy/tests/lang/hu/test_tokenizer.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/spacy/tests/lang/hu/test_tokenizer.py b/spacy/tests/lang/hu/test_tokenizer.py index fa8e132c0..2fceece49 100644 --- a/spacy/tests/lang/hu/test_tokenizer.py +++ b/spacy/tests/lang/hu/test_tokenizer.py @@ -296,9 +296,8 @@ WIKI_TESTS = [ ("cérium(IV)-oxid", ["cérium", "(", "IV", ")", "-oxid"]), ] -TESTCASES = ( - DEFAULT_TESTS - + DOT_TESTS +EXTRA_TESTS = ( + DOT_TESTS + QUOTE_TESTS + NUMBER_TESTS + HYPHEN_TESTS @@ -306,8 +305,16 @@ TESTCASES = ( + TYPO_TESTS ) +# normal: default tests + 10% of extra tests +TESTS = DEFAULT_TESTS +TESTS.extend([x for i, x in enumerate(EXTRA_TESTS) if i % 10 == 0]) -@pytest.mark.parametrize("text,expected_tokens", TESTCASES) +# slow: remaining 90% of extra tests +SLOW_TESTS = [x for i, x in enumerate(EXTRA_TESTS) if i % 10 != 0] +TESTS.extend([pytest.param(x[0], x[1], marks=pytest.mark.slow()) if not isinstance(x[0], tuple) else x for x in SLOW_TESTS]) + + +@pytest.mark.parametrize("text,expected_tokens", TESTS) def test_hu_tokenizer_handles_testcases(hu_tokenizer, text, expected_tokens): tokens = hu_tokenizer(text) token_list = [token.text for token in 
tokens if not token.is_space] From b216ff43c971adc7ce0a49ec3ced3ec5b2dfd20d Mon Sep 17 00:00:00 2001 From: Preston Badeer <467756+pbadeer@users.noreply.github.com> Date: Wed, 8 Jan 2020 09:49:40 -0600 Subject: [PATCH 15/49] Update vectors-similarity.md (#4889) These links are broken on the website, due to quotes around the URLs. --- website/docs/usage/vectors-similarity.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/usage/vectors-similarity.md b/website/docs/usage/vectors-similarity.md index 0bb79779e..93ba67704 100644 --- a/website/docs/usage/vectors-similarity.md +++ b/website/docs/usage/vectors-similarity.md @@ -229,10 +229,10 @@ For more details on **adding hooks** and **overwriting** the built-in `Doc`, If you're using a GPU, it's much more efficient to keep the word vectors on the device. You can do that by setting the [`Vectors.data`](/api/vectors#attributes) attribute to a `cupy.ndarray` object if you're using spaCy or -[Chainer]("https://chainer.org"), or a `torch.Tensor` object if you're using -[PyTorch]("http://pytorch.org"). The `data` object just needs to support +[Chainer](https://chainer.org), or a `torch.Tensor` object if you're using +[PyTorch](http://pytorch.org). The `data` object just needs to support `__iter__` and `__getitem__`, so if you're using another library such as -[TensorFlow]("https://www.tensorflow.org"), you could also create a wrapper for +[TensorFlow](https://www.tensorflow.org), you could also create a wrapper for your vectors data. 
```python From d24bca62f6d4c4af87bf8b904be0af17382ae673 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Wed, 8 Jan 2020 16:50:19 +0100 Subject: [PATCH 16/49] Add CJK to character classes (#4884) * Add CJK character class as uncased * Incorporate Chinese URL test case Un-xfail Chinese URL test instance --- spacy/lang/char_classes.py | 12 ++++++++++++ spacy/tests/tokenizer/test_urls.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/spacy/lang/char_classes.py b/spacy/lang/char_classes.py index 2c8823867..bd0f7e437 100644 --- a/spacy/lang/char_classes.py +++ b/spacy/lang/char_classes.py @@ -17,6 +17,17 @@ _tamil = r"\u0B80-\u0BFF" _telugu = r"\u0C00-\u0C7F" +# from the final table in: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs +_cjk = ( + r"\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF" + r"\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF" + r"\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF" + r"\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F" + r"\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF" + r"\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF" + r"\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F" +) + # Latin standard _latin_u_standard = r"A-Z" _latin_l_standard = r"a-z" @@ -215,6 +226,7 @@ _uncased = ( + _tamil + _telugu + _hangul + + _cjk ) ALPHA = group_chars(LATIN + _russian + _tatar + _greek + _ukrainian + _uncased) diff --git a/spacy/tests/tokenizer/test_urls.py b/spacy/tests/tokenizer/test_urls.py index ef99484ee..58e9d73f3 100644 --- a/spacy/tests/tokenizer/test_urls.py +++ b/spacy/tests/tokenizer/test_urls.py @@ -67,7 +67,7 @@ URLS_SHOULD_MATCH = [ "http://✪df.ws/123", "http://➡.ws/䨹", "http://مثال.إختبار", - pytest.param("http://例子.测试", marks=pytest.mark.xfail()), + "http://例子.测试", "http://उदाहरण.परीक्षा", ] From c70ccd543d562c76665c617ac3ed4ba3ebac0052 Mon Sep 17 00:00:00 2001 From: Sofie Van 
Landeghem Date: Tue, 14 Jan 2020 01:51:14 +0100 Subject: [PATCH 17/49] Friendly error warning for NEL example script (#4881) * make model positional arg and raise error if no vectors * small doc fixes --- bin/wiki_entity_linking/README.md | 1 + .../wikidata_pretrain_kb.py | 2 +- examples/training/pretrain_kb.py | 35 ++++++++----------- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/bin/wiki_entity_linking/README.md b/bin/wiki_entity_linking/README.md index 7460a455e..56d0c1415 100644 --- a/bin/wiki_entity_linking/README.md +++ b/bin/wiki_entity_linking/README.md @@ -17,6 +17,7 @@ Run `wikipedia_pretrain_kb.py` Quick testing and rerunning: * When trying out the pipeline for a quick test, set `limit_prior` (`-lp`), `limit_train` (`-lt`) and/or `limit_wd` (`-lw`) to read only parts of the dumps instead of everything. + * e.g. set `-lt 20000 -lp 2000 -lw 3000 -f 1` * If you only want to (re)run certain parts of the pipeline, just remove the corresponding files and they will be recalculated or reparsed. 
diff --git a/bin/wiki_entity_linking/wikidata_pretrain_kb.py b/bin/wiki_entity_linking/wikidata_pretrain_kb.py index 940607b72..003074feb 100644 --- a/bin/wiki_entity_linking/wikidata_pretrain_kb.py +++ b/bin/wiki_entity_linking/wikidata_pretrain_kb.py @@ -40,7 +40,7 @@ logger = logging.getLogger(__name__) loc_prior_prob=("Location to file with prior probabilities", "option", "p", Path), loc_entity_defs=("Location to file with entity definitions", "option", "d", Path), loc_entity_desc=("Location to file with entity descriptions", "option", "s", Path), - descr_from_wp=("Flag for using wp descriptions not wd", "flag", "wp"), + descr_from_wp=("Flag for using descriptions from WP instead of WD (default False)", "flag", "wp"), limit_prior=("Threshold to limit lines read from WP for prior probabilities", "option", "lp", int), limit_train=("Threshold to limit lines read from WP for training set", "option", "lt", int), limit_wd=("Threshold to limit lines read from WD", "option", "lw", int), diff --git a/examples/training/pretrain_kb.py b/examples/training/pretrain_kb.py index db6442ad4..54c68f653 100644 --- a/examples/training/pretrain_kb.py +++ b/examples/training/pretrain_kb.py @@ -32,27 +32,24 @@ DESC_WIDTH = 64 # dimension of output entity vectors @plac.annotations( - vocab_path=("Path to the vocab for the kb", "option", "v", Path), - model=("Model name, should have pretrained word embeddings", "option", "m", str), + model=("Model name, should have pretrained word embeddings", "positional", None, str), output_dir=("Optional output directory", "option", "o", Path), n_iter=("Number of training iterations", "option", "n", int), ) -def main(vocab_path=None, model=None, output_dir=None, n_iter=50): +def main(model=None, output_dir=None, n_iter=50): """Load the model, create the KB and pretrain the entity encodings. - Either an nlp model or a vocab is needed to provide access to pretrained word embeddings. 
If an output_dir is provided, the KB will be stored there in a file 'kb'. - When providing an nlp model, the updated vocab will also be written to a directory in the output_dir.""" - if model is None and vocab_path is None: - raise ValueError("Either the `nlp` model or the `vocab` should be specified.") + The updated vocab will also be written to a directory in the output_dir.""" - if model is not None: - nlp = spacy.load(model) # load existing spaCy model - print("Loaded model '%s'" % model) - else: - vocab = Vocab().from_disk(vocab_path) - # create blank Language class with specified vocab - nlp = spacy.blank("en", vocab=vocab) - print("Created blank 'en' model with vocab from '%s'" % vocab_path) + nlp = spacy.load(model) # load existing spaCy model + print("Loaded model '%s'" % model) + + # check the length of the nlp vectors + if "vectors" not in nlp.meta or not nlp.vocab.vectors.size: + raise ValueError( + "The `nlp` object should have access to pretrained word vectors, " + " cf. https://spacy.io/usage/models#languages." 
+ ) kb = KnowledgeBase(vocab=nlp.vocab) @@ -103,11 +100,9 @@ def main(vocab_path=None, model=None, output_dir=None, n_iter=50): print() print("Saved KB to", kb_path) - # only storing the vocab if we weren't already reading it from file - if not vocab_path: - vocab_path = output_dir / "vocab" - kb.vocab.to_disk(vocab_path) - print("Saved vocab to", vocab_path) + vocab_path = output_dir / "vocab" + kb.vocab.to_disk(vocab_path) + print("Saved vocab to", vocab_path) print() From ee828d5a9ad18b65b6845244329f4225340271a6 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 14 Jan 2020 09:02:58 +0100 Subject: [PATCH 18/49] bugfix typo conv_window --- spacy/syntax/nn_parser.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 8493140b8..153ca67cd 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -57,7 +57,7 @@ cdef class Parser: subword_features = util.env_opt('subword_features', cfg.get('subword_features', True)) conv_depth = util.env_opt('conv_depth', cfg.get('conv_depth', 4)) - conv_window = util.env_opt('conv_window', cfg.get('conv_depth', 1)) + conv_window = util.env_opt('conv_window', cfg.get('conv_window', 1)) t2v_pieces = util.env_opt('cnn_maxout_pieces', cfg.get('cnn_maxout_pieces', 3)) bilstm_depth = util.env_opt('bilstm_depth', cfg.get('bilstm_depth', 0)) self_attn_depth = util.env_opt('self_attn_depth', cfg.get('self_attn_depth', 0)) From 718704022a0a9f9ee28f37539dbf26a06b78c072 Mon Sep 17 00:00:00 2001 From: Bram Vanroy Date: Thu, 16 Jan 2020 01:56:39 +0100 Subject: [PATCH 19/49] Changes to spacy_conll in universe (#4914) * Update information on spacy_conll * Typo fix --- website/meta/universe.json | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index 67da8c828..54eab8640 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -1509,28 
+1509,30 @@ { "id": "spacy-conll", "title": "spacy_conll", - "slogan": "Parse text with spaCy and print the output in CoNLL-U format", - "description": "This module allows you to parse a text to CoNLL-U format. You can use it as a command line tool, or embed it in your own scripts.", + "slogan": "Parse text with spaCy and gets its output in CoNLL-U format", + "description": "This module allows you to parse a text to CoNLL-U format. It contains a pipeline component for spaCy that adds CoNLL-U properties to a Doc and its sentences. It can also be used as a command-line tool.", "code_example": [ - "from spacy_conll import Spacy2ConllParser", - "spacyconll = Spacy2ConllParser()", + "import spacy", + "from spacy_conll import ConllFormatter", "", - "# `parse` returns a generator of the parsed sentences", - "for parsed_sent in spacyconll.parse(input_str='I like cookies.\nWhat about you?\nI don't like 'em!'):", - " do_something_(parsed_sent)", - "", - "# `parseprint` prints output to stdout (default) or a file (use `output_file` parameter)", - "# This method is called when using the command line", - "spacyconll.parseprint(input_str='I like cookies.')" + "nlp = spacy.load('en')", + "conllformatter = ConllFormatter(nlp)", + "nlp.add_pipe(conllformatter, after='parser')", + "doc = nlp('I like cookies. 
Do you?')", + "conll = doc._.conll", + "print(doc._.conll_str_headers)", + "print(doc._.conll_str)" ], "code_language": "python", "author": "Bram Vanroy", "author_links": { - "github": "BramVanroy", + "github": "BramVanroy", + "twitter": "BramVanroy", "website": "https://bramvanroy.be" }, "github": "BramVanroy/spacy_conll", - "category": ["standalone"] + "category": ["standalone", "pipeline"], + "tags": ["linguistics", "computational linguistics", "conll"] }, { "id": "spacy-langdetect", From 90c52128dc5f8131affa7710742e9081cdcaf476 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Thu, 16 Jan 2020 01:58:51 +0100 Subject: [PATCH 20/49] Improve train CLI with base model (#4911) Improve train CLI with a provided base model so that you can: * add a new component * extend an existing component * replace an existing component When the final model and best model are saved, reenable any disabled components and merge the meta information to include the full pipeline and accuracy information for all components in the base model plus the newly added components if needed. --- spacy/cli/train.py | 63 +++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 7c4298ca3..7de1d445d 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -30,6 +30,7 @@ from .. 
import about raw_text=("Path to jsonl file with unlabelled text documents.", "option", "rt", Path), base_model=("Name of model to update (optional)", "option", "b", str), pipeline=("Comma-separated names of pipeline components", "option", "p", str), + replace_components=("Replace components from base model", "flag", "R", bool), vectors=("Model to load vectors from", "option", "v", str), n_iter=("Number of iterations", "option", "n", int), n_early_stopping=("Maximum number of training epochs without dev accuracy improvement", "option", "ne", int), @@ -60,6 +61,7 @@ def train( raw_text=None, base_model=None, pipeline="tagger,parser,ner", + replace_components=False, vectors=None, n_iter=30, n_early_stopping=None, @@ -142,6 +144,8 @@ def train( # the model and make sure the pipeline matches the pipeline setting. If # training starts from a blank model, intitalize the language class. pipeline = [p.strip() for p in pipeline.split(",")] + disabled_pipes = None + pipes_added = False msg.text("Training pipeline: {}".format(pipeline)) if base_model: msg.text("Starting with base model '{}'".format(base_model)) @@ -152,20 +156,24 @@ def train( "`lang` argument ('{}') ".format(nlp.lang, lang), exits=1, ) - nlp.disable_pipes([p for p in nlp.pipe_names if p not in pipeline]) for pipe in pipeline: + pipe_cfg = {} + if pipe == "parser": + pipe_cfg = {"learn_tokens": learn_tokens} + elif pipe == "textcat": + pipe_cfg = { + "exclusive_classes": not textcat_multilabel, + "architecture": textcat_arch, + "positive_label": textcat_positive_label, + } if pipe not in nlp.pipe_names: - if pipe == "parser": - pipe_cfg = {"learn_tokens": learn_tokens} - elif pipe == "textcat": - pipe_cfg = { - "exclusive_classes": not textcat_multilabel, - "architecture": textcat_arch, - "positive_label": textcat_positive_label, - } - else: - pipe_cfg = {} + msg.text("Adding component to base model '{}'".format(pipe)) nlp.add_pipe(nlp.create_pipe(pipe, config=pipe_cfg)) + pipes_added = True + elif 
replace_components: + msg.text("Replacing component from base model '{}'".format(pipe)) + nlp.replace_pipe(pipe, nlp.create_pipe(pipe, config=pipe_cfg)) + pipes_added = True else: if pipe == "textcat": textcat_cfg = nlp.get_pipe("textcat").cfg @@ -174,11 +182,6 @@ def train( "architecture": textcat_cfg["architecture"], "positive_label": textcat_cfg["positive_label"], } - pipe_cfg = { - "exclusive_classes": not textcat_multilabel, - "architecture": textcat_arch, - "positive_label": textcat_positive_label, - } if base_cfg != pipe_cfg: msg.fail( "The base textcat model configuration does" @@ -188,6 +191,8 @@ def train( ), exits=1, ) + msg.text("Extending component from base model '{}'".format(pipe)) + disabled_pipes = nlp.disable_pipes([p for p in nlp.pipe_names if p not in pipeline]) else: msg.text("Starting with blank model '{}'".format(lang)) lang_cls = util.get_lang_class(lang) @@ -227,7 +232,7 @@ def train( corpus = GoldCorpus(train_path, dev_path, limit=n_examples) n_train_words = corpus.count_train() - if base_model: + if base_model and not pipes_added: # Start with an existing model, use default optimizer optimizer = create_default_optimizer(Model.ops) else: @@ -243,7 +248,7 @@ def train( # Verify textcat config if "textcat" in pipeline: - textcat_labels = nlp.get_pipe("textcat").cfg["labels"] + textcat_labels = nlp.get_pipe("textcat").cfg.get("labels", []) if textcat_positive_label and textcat_positive_label not in textcat_labels: msg.fail( "The textcat_positive_label (tpl) '{}' does not match any " @@ -426,11 +431,16 @@ def train( "cpu": cpu_wps, "gpu": gpu_wps, } - meta["accuracy"] = scorer.scores + meta.setdefault("accuracy", {}) + for component in nlp.pipe_names: + for metric in _get_metrics(component): + meta["accuracy"][metric] = scorer.scores[metric] else: meta.setdefault("beam_accuracy", {}) meta.setdefault("beam_speed", {}) - meta["beam_accuracy"][beam_width] = scorer.scores + for component in nlp.pipe_names: + for metric in _get_metrics(component): 
+ meta["beam_accuracy"][metric] = scorer.scores[metric] meta["beam_speed"][beam_width] = { "nwords": nwords, "cpu": cpu_wps, @@ -486,12 +496,16 @@ def train( ) break finally: + best_pipes = nlp.pipe_names + if disabled_pipes: + disabled_pipes.restore() with nlp.use_params(optimizer.averages): final_model_path = output_path / "model-final" nlp.to_disk(final_model_path) + final_meta = srsly.read_json(output_path / "model-final" / "meta.json") msg.good("Saved model to output directory", final_model_path) with msg.loading("Creating best model..."): - best_model_path = _collate_best_model(meta, output_path, nlp.pipe_names) + best_model_path = _collate_best_model(final_meta, output_path, best_pipes) msg.good("Created best model", best_model_path) @@ -549,6 +563,7 @@ def _load_pretrained_tok2vec(nlp, loc): def _collate_best_model(meta, output_path, components): bests = {} + meta.setdefault("accuracy", {}) for component in components: bests[component] = _find_best(output_path, component) best_dest = output_path / "model-best" @@ -580,11 +595,13 @@ def _find_best(experiment_dir, component): def _get_metrics(component): if component == "parser": - return ("las", "uas", "token_acc") + return ("las", "uas", "las_per_type", "token_acc") elif component == "tagger": return ("tags_acc",) elif component == "ner": - return ("ents_f", "ents_p", "ents_r") + return ("ents_f", "ents_p", "ents_r", "ents_per_type") + elif component == "textcat": + return ("textcat_score",) return ("token_acc",) From fbfc418745f3ce7ca807672f8b3afacdd199dbd2 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Thu, 16 Jan 2020 02:01:23 +0100 Subject: [PATCH 21/49] run normal textcat train script with transformers (#4834) * keep trf tok2vec and wordpiecer components during update * also support transformer models for other example scripts --- examples/training/pretrain_textcat.py | 3 ++- examples/training/rehearsal.py | 3 ++- examples/training/train_entity_linker.py | 3 ++- 
examples/training/train_intent_parser.py | 3 ++- examples/training/train_ner.py | 3 ++- examples/training/train_new_entity_type.py | 3 ++- examples/training/train_parser.py | 3 ++- examples/training/train_textcat.py | 3 ++- 8 files changed, 16 insertions(+), 8 deletions(-) diff --git a/examples/training/pretrain_textcat.py b/examples/training/pretrain_textcat.py index 00cbd992c..f3e493f6a 100644 --- a/examples/training/pretrain_textcat.py +++ b/examples/training/pretrain_textcat.py @@ -131,7 +131,8 @@ def train_textcat(nlp, n_texts, n_iter=10): train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats])) # get names of other pipes to disable them during training - other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"] + pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"] + other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions] with nlp.disable_pipes(*other_pipes): # only train textcat optimizer = nlp.begin_training() textcat.model.tok2vec.from_bytes(tok2vec_weights) diff --git a/examples/training/rehearsal.py b/examples/training/rehearsal.py index 123f5049d..9ece91427 100644 --- a/examples/training/rehearsal.py +++ b/examples/training/rehearsal.py @@ -63,7 +63,8 @@ def main(model_name, unlabelled_loc): optimizer.b2 = 0.0 # get names of other pipes to disable them during training - other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"] + pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"] + other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions] sizes = compounding(1.0, 4.0, 1.001) with nlp.disable_pipes(*other_pipes): for itn in range(n_iter): diff --git a/examples/training/train_entity_linker.py b/examples/training/train_entity_linker.py index df8b59db1..dd7c3a1b2 100644 --- a/examples/training/train_entity_linker.py +++ b/examples/training/train_entity_linker.py @@ -113,7 +113,8 @@ def main(kb_path, vocab_path=None, output_dir=None, n_iter=50): 
TRAIN_DOCS.append((doc, annotation_clean)) # get names of other pipes to disable them during training - other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "entity_linker"] + pipe_exceptions = ["entity_linker", "trf_wordpiecer", "trf_tok2vec"] + other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions] with nlp.disable_pipes(*other_pipes): # only train entity linker # reset and initialize the weights randomly optimizer = nlp.begin_training() diff --git a/examples/training/train_intent_parser.py b/examples/training/train_intent_parser.py index 08d06bd4c..d2472b6b9 100644 --- a/examples/training/train_intent_parser.py +++ b/examples/training/train_intent_parser.py @@ -124,7 +124,8 @@ def main(model=None, output_dir=None, n_iter=15): for dep in annotations.get("deps", []): parser.add_label(dep) - other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"] + pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"] + other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions] with nlp.disable_pipes(*other_pipes): # only train parser optimizer = nlp.begin_training() for itn in range(n_iter): diff --git a/examples/training/train_ner.py b/examples/training/train_ner.py index 49c25654c..01bb6a67b 100644 --- a/examples/training/train_ner.py +++ b/examples/training/train_ner.py @@ -55,7 +55,8 @@ def main(model=None, output_dir=None, n_iter=100): ner.add_label(ent[2]) # get names of other pipes to disable them during training - other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"] + pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"] + other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions] with nlp.disable_pipes(*other_pipes): # only train NER # reset and initialize the weights randomly – but only if we're # training a new model diff --git a/examples/training/train_new_entity_type.py b/examples/training/train_new_entity_type.py index e3a76f0c0..72d33ad50 100644 --- 
a/examples/training/train_new_entity_type.py +++ b/examples/training/train_new_entity_type.py @@ -95,7 +95,8 @@ def main(model=None, new_model_name="animal", output_dir=None, n_iter=30): optimizer = nlp.resume_training() move_names = list(ner.move_names) # get names of other pipes to disable them during training - other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"] + pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"] + other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions] with nlp.disable_pipes(*other_pipes): # only train NER sizes = compounding(1.0, 4.0, 1.001) # batch up the examples using spaCy's minibatch diff --git a/examples/training/train_parser.py b/examples/training/train_parser.py index aa60af00b..c5adb0dec 100644 --- a/examples/training/train_parser.py +++ b/examples/training/train_parser.py @@ -65,7 +65,8 @@ def main(model=None, output_dir=None, n_iter=15): parser.add_label(dep) # get names of other pipes to disable them during training - other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "parser"] + pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"] + other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions] with nlp.disable_pipes(*other_pipes): # only train parser optimizer = nlp.begin_training() for itn in range(n_iter): diff --git a/examples/training/train_textcat.py b/examples/training/train_textcat.py index 4d4ebf396..456ef098c 100644 --- a/examples/training/train_textcat.py +++ b/examples/training/train_textcat.py @@ -67,7 +67,8 @@ def main(model=None, output_dir=None, n_iter=20, n_texts=2000, init_tok2vec=None train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats])) # get names of other pipes to disable them during training - other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"] + pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"] + other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions] with 
nlp.disable_pipes(*other_pipes): # only train textcat optimizer = nlp.begin_training() if init_tok2vec is not None: From b9afcd56e3532125ec15f7d1f0825608c04835e3 Mon Sep 17 00:00:00 2001 From: Kabir Khan Date: Wed, 15 Jan 2020 17:01:31 -0800 Subject: [PATCH 22/49] Fix ent_ids and labels properties when id attribute used in patterns (#4900) * Fix ent_ids and labels properties when id attribute used in patterns * use set for labels * sort end_ids for comparison in entity_ruler tests * fixing entity_ruler ent_ids test * add to set --- spacy/pipeline/entityruler.py | 19 +++++++++++++++---- spacy/tests/pipeline/test_entity_ruler.py | 12 ++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 2db312d64..1c8429049 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -129,20 +129,31 @@ class EntityRuler(object): DOCS: https://spacy.io/api/entityruler#labels """ - all_labels = set(self.token_patterns.keys()) - all_labels.update(self.phrase_patterns.keys()) + keys = set(self.token_patterns.keys()) + keys.update(self.phrase_patterns.keys()) + all_labels = set() + + for l in keys: + if self.ent_id_sep in l: + label, _ = self._split_label(l) + all_labels.add(label) + else: + all_labels.add(l) return tuple(all_labels) @property def ent_ids(self): - """All entity ids present in the match patterns `id` properties. + """All entity ids present in the match patterns `id` properties RETURNS (set): The string entity ids. 
DOCS: https://spacy.io/api/entityruler#ent_ids """ + keys = set(self.token_patterns.keys()) + keys.update(self.phrase_patterns.keys()) all_ent_ids = set() - for l in self.labels: + + for l in keys: if self.ent_id_sep in l: _, ent_id = self._split_label(l) all_ent_ids.add(ent_id) diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py index 660ad3b28..3b46baa9b 100644 --- a/spacy/tests/pipeline/test_entity_ruler.py +++ b/spacy/tests/pipeline/test_entity_ruler.py @@ -21,6 +21,7 @@ def patterns(): {"label": "HELLO", "pattern": [{"ORTH": "HELLO"}]}, {"label": "COMPLEX", "pattern": [{"ORTH": "foo", "OP": "*"}]}, {"label": "TECH_ORG", "pattern": "Apple", "id": "a1"}, + {"label": "TECH_ORG", "pattern": "Microsoft", "id": "a2"}, ] @@ -147,3 +148,14 @@ def test_entity_ruler_validate(nlp): # invalid pattern raises error with validate with pytest.raises(MatchPatternError): validated_ruler.add_patterns([invalid_pattern]) + + +def test_entity_ruler_properties(nlp, patterns): + ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True) + assert sorted(ruler.labels) == sorted([ + "HELLO", + "BYE", + "COMPLEX", + "TECH_ORG" + ]) + assert sorted(ruler.ent_ids) == ["a1", "a2"] From 708a4d27ebd47fde3dd15f0530fdebc4cf40dd01 Mon Sep 17 00:00:00 2001 From: Yohei Tamura Date: Mon, 20 Jan 2020 20:17:46 +0900 Subject: [PATCH 23/49] fix nlp.evaluate (#4924) (#4925) * new file: test_issue4924.py * modified: spacy/gold.pyx * modified: test_issue4924.py for python2 --- spacy/gold.pyx | 5 +++++ spacy/tests/regression/test_issue4924.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 spacy/tests/regression/test_issue4924.py diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 1d7f80c92..3884e1cba 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -694,6 +694,11 @@ cdef class GoldParse: self.cats = {} if cats is None else dict(cats) self.links = links + # orig_annot is used as an iterator in `nlp.evalate` even if 
self.length == 0, + # so set a empty list to avoid error. + # if self.lenght > 0, this is modified latter. + self.orig_annot = [] + # avoid allocating memory if the doc does not contain any tokens if self.length > 0: if words is None: diff --git a/spacy/tests/regression/test_issue4924.py b/spacy/tests/regression/test_issue4924.py new file mode 100644 index 000000000..8aea2c3d5 --- /dev/null +++ b/spacy/tests/regression/test_issue4924.py @@ -0,0 +1,16 @@ +# coding: utf8 +from __future__ import unicode_literals + +import pytest + +import spacy + + +@pytest.fixture +def nlp(): + return spacy.blank("en") + + +def test_evaluate(nlp): + docs_golds = [("", {})] + nlp.evaluate(docs_golds) From 7ad000fce7824f237feec20e577f14c1c3a4a755 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Mon, 20 Jan 2020 17:02:47 +0100 Subject: [PATCH 24/49] Update docs for train CLI --use_gpu option (#4927) --- website/docs/api/cli.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md index 94bedd889..2f7346491 100644 --- a/website/docs/api/cli.md +++ b/website/docs/api/cli.md @@ -372,7 +372,7 @@ $ python -m spacy train [lang] [output_path] [train_path] [dev_path] | `--n-iter`, `-n` | option | Number of iterations (default: `30`). | | `--n-early-stopping`, `-ne` | option | Maximum number of training epochs without dev accuracy improvement. | | `--n-examples`, `-ns` | option | Number of examples to use (defaults to `0` for all examples). | -| `--use-gpu`, `-g` | option | Whether to use GPU. Can be either `0`, `1` or `-1`. | +| `--use-gpu`, `-g` | option | GPU ID or `-1` for CPU only (default: `-1`). | | `--version`, `-V` | option | Model version. Will be written out to the model's `meta.json` after training. | | `--meta-path`, `-m` 2 | option | Optional path to model [`meta.json`](/usage/training#models-generating). All relevant properties like `lang`, `pipeline` and `spacy_version` will be overwritten. 
| | `--init-tok2vec`, `-t2v` 2.1 | option | Path to pretrained weights for the token-to-vector parts of the models. See `spacy pretrain`. Experimental. | From a938566b62fe74c6bacac55e658cd271cc5d1e29 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Tue, 28 Jan 2020 11:36:49 +0100 Subject: [PATCH 25/49] Fix Sentencizer.pipe() for empty doc (#4940) --- spacy/pipeline/pipes.pyx | 25 ++++++++++++------------ spacy/tests/pipeline/test_sentencizer.py | 16 +++++++++++++++ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index b51520777..b4fecf5cb 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -1492,20 +1492,21 @@ class Sentencizer(object): return guesses guesses = [] for doc in docs: - start = 0 - seen_period = False doc_guesses = [False] * len(doc) - doc_guesses[0] = True - for i, token in enumerate(doc): - is_in_punct_chars = token.text in self.punct_chars - if seen_period and not token.is_punct and not is_in_punct_chars: + if len(doc) > 0: + start = 0 + seen_period = False + doc_guesses[0] = True + for i, token in enumerate(doc): + is_in_punct_chars = token.text in self.punct_chars + if seen_period and not token.is_punct and not is_in_punct_chars: + doc_guesses[start] = True + start = token.i + seen_period = False + elif is_in_punct_chars: + seen_period = True + if start < len(doc): doc_guesses[start] = True - start = token.i - seen_period = False - elif is_in_punct_chars: - seen_period = True - if start < len(doc): - doc_guesses[start] = True guesses.append(doc_guesses) return guesses diff --git a/spacy/tests/pipeline/test_sentencizer.py b/spacy/tests/pipeline/test_sentencizer.py index 359552c5b..d690958cc 100644 --- a/spacy/tests/pipeline/test_sentencizer.py +++ b/spacy/tests/pipeline/test_sentencizer.py @@ -29,6 +29,22 @@ def test_sentencizer_pipe(): assert len(list(doc.sents)) == 2 +def test_sentencizer_empty_docs(): + one_empty_text = [""] + many_empty_texts = ["", "", ""] 
+ some_empty_texts = ["hi", "", "This is a test. Here are two sentences.", ""] + nlp = English() + nlp.add_pipe(nlp.create_pipe("sentencizer")) + for texts in [one_empty_text, many_empty_texts, some_empty_texts]: + for doc in nlp.pipe(texts): + assert doc.is_sentenced + sent_starts = [t.is_sent_start for t in doc] + if len(doc) == 0: + assert sent_starts == [] + else: + assert len(sent_starts) > 0 + + @pytest.mark.parametrize( "words,sent_starts,n_sents", [ From 49fefb6139ef96dc154f9bb4a53917716f785622 Mon Sep 17 00:00:00 2001 From: Paco Nathan <57973+ceteri@users.noreply.github.com> Date: Tue, 28 Jan 2020 02:37:54 -0800 Subject: [PATCH 26/49] Submitting `PyTextRank` for inclusion in the spaCy uniVerse (#4942) * submitting PyTextRank for consideration of including in the spaCy uniVerse * including SCA --- .github/contributors/ceteri.md | 106 +++++++++++++++++++++++++++++++++ website/meta/universe.json | 37 ++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 .github/contributors/ceteri.md diff --git a/.github/contributors/ceteri.md b/.github/contributors/ceteri.md new file mode 100644 index 000000000..f29bfd346 --- /dev/null +++ b/.github/contributors/ceteri.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). 
The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. 
With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. 
Please do NOT +mark both statements: + + * [ ] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [x] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. + +## Contributor Details + +| Field | Entry | +|------------------------------- | ---------------------- | +| Name | Paco Nathan | +| Company name (if applicable) | Derwen, Inc. | +| Title or role (if applicable) | Managing Partner | +| Date | 2020-01-25 | +| GitHub username | ceteri | +| Website (optional) | https://derwen.ai/paco | diff --git a/website/meta/universe.json b/website/meta/universe.json index 54eab8640..2b5f983fa 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -1903,6 +1903,43 @@ "twitter": "PatadiaYash", "github": "yash1994" } + }, + { + "id": "spacy-pytextrank", + "title": "PyTextRank", + "slogan": "Py impl of TextRank for lightweight phrase extraction", + "description": "An implementation of TextRank in Python for use in spaCy pipelines which provides fast, effective phrase extraction from texts, along with extractive summarization. The graph algorithm works independent of a specific natural language and does not require domain knowledge. See (Mihalcea 2004) https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf", + "github": "DerwenAI/pytextrank", + "pip": "pytextrank", + "code_example": [ + "import spacy", + "import pytextrank", + "", + "nlp = spacy.load('en_core_web_sm')", + "", + "tr = pytextrank.TextRank()", + "nlp.add_pipe(tr.PipelineComponent, name='textrank', last=True)", + "", + "text = 'Compatibility of systems of linear constraints over the set of natural numbers. 
Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered.'", + "doc = nlp(text)", + "", + "# examine the top-ranked phrases in the document", + "for p in doc._.phrases:", + " print('{:.4f} {:5d} {}'.format(p.rank, p.count, p.text))", + " print(p.chunks)" + ], + "code_language": "python", + "url": "https://github.com/DerwenAI/pytextrank/wiki", + "thumb": "https://memegenerator.net/img/instances/66942896.jpg", + "image": "https://memegenerator.net/img/instances/66942896.jpg", + "author": "Paco Nathan", + "author_links": { + "twitter": "pacoid", + "github": "ceteri", + "website": "https://derwen.ai/paco" + }, + "category": ["pipeline"], + "tags": ["phrase extraction", "ner", "summarization", "graph algorithms", "textrank"] } ], From d031440de2df1f0587d704c85b114d49ec9df53c Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 29 Jan 2020 17:35:46 +0100 Subject: [PATCH 27/49] Update setup.cfg --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index a0103c5a2..2c8268517 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,6 +50,7 @@ install_requires = srsly>=0.1.0,<1.1.0 catalogue>=0.0.7,<1.1.0 # Third-party dependencies + tqdm>=4.38.0,<5.0.0 setuptools numpy>=1.15.0 plac>=0.9.6,<1.2.0 From 6ff947e1f9b7286f8b2ff3a42eab2e0acd3edf87 Mon Sep 17 00:00:00 2001 From: Omri Mendels Date: Mon, 3 Feb 2020 13:57:55 +0200 Subject: [PATCH 28/49] Added presidio-research to universe.json (#4950) * Added presidio-research to universe.json Added a reference to Presidio Research, the data-science toolbox for Microsoft Presidio. 
* Updated url --- website/meta/universe.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/website/meta/universe.json b/website/meta/universe.json index 2b5f983fa..cf5978edc 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -1839,6 +1839,20 @@ "github": "microsoft" } }, + { + "id": "presidio-research", + "title": "Presidio Research", + "slogan": "Toolbox for developing and evaluating PII detectors, NER models for PII and generating fake PII data", + "description": "This package features data-science related tasks for developing new recognizers for Microsoft Presidio. It is used for the evaluation of the entire system, as well as for evaluating specific PII recognizers or PII detection models. Anyone interested in evaluating an existing Microsoft Presidio instance, a specific PII recognizer or to develop new models or logic for detecting PII could leverage the preexisting work in this package. Additionally, anyone interested in generating new data based on previous datasets (e.g. to increase the coverage of entity values) for Named Entity Recognition models could leverage the data generator contained in this package.", + "url": "https://aka.ms/presidio-research", + "github": "microsoft/presidio-research", + "category": ["standalone"], + "thumb": "https://avatars0.githubusercontent.com/u/6154722", + "author": "Microsoft", + "author_links": { + "github": "microsoft" + } + }, { "id": "python-sentence-boundary-disambiguation", "title": "pySBD - python Sentence Boundary Disambiguation", From 02a44c5be2dbbd8a6a0a1d40ecb04bc887ce8fb1 Mon Sep 17 00:00:00 2001 From: "Martin A. 
Kayser" <9056896+maknotavailable@users.noreply.github.com> Date: Mon, 3 Feb 2020 03:58:59 -0800 Subject: [PATCH 29/49] Adding a note on retrieving the string rep of the match_id (#4904) Stolen from here: https://stackoverflow.com/questions/47638877/using-phrasematcher-in-spacy-to-find-multiple-match-types --- website/docs/api/phrasematcher.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/website/docs/api/phrasematcher.md b/website/docs/api/phrasematcher.md index 90ecd3416..4119c8fc0 100644 --- a/website/docs/api/phrasematcher.md +++ b/website/docs/api/phrasematcher.md @@ -70,6 +70,17 @@ Find all token sequences matching the supplied patterns on the `Doc`. | `doc` | `Doc` | The document to match over. | | **RETURNS** | list | A list of `(match_id, start, end)` tuples, describing the matches. A match tuple describes a span `doc[start:end]`. The `match_id` is the ID of the added match pattern. | + + +Because spaCy stores all strings as integers, the match_id you get back will be an integer, too – but you can always get the string representation by looking it up in the vocabulary's StringStore, i.e. nlp.vocab.strings: + +``` +match_id_string = nlp.vocab.strings[match_id] +``` + + + + ## PhraseMatcher.pipe {#pipe tag="method"} Match a stream of documents, yielding them in turn. From abd5c06374eab5db0cf897b73543b1f3eb007e12 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 3 Feb 2020 13:00:02 +0100 Subject: [PATCH 30/49] Adjust formatting [ci skip] --- website/docs/api/phrasematcher.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/website/docs/api/phrasematcher.md b/website/docs/api/phrasematcher.md index 4119c8fc0..a72277420 100644 --- a/website/docs/api/phrasematcher.md +++ b/website/docs/api/phrasematcher.md @@ -38,7 +38,7 @@ be shown. 
| Name | Type | Description | | --------------------------------------- | --------------- | ------------------------------------------------------------------------------------------- | | `vocab` | `Vocab` | The vocabulary object, which must be shared with the documents the matcher will operate on. | -| `max_length` | int | Deprecated argument - the `PhraseMatcher` does not have a phrase length limit anymore. | +| `max_length` | int | Deprecated argument - the `PhraseMatcher` does not have a phrase length limit anymore. | | `attr` 2.1 | int / unicode | The token attribute to match on. Defaults to `ORTH`, i.e. the verbatim token text. | | `validate` 2.1 | bool | Validate patterns added to the matcher. | | **RETURNS** | `PhraseMatcher` | The newly constructed object. | @@ -72,15 +72,16 @@ Find all token sequences matching the supplied patterns on the `Doc`. -Because spaCy stores all strings as integers, the match_id you get back will be an integer, too – but you can always get the string representation by looking it up in the vocabulary's StringStore, i.e. nlp.vocab.strings: +Because spaCy stores all strings as integers, the `match_id` you get back will +be an integer, too – but you can always get the string representation by looking +it up in the vocabulary's `StringStore`, i.e. `nlp.vocab.strings`: -``` +```python match_id_string = nlp.vocab.strings[match_id] ``` - ## PhraseMatcher.pipe {#pipe tag="method"} Match a stream of documents, yielding them in turn. 
From 9fa9d7f2cb52ce6a70c264d4e57c7f190d7686bf Mon Sep 17 00:00:00 2001 From: Tyler Couto Date: Mon, 3 Feb 2020 06:01:48 -0600 Subject: [PATCH 31/49] Fix for Issue 4665 - conllu2json (#4953) * Fix for Issue 4665 - conllu2json - Allowing HEAD to be an underscore * Added contributor agreement --- .github/contributors/onlyanegg.md | 106 +++++++++++++++++++++++ spacy/cli/converters/conllu2json.py | 2 +- spacy/tests/regression/test_issue4665.py | 31 +++++++ 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 .github/contributors/onlyanegg.md create mode 100644 spacy/tests/regression/test_issue4665.py diff --git a/.github/contributors/onlyanegg.md b/.github/contributors/onlyanegg.md new file mode 100644 index 000000000..eb1ac01a6 --- /dev/null +++ b/.github/contributors/onlyanegg.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. 
The term "contribution" or "contributed materials" means any source code, + object code, patch, tool, sample, graphic, specification, manual, + documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and + registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. 
With respect to any patents you own, or that you can license without payment + to any third party, you hereby grant to us a perpetual, irrevocable, + non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your + contribution. The rights that you grant to us under these terms are effective + on the date you first submitted a contribution to us, even if your submission + took place before the date you sign these terms. + +5. You covenant, represent, warrant and agree that: + + - Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + - to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + - each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable + U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. 
Please do NOT + mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. + +## Contributor Details + +| Field | Entry | +| ----------------------------- | ---------------- | +| Name | Tyler Couto | +| Company name (if applicable) | | +| Title or role (if applicable) | | +| Date | January 29, 2020 | +| GitHub username | onlyanegg | +| Website (optional) | | diff --git a/spacy/cli/converters/conllu2json.py b/spacy/cli/converters/conllu2json.py index e66a8c50e..3de4dcc30 100644 --- a/spacy/cli/converters/conllu2json.py +++ b/spacy/cli/converters/conllu2json.py @@ -70,7 +70,7 @@ def read_conllx(input_data, use_morphology=False, n=0): continue try: id_ = int(id_) - 1 - head = (int(head) - 1) if head != "0" else id_ + head = (int(head) - 1) if head not in ["0", "_"] else id_ dep = "ROOT" if dep == "root" else dep tag = pos if tag == "_" else tag tag = tag + "__" + morph if use_morphology else tag diff --git a/spacy/tests/regression/test_issue4665.py b/spacy/tests/regression/test_issue4665.py new file mode 100644 index 000000000..721ec0098 --- /dev/null +++ b/spacy/tests/regression/test_issue4665.py @@ -0,0 +1,31 @@ +from spacy.cli.converters.conllu2json import conllu2json + +input_data = """ +1 [ _ PUNCT -LRB- _ _ punct _ _ +2 This _ DET DT _ _ det _ _ +3 killing _ NOUN NN _ _ nsubj _ _ +4 of _ ADP IN _ _ case _ _ +5 a _ DET DT _ _ det _ _ +6 respected _ ADJ JJ _ _ amod _ _ +7 cleric _ NOUN NN _ _ nmod _ _ +8 will _ AUX MD _ _ aux _ _ +9 be _ AUX VB _ _ aux _ _ +10 causing _ VERB VBG _ _ root _ _ +11 us _ PRON PRP _ _ iobj _ _ +12 trouble _ NOUN NN _ _ dobj _ _ +13 for _ ADP IN _ _ case _ _ +14 years _ NOUN NNS _ _ nmod _ _ +15 to _ PART TO _ _ mark _ _ +16 come _ VERB VB _ _ acl _ _ +17 . _ PUNCT . 
_ _ punct _ _ +18 ] _ PUNCT -RRB- _ _ punct _ _ +""" + + +def test_issue4665(): + """ + conllu2json should not raise an exception if the HEAD column contains an + underscore + """ + + conllu2json(input_data) From d4f4060bf33f9f2f028bd9728bdd99b9a2ddc387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Bedn=C3=A1rik?= Date: Mon, 3 Feb 2020 13:03:59 +0100 Subject: [PATCH 32/49] Add Slovak language tools implementation (#4943) * Add correct stopwords for Slovak language * Add SNK Tags * Disable formatting lint for TAGS * Add example sentences for Slovak language * Add slovak numerals in base form * Add lex_attrs to sk init * Add contributor agreement --- .github/contributors/drndos.md | 106 +++ spacy/lang/sk/__init__.py | 5 + spacy/lang/sk/examples.py | 27 + spacy/lang/sk/lex_attrs.py | 62 ++ spacy/lang/sk/stop_words.py | 263 +++++- spacy/lang/sk/tag_map.py | 1467 ++++++++++++++++++++++++++++++++ 6 files changed, 1897 insertions(+), 33 deletions(-) create mode 100644 .github/contributors/drndos.md create mode 100644 spacy/lang/sk/examples.py create mode 100644 spacy/lang/sk/lex_attrs.py create mode 100644 spacy/lang/sk/tag_map.py diff --git a/.github/contributors/drndos.md b/.github/contributors/drndos.md new file mode 100644 index 000000000..897959a1c --- /dev/null +++ b/.github/contributors/drndos.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. 
+ +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. 
This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. 
You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. Please do NOT +mark both statements: + + * [ ] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [x] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. + +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Filip Bednárik | +| Company name (if applicable) | Ardevop, s. r. o. 
| +| Title or role (if applicable) | IT Consultant | +| Date | 2020-01-26 | +| GitHub username | drndos | +| Website (optional) | https://ardevop.sk | diff --git a/spacy/lang/sk/__init__.py b/spacy/lang/sk/__init__.py index e7704196a..cb17c0b6d 100644 --- a/spacy/lang/sk/__init__.py +++ b/spacy/lang/sk/__init__.py @@ -2,13 +2,18 @@ from __future__ import unicode_literals from .stop_words import STOP_WORDS +from .tag_map import TAG_MAP +from .lex_attrs import LEX_ATTRS + from ...language import Language from ...attrs import LANG class SlovakDefaults(Language.Defaults): lex_attr_getters = dict(Language.Defaults.lex_attr_getters) + lex_attr_getters.update(LEX_ATTRS) lex_attr_getters[LANG] = lambda text: "sk" + tag_map = TAG_MAP stop_words = STOP_WORDS diff --git a/spacy/lang/sk/examples.py b/spacy/lang/sk/examples.py new file mode 100644 index 000000000..486ea375e --- /dev/null +++ b/spacy/lang/sk/examples.py @@ -0,0 +1,27 @@ +# coding: utf8 +from __future__ import unicode_literals + + +""" +Example sentences to test spaCy and its language models. + +>>> from spacy.lang.sk.examples import sentences +>>> docs = nlp.pipe(sentences) +""" + + +sentences = [ + "Ardevop, s.r.o. 
je malá startup firma na území SR.", + "Samojazdiace autá presúvajú poistnú zodpovednosť na výrobcov automobilov.", + "Košice sú na východe.", + "Bratislava je hlavné mesto Slovenskej republiky.", + "Kde si?", + "Kto je prezidentom Francúzska?", + "Aké je hlavné mesto Slovenska?", + "Kedy sa narodil Andrej Kiska?", + "Včera som dostal 100€ na ruku.", + "Dnes je nedeľa 26.1.2020.", + "Narodil sa 15.4.1998 v Ružomberku.", + "Niekto mi povedal, že 500 eur je veľa peňazí.", + "Podaj mi ruku!", +] diff --git a/spacy/lang/sk/lex_attrs.py b/spacy/lang/sk/lex_attrs.py new file mode 100644 index 000000000..3dea4d8f0 --- /dev/null +++ b/spacy/lang/sk/lex_attrs.py @@ -0,0 +1,62 @@ +# coding: utf8 +from __future__ import unicode_literals + +from ...attrs import LIKE_NUM + +_num_words = [ + "nula", + "jeden", + "dva", + "tri", + "štyri", + "päť", + "šesť", + "sedem", + "osem", + "deväť", + "desať", + "jedenásť", + "dvanásť", + "trinásť", + "štrnásť", + "pätnásť", + "šestnásť", + "sedemnásť", + "osemnásť", + "devätnásť", + "dvadsať", + "tridsať", + "štyridsať", + "päťdesiat", + "šesťdesiat", + "sedemdesiat", + "osemdesiat", + "deväťdesiat", + "sto", + "tisíc", + "milión", + "miliarda", + "bilión", + "biliarda", + "trilión", + "triliarda", + "kvadrilión", +] + + +def like_num(text): + if text.startswith(("+", "-", "±", "~")): + text = text[1:] + text = text.replace(",", "").replace(".", "") + if text.isdigit(): + return True + if text.count("/") == 1: + num, denom = text.split("/") + if num.isdigit() and denom.isdigit(): + return True + if text.lower() in _num_words: + return True + return False + + +LEX_ATTRS = {LIKE_NUM: like_num} diff --git a/spacy/lang/sk/stop_words.py b/spacy/lang/sk/stop_words.py index f6994d33f..3e78acb10 100644 --- a/spacy/lang/sk/stop_words.py +++ b/spacy/lang/sk/stop_words.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals -# Source: https://github.com/stopwords-iso/stopwords-sk +# Source: https://github.com/Ardevop-sk/stopwords-sk STOP_WORDS = 
set( """ @@ -10,17 +10,41 @@ a aby aj ak +akej +akejže ako +akom +akomže +akou +akouže +akože +aká +akáže +aké +akého +akéhože +akému +akémuže +akéže +akú +akúže aký +akých +akýchže +akým +akými +akýmiže +akýmže +akýže ale alebo -and ani asi avšak až ba bez +bezo bol bola boli @@ -31,23 +55,32 @@ budeme budete budeš budú -buï buď by byť cez +cezo dnes do ešte -for ho hoci i iba ich im +inej +inom +iná iné +iného +inému +iní +inú iný +iných +iným +inými ja je jeho @@ -56,80 +89,185 @@ jemu ju k kam +kamže +každou každá každé +každého +každému každí +každú každý +každých +každým +každými kde -kedže -keï +kej +kejže keď +keďže +kie +kieho +kiehože +kiemu +kiemuže +kieže +koho +kom +komu +kou +kouže kto +ktorej ktorou ktorá ktoré ktorí +ktorú ktorý +ktorých +ktorým +ktorými ku +ká +káže +ké +kéže +kú +kúže +ký +kýho +kýhože +kým +kýmu +kýmuže +kýže lebo +leda +ledaže len ma +majú +mal +mala +mali mať medzi -menej mi -mna mne mnou moja moje +mojej +mojich +mojim +mojimi +mojou +moju +možno mu +musia musieť +musí +musím +musíme +musíte +musíš my má +mám +máme máte -mòa +máš môcť môj +môjho môže +môžem +môžeme +môžete +môžeš +môžu +mňa na nad +nado +najmä nami +naša +naše +našej naši +našich +našim +našimi +našou +ne nech neho nej +nejakej +nejakom +nejakou +nejaká +nejaké +nejakého +nejakému +nejakú +nejaký +nejakých +nejakým +nejakými nemu než nich nie +niektorej +niektorom +niektorou +niektorá +niektoré +niektorého +niektorému +niektorú niektorý +niektorých +niektorým +niektorými nielen +niečo nim +nimi nič +ničoho +ničom +ničomu +ničím no -nová -nové -noví -nový nám nás náš +nášho ním o od odo -of on ona oni ono ony +oň +oňho po pod +podo podľa pokiaľ +popod +popri potom +poza pre pred predo @@ -137,42 +275,56 @@ preto pretože prečo pri -prvá -prvé -prví -prvý práve -pýta s sa seba +sebe +sebou sem si sme so som -späť ste svoj +svoja svoje +svojho svojich +svojim +svojimi +svojou +svoju svojím -svojími sú ta tak +takej +takejto +taká +takáto +také +takého +takéhoto 
+takému +takémuto +takéto +takí +takú +takúto taký +takýto takže tam -te teba tebe tebou teda tej +tejto ten tento -the ti tie tieto @@ -180,52 +332,97 @@ tiež to toho tohoto +tohto tom tomto tomu tomuto toto tou +touto tu tvoj -tvojími +tvoja +tvoje +tvojej +tvojho +tvoji +tvojich +tvojim +tvojimi +tvojím ty tá táto +tí +títo tú túto +tých tým +tými týmto -tě +u už v vami +vaša vaše -veï +vašej +vaši +vašich +vašim +vaším +veď viac vo vy vám vás váš +vášho však +všetci +všetka +všetko +všetky všetok z za +začo +začože zo -a áno -èi -èo -èí -òom -òou -òu +čej či +čia +čie +čieho +čiemu +čiu čo +čoho +čom +čomu +čou +čože +čí +čím +čími ďalšia ďalšie +ďalšieho +ďalšiemu +ďalšiu +ďalšom +ďalšou ďalší +ďalších +ďalším +ďalšími +ňom +ňou +ňu že """.split() ) diff --git a/spacy/lang/sk/tag_map.py b/spacy/lang/sk/tag_map.py new file mode 100644 index 000000000..015c8cba3 --- /dev/null +++ b/spacy/lang/sk/tag_map.py @@ -0,0 +1,1467 @@ +# coding: utf8 +from __future__ import unicode_literals + +from ...symbols import POS, AUX, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB +from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON + +# Source https://universaldependencies.org/tagset-conversion/sk-snk-uposf.html +# fmt: off +TAG_MAP = { + "AAfp1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp1y": {POS: ADJ, "morph": "Case=Nom|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp1z": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp2y": {POS: ADJ, "morph": "Case=Gen|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp2z": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp3y": {POS: ADJ, "morph": "Case=Dat|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Plur"}, + 
"AAfp3z": {POS: ADJ, "morph": "Case=Dat|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp4y": {POS: ADJ, "morph": "Case=Acc|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp4z": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp5y": {POS: ADJ, "morph": "Case=Voc|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp5z": {POS: ADJ, "morph": "Case=Voc|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp6y": {POS: ADJ, "morph": "Case=Loc|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp6z": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp7y": {POS: ADJ, "morph": "Case=Ins|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfp7z": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "AAfs1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs1y": {POS: ADJ, "morph": "Case=Nom|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs1z": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs2y": {POS: ADJ, "morph": "Case=Gen|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs2z": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs3y": {POS: ADJ, "morph": "Case=Dat|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs3z": {POS: ADJ, "morph": 
"Case=Dat|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs4y": {POS: ADJ, "morph": "Case=Acc|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs4z": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs5y": {POS: ADJ, "morph": "Case=Voc|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs5z": {POS: ADJ, "morph": "Case=Voc|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs6y": {POS: ADJ, "morph": "Case=Loc|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs6z": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs7y": {POS: ADJ, "morph": "Case=Ins|Degree=Cmp|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAfs7z": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "AAip1x": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip1y": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip1z": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip2x": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip2y": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip2z": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip3x": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip3y": {POS: ADJ, "morph": 
"Animacy=Inan|Case=Dat|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip3z": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip4x": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip4y": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip4z": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip5x": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip5y": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip5z": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip6x": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip6y": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip6z": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip7x": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip7y": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAip7z": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAis1x": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis1y": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis1z": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis2x": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis2y": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis2z": {POS: 
ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis3x": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis3y": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis3z": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis4x": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis4y": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis4z": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis5x": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis5y": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis5z": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis6x": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis6y": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis6z": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis7x": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis7y": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAis7z": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAmp1x": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp1y": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp1z": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + 
"AAmp2x": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp2y": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp2z": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp3x": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp3y": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp3z": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp4x": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp4y": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp4z": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp5x": {POS: ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp5y": {POS: ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp5z": {POS: ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp6x": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp6y": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp6z": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp7x": {POS: ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp7y": {POS: ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAmp7z": {POS: ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "AAms1x": {POS: ADJ, "morph": 
"Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms1y": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms1z": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms2x": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms2y": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms2z": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms3x": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms3y": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms3z": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms4x": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms4y": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms4z": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms5x": {POS: ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms5y": {POS: ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms5z": {POS: ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms6x": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms6y": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms6z": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms7x": {POS: ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms7y": {POS: 
ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Cmp|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAms7z": {POS: ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Sup|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "AAnp1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp1y": {POS: ADJ, "morph": "Case=Nom|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp1z": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp2y": {POS: ADJ, "morph": "Case=Gen|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp2z": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp3y": {POS: ADJ, "morph": "Case=Dat|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp3z": {POS: ADJ, "morph": "Case=Dat|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp4y": {POS: ADJ, "morph": "Case=Acc|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp4z": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp5y": {POS: ADJ, "morph": "Case=Voc|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp5z": {POS: ADJ, "morph": "Case=Voc|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp6y": {POS: ADJ, "morph": "Case=Loc|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp6z": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp7y": {POS: ADJ, "morph": 
"Case=Ins|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAnp7z": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "AAns1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns1y": {POS: ADJ, "morph": "Case=Nom|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns1z": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns2y": {POS: ADJ, "morph": "Case=Gen|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns2z": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns3y": {POS: ADJ, "morph": "Case=Dat|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns3z": {POS: ADJ, "morph": "Case=Dat|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns4y": {POS: ADJ, "morph": "Case=Acc|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns4z": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns5y": {POS: ADJ, "morph": "Case=Voc|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns5z": {POS: ADJ, "morph": "Case=Voc|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns6y": {POS: ADJ, "morph": "Case=Loc|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns6z": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns7y": {POS: ADJ, "morph": 
"Case=Ins|Degree=Cmp|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AAns7z": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "AFfp1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "AFfp2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "AFfp3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "AFfp4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "AFfp5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "AFfp6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "AFfp7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "AFfs1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "AFfs2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "AFfs3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "AFfs4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "AFfs5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "AFfs6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "AFfs7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "AFip1x": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFip2x": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFip3x": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFip4x": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFip5x": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFip6x": {POS: ADJ, 
"morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFip7x": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFis1x": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFis2x": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFis3x": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFis4x": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFis5x": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFis6x": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFis7x": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFmp1x": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFmp2x": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFmp3x": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFmp4x": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFmp5x": {POS: ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFmp6x": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFmp7x": {POS: ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "AFms1x": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFms2x": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFms3x": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + 
"AFms4x": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFms5x": {POS: ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFms6x": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFms7x": {POS: ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "AFnp1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "AFnp2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "AFnp3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "AFnp4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "AFnp5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "AFnp6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "AFnp7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "AFns1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "AFns2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "AFns3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "AFns4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "AFns5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "AFns6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "AFns7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "AUfp1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Plur"}, + "AUfp1y": {POS: ADJ, "morph": "Case=Nom|Degree=Cmp|Gender=Fem|MorphPos=Def|Number=Plur"}, + "AUfp1z": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Fem|MorphPos=Def|Number=Plur"}, 
+ "AUfp2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Plur"}, + "AUfp3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Plur"}, + "AUfp4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Plur"}, + "AUfp5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Plur"}, + "AUfp6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Plur"}, + "AUfp7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Plur"}, + "AUfs1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Sing"}, + "AUfs1y": {POS: ADJ, "morph": "Case=Nom|Degree=Cmp|Gender=Fem|MorphPos=Def|Number=Sing"}, + "AUfs1z": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Fem|MorphPos=Def|Number=Sing"}, + "AUfs2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Sing"}, + "AUfs3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Sing"}, + "AUfs4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Sing"}, + "AUfs5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Sing"}, + "AUfs6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Sing"}, + "AUfs7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|MorphPos=Def|Number=Sing"}, + "AUip1x": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUip1y": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Cmp|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUip1z": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUip2x": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUip3x": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUip4x": {POS: ADJ, "morph": 
"Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUip5x": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUip6x": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUip7x": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUis1x": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUis1y": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Cmp|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUis1z": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUis2x": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUis3x": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUis4x": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUis5x": {POS: ADJ, "morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUis6x": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUis7x": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUmp1x": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUmp1y": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Cmp|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUmp1z": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Sup|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUmp2x": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUmp3x": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUmp4x": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUmp5x": {POS: 
ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUmp6x": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUmp7x": {POS: ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Plur"}, + "AUms1x": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUms1y": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Cmp|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUms1z": {POS: ADJ, "morph": "Animacy=Anim|Case=Nom|Degree=Sup|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUms2x": {POS: ADJ, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUms3x": {POS: ADJ, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUms4x": {POS: ADJ, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUms5x": {POS: ADJ, "morph": "Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUms6x": {POS: ADJ, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUms7x": {POS: ADJ, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|MorphPos=Def|Number=Sing"}, + "AUnp1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Plur"}, + "AUnp1y": {POS: ADJ, "morph": "Case=Nom|Degree=Cmp|Gender=Neut|MorphPos=Def|Number=Plur"}, + "AUnp1z": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Neut|MorphPos=Def|Number=Plur"}, + "AUnp2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Plur"}, + "AUnp3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Plur"}, + "AUnp4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Plur"}, + "AUnp5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Plur"}, + "AUnp6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Plur"}, + 
"AUnp7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Plur"}, + "AUns1x": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Sing"}, + "AUns1y": {POS: ADJ, "morph": "Case=Nom|Degree=Cmp|Gender=Neut|MorphPos=Def|Number=Sing"}, + "AUns1z": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Neut|MorphPos=Def|Number=Sing"}, + "AUns2x": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Sing"}, + "AUns3x": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Sing"}, + "AUns4x": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Sing"}, + "AUns5x": {POS: ADJ, "morph": "Case=Voc|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Sing"}, + "AUns6x": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Sing"}, + "AUns7x": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|MorphPos=Def|Number=Sing"}, + "Dx": {POS: ADV, "morph": "Degree=Pos"}, + "Dy": {POS: ADV, "morph": "Degree=Cmp"}, + "Dz": {POS: ADV, "morph": "Degree=Sup"}, + "Eu1": {POS: ADP, "morph": "AdpType=Prep|Case=Nom"}, + "Eu2": {POS: ADP, "morph": "AdpType=Prep|Case=Gen"}, + "Eu3": {POS: ADP, "morph": "AdpType=Prep|Case=Dat"}, + "Eu4": {POS: ADP, "morph": "AdpType=Prep|Case=Acc"}, + "Eu6": {POS: ADP, "morph": "AdpType=Prep|Case=Loc"}, + "Eu7": {POS: ADP, "morph": "AdpType=Prep|Case=Ins"}, + "Ev2": {POS: ADP, "morph": "AdpType=Voc|Case=Gen"}, + "Ev3": {POS: ADP, "morph": "AdpType=Voc|Case=Dat"}, + "Ev4": {POS: ADP, "morph": "AdpType=Voc|Case=Acc"}, + "Ev6": {POS: ADP, "morph": "AdpType=Voc|Case=Loc"}, + "Ev7": {POS: ADP, "morph": "AdpType=Voc|Case=Ins"}, + "Gkfp1x": {POS: VERB, "morph": "Case=Nom|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp1y": {POS: VERB, "morph": "Case=Nom|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp1z": {POS: VERB, "morph": "Case=Nom|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp2x": {POS: VERB, 
"morph": "Case=Gen|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp2y": {POS: VERB, "morph": "Case=Gen|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp2z": {POS: VERB, "morph": "Case=Gen|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp3x": {POS: VERB, "morph": "Case=Dat|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp3y": {POS: VERB, "morph": "Case=Dat|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp3z": {POS: VERB, "morph": "Case=Dat|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp4x": {POS: VERB, "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp4y": {POS: VERB, "morph": "Case=Acc|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp4z": {POS: VERB, "morph": "Case=Acc|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp5x": {POS: VERB, "morph": "Case=Voc|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp5y": {POS: VERB, "morph": "Case=Voc|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp5z": {POS: VERB, "morph": "Case=Voc|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp6x": {POS: VERB, "morph": "Case=Loc|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp6y": {POS: VERB, "morph": "Case=Loc|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp6z": {POS: VERB, "morph": "Case=Loc|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp7x": {POS: VERB, "morph": "Case=Ins|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp7y": {POS: VERB, "morph": "Case=Ins|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfp7z": {POS: VERB, "morph": "Case=Ins|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkfs1x": {POS: VERB, "morph": "Case=Nom|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs1y": {POS: VERB, "morph": 
"Case=Nom|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs1z": {POS: VERB, "morph": "Case=Nom|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs2x": {POS: VERB, "morph": "Case=Gen|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs2y": {POS: VERB, "morph": "Case=Gen|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs2z": {POS: VERB, "morph": "Case=Gen|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs3x": {POS: VERB, "morph": "Case=Dat|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs3y": {POS: VERB, "morph": "Case=Dat|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs3z": {POS: VERB, "morph": "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs4x": {POS: VERB, "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs4y": {POS: VERB, "morph": "Case=Acc|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs4z": {POS: VERB, "morph": "Case=Acc|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs5x": {POS: VERB, "morph": "Case=Voc|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs5y": {POS: VERB, "morph": "Case=Voc|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs5z": {POS: VERB, "morph": "Case=Voc|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs6x": {POS: VERB, "morph": "Case=Loc|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs6y": {POS: VERB, "morph": "Case=Loc|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs6z": {POS: VERB, "morph": "Case=Loc|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs7x": {POS: VERB, "morph": "Case=Ins|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs7y": {POS: VERB, "morph": "Case=Ins|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkfs7z": {POS: VERB, "morph": 
"Case=Ins|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkip1x": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip1y": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip1z": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip2x": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip2y": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip2z": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip3x": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip3y": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip3z": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip4x": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip4y": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip4z": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip5x": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip5y": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip5z": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip6x": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip6y": {POS: VERB, "morph": 
"Animacy=Inan|Case=Loc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip6z": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip7x": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip7y": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkip7z": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkis1x": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis1y": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis1z": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis2x": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis2y": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis2z": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis3x": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis3y": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis3z": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis4x": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis4y": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis4z": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis5x": {POS: VERB, 
"morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis5y": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis5z": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis6x": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis6y": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis6z": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis7x": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis7y": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkis7z": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkmp1x": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp1y": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp1z": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp2x": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp2y": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp2z": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp3x": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp3y": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp3z": {POS: 
VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp4x": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp4y": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp4z": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp5x": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp5y": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp5z": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp6x": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp6y": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp6z": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp7x": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp7y": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkmp7z": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkms1x": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms1y": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms1z": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms2x": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms2y": 
{POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms2z": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms3x": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms3y": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms3z": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms4x": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms4y": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms4z": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms5x": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms5y": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms5z": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms6x": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms6y": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms6z": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms7x": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms7y": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkms7z": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Act"}, + 
"Gknp1x": {POS: VERB, "morph": "Case=Nom|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp1y": {POS: VERB, "morph": "Case=Nom|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp1z": {POS: VERB, "morph": "Case=Nom|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp2x": {POS: VERB, "morph": "Case=Gen|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp2y": {POS: VERB, "morph": "Case=Gen|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp2z": {POS: VERB, "morph": "Case=Gen|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp3x": {POS: VERB, "morph": "Case=Dat|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp3y": {POS: VERB, "morph": "Case=Dat|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp3z": {POS: VERB, "morph": "Case=Dat|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp4x": {POS: VERB, "morph": "Case=Acc|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp4y": {POS: VERB, "morph": "Case=Acc|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp4z": {POS: VERB, "morph": "Case=Acc|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp5x": {POS: VERB, "morph": "Case=Voc|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp5y": {POS: VERB, "morph": "Case=Voc|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp5z": {POS: VERB, "morph": "Case=Voc|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp6x": {POS: VERB, "morph": "Case=Loc|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp6y": {POS: VERB, "morph": "Case=Loc|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp6z": {POS: VERB, "morph": "Case=Loc|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp7x": {POS: VERB, "morph": "Case=Ins|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + 
"Gknp7y": {POS: VERB, "morph": "Case=Ins|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gknp7z": {POS: VERB, "morph": "Case=Ins|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Act"}, + "Gkns1x": {POS: VERB, "morph": "Case=Nom|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns1y": {POS: VERB, "morph": "Case=Nom|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns1z": {POS: VERB, "morph": "Case=Nom|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns2x": {POS: VERB, "morph": "Case=Gen|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns2y": {POS: VERB, "morph": "Case=Gen|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns2z": {POS: VERB, "morph": "Case=Gen|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns3x": {POS: VERB, "morph": "Case=Dat|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns3y": {POS: VERB, "morph": "Case=Dat|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns3z": {POS: VERB, "morph": "Case=Dat|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns4x": {POS: VERB, "morph": "Case=Acc|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns4y": {POS: VERB, "morph": "Case=Acc|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns4z": {POS: VERB, "morph": "Case=Acc|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns5x": {POS: VERB, "morph": "Case=Voc|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns5y": {POS: VERB, "morph": "Case=Voc|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns5z": {POS: VERB, "morph": "Case=Voc|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns6x": {POS: VERB, "morph": "Case=Loc|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns6y": {POS: VERB, "morph": "Case=Loc|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + 
"Gkns6z": {POS: VERB, "morph": "Case=Loc|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns7x": {POS: VERB, "morph": "Case=Ins|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns7y": {POS: VERB, "morph": "Case=Ins|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gkns7z": {POS: VERB, "morph": "Case=Ins|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Act"}, + "Gtfp1x": {POS: VERB, "morph": "Case=Nom|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp1y": {POS: VERB, "morph": "Case=Nom|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp1z": {POS: VERB, "morph": "Case=Nom|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp2x": {POS: VERB, "morph": "Case=Gen|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp2y": {POS: VERB, "morph": "Case=Gen|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp2z": {POS: VERB, "morph": "Case=Gen|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp3x": {POS: VERB, "morph": "Case=Dat|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp3y": {POS: VERB, "morph": "Case=Dat|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp3z": {POS: VERB, "morph": "Case=Dat|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp4x": {POS: VERB, "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp4y": {POS: VERB, "morph": "Case=Acc|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp4z": {POS: VERB, "morph": "Case=Acc|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp5x": {POS: VERB, "morph": "Case=Voc|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp5y": {POS: VERB, "morph": "Case=Voc|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp5z": {POS: VERB, "morph": "Case=Voc|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + 
"Gtfp6x": {POS: VERB, "morph": "Case=Loc|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp6y": {POS: VERB, "morph": "Case=Loc|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp6z": {POS: VERB, "morph": "Case=Loc|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp7x": {POS: VERB, "morph": "Case=Ins|Degree=Pos|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp7y": {POS: VERB, "morph": "Case=Ins|Degree=Cmp|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfp7z": {POS: VERB, "morph": "Case=Ins|Degree=Sup|Gender=Fem|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtfs1x": {POS: VERB, "morph": "Case=Nom|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs1y": {POS: VERB, "morph": "Case=Nom|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs1z": {POS: VERB, "morph": "Case=Nom|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs2x": {POS: VERB, "morph": "Case=Gen|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs2y": {POS: VERB, "morph": "Case=Gen|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs2z": {POS: VERB, "morph": "Case=Gen|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs3x": {POS: VERB, "morph": "Case=Dat|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs3y": {POS: VERB, "morph": "Case=Dat|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs3z": {POS: VERB, "morph": "Case=Dat|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs4x": {POS: VERB, "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs4y": {POS: VERB, "morph": "Case=Acc|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs4z": {POS: VERB, "morph": "Case=Acc|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs5x": {POS: VERB, "morph": "Case=Voc|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + 
"Gtfs5y": {POS: VERB, "morph": "Case=Voc|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs5z": {POS: VERB, "morph": "Case=Voc|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs6x": {POS: VERB, "morph": "Case=Loc|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs6y": {POS: VERB, "morph": "Case=Loc|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs6z": {POS: VERB, "morph": "Case=Loc|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs7x": {POS: VERB, "morph": "Case=Ins|Degree=Pos|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs7y": {POS: VERB, "morph": "Case=Ins|Degree=Cmp|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtfs7z": {POS: VERB, "morph": "Case=Ins|Degree=Sup|Gender=Fem|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtip1x": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip1y": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip1z": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip2x": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip2y": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip2z": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip3x": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip3y": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip3z": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip4x": {POS: VERB, "morph": 
"Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip4y": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip4z": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip5x": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip5y": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip5z": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip6x": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip6y": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip6z": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip7x": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip7y": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtip7z": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtis1x": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis1y": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis1z": {POS: VERB, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis2x": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis2y": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + 
"Gtis2z": {POS: VERB, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis3x": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis3y": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis3z": {POS: VERB, "morph": "Animacy=Inan|Case=Dat|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis4x": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis4y": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis4z": {POS: VERB, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis5x": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis5y": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis5z": {POS: VERB, "morph": "Animacy=Inan|Case=Voc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis6x": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis6y": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis6z": {POS: VERB, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis7x": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis7y": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtis7z": {POS: VERB, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtmp1x": {POS: VERB, "morph": 
"Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp1y": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp1z": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp2x": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp2y": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp2z": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp3x": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp3y": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp3z": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp4x": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp4y": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp4z": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp5x": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp5y": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp5z": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp6x": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp6y": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + 
"Gtmp6z": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp7x": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp7y": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Cmp|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtmp7z": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtms1x": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms1y": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms1z": {POS: VERB, "morph": "Animacy=Anim|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms2x": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms2y": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms2z": {POS: VERB, "morph": "Animacy=Anim|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms3x": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms3y": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms3z": {POS: VERB, "morph": "Animacy=Anim|Case=Dat|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms4x": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms4y": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms4z": {POS: VERB, "morph": "Animacy=Anim|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms5x": {POS: VERB, "morph": 
"Animacy=Anim|Case=Voc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms5y": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms5z": {POS: VERB, "morph": "Animacy=Anim|Case=Voc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms6x": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms6y": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms6z": {POS: VERB, "morph": "Animacy=Anim|Case=Loc|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms7x": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms7y": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Cmp|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtms7z": {POS: VERB, "morph": "Animacy=Anim|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtnp1x": {POS: VERB, "morph": "Case=Nom|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp1y": {POS: VERB, "morph": "Case=Nom|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp1z": {POS: VERB, "morph": "Case=Nom|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp2x": {POS: VERB, "morph": "Case=Gen|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp2y": {POS: VERB, "morph": "Case=Gen|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp2z": {POS: VERB, "morph": "Case=Gen|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp3x": {POS: VERB, "morph": "Case=Dat|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp3y": {POS: VERB, "morph": "Case=Dat|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp3z": {POS: VERB, "morph": "Case=Dat|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + 
"Gtnp4x": {POS: VERB, "morph": "Case=Acc|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp4y": {POS: VERB, "morph": "Case=Acc|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp4z": {POS: VERB, "morph": "Case=Acc|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp5x": {POS: VERB, "morph": "Case=Voc|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp5y": {POS: VERB, "morph": "Case=Voc|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp5z": {POS: VERB, "morph": "Case=Voc|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp6x": {POS: VERB, "morph": "Case=Loc|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp6y": {POS: VERB, "morph": "Case=Loc|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp6z": {POS: VERB, "morph": "Case=Loc|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp7x": {POS: VERB, "morph": "Case=Ins|Degree=Pos|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp7y": {POS: VERB, "morph": "Case=Ins|Degree=Cmp|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtnp7z": {POS: VERB, "morph": "Case=Ins|Degree=Sup|Gender=Neut|Number=Plur|VerbForm=Part|Voice=Pass"}, + "Gtns1x": {POS: VERB, "morph": "Case=Nom|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns1y": {POS: VERB, "morph": "Case=Nom|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns1z": {POS: VERB, "morph": "Case=Nom|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns2x": {POS: VERB, "morph": "Case=Gen|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns2y": {POS: VERB, "morph": "Case=Gen|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns2z": {POS: VERB, "morph": "Case=Gen|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns3x": {POS: VERB, "morph": 
"Case=Dat|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns3y": {POS: VERB, "morph": "Case=Dat|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns3z": {POS: VERB, "morph": "Case=Dat|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns4x": {POS: VERB, "morph": "Case=Acc|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns4y": {POS: VERB, "morph": "Case=Acc|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns4z": {POS: VERB, "morph": "Case=Acc|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns5x": {POS: VERB, "morph": "Case=Voc|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns5y": {POS: VERB, "morph": "Case=Voc|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns5z": {POS: VERB, "morph": "Case=Voc|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns6x": {POS: VERB, "morph": "Case=Loc|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns6y": {POS: VERB, "morph": "Case=Loc|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns6z": {POS: VERB, "morph": "Case=Loc|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns7x": {POS: VERB, "morph": "Case=Ins|Degree=Pos|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns7y": {POS: VERB, "morph": "Case=Ins|Degree=Cmp|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "Gtns7z": {POS: VERB, "morph": "Case=Ins|Degree=Sup|Gender=Neut|Number=Sing|VerbForm=Part|Voice=Pass"}, + "J": {POS: INTJ, "morph": "_"}, + "NAfp1": {POS: NUM, "morph": "Case=Nom|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "NAfp2": {POS: NUM, "morph": "Case=Gen|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "NAfp3": {POS: NUM, "morph": "Case=Dat|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "NAfp4": {POS: NUM, "morph": "Case=Acc|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "NAfp5": {POS: NUM, "morph": "Case=Voc|Gender=Fem|MorphPos=Adj|Number=Plur"}, + 
"NAfp6": {POS: NUM, "morph": "Case=Loc|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "NAfp7": {POS: NUM, "morph": "Case=Ins|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "NAfs1": {POS: NUM, "morph": "Case=Nom|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "NAfs2": {POS: NUM, "morph": "Case=Gen|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "NAfs3": {POS: NUM, "morph": "Case=Dat|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "NAfs4": {POS: NUM, "morph": "Case=Acc|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "NAfs5": {POS: NUM, "morph": "Case=Voc|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "NAfs6": {POS: NUM, "morph": "Case=Loc|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "NAfs7": {POS: NUM, "morph": "Case=Ins|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "NAip1": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAip2": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAip3": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAip4": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAip5": {POS: NUM, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAip6": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAip7": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAis1": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAis2": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAis3": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAis4": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAis5": {POS: NUM, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAis6": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAis7": {POS: NUM, "morph": 
"Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAmp1": {POS: NUM, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAmp2": {POS: NUM, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAmp3": {POS: NUM, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAmp4": {POS: NUM, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAmp5": {POS: NUM, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAmp6": {POS: NUM, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAmp7": {POS: NUM, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "NAms1": {POS: NUM, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAms2": {POS: NUM, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAms3": {POS: NUM, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAms4": {POS: NUM, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAms5": {POS: NUM, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAms6": {POS: NUM, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAms7": {POS: NUM, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "NAnp1": {POS: NUM, "morph": "Case=Nom|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "NAnp2": {POS: NUM, "morph": "Case=Gen|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "NAnp3": {POS: NUM, "morph": "Case=Dat|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "NAnp4": {POS: NUM, "morph": "Case=Acc|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "NAnp5": {POS: NUM, "morph": "Case=Voc|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "NAnp6": {POS: NUM, "morph": "Case=Loc|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "NAnp7": {POS: NUM, "morph": "Case=Ins|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "NAns1": {POS: NUM, "morph": 
"Case=Nom|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "NAns2": {POS: NUM, "morph": "Case=Gen|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "NAns3": {POS: NUM, "morph": "Case=Dat|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "NAns4": {POS: NUM, "morph": "Case=Acc|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "NAns5": {POS: NUM, "morph": "Case=Voc|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "NAns6": {POS: NUM, "morph": "Case=Loc|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "NAns7": {POS: NUM, "morph": "Case=Ins|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "ND": {POS: NUM, "morph": "MorphPos=Adv"}, + "NFfp1": {POS: NUM, "morph": "Case=Nom|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "NFfp2": {POS: NUM, "morph": "Case=Gen|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "NFfp3": {POS: NUM, "morph": "Case=Dat|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "NFfp4": {POS: NUM, "morph": "Case=Acc|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "NFfp5": {POS: NUM, "morph": "Case=Voc|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "NFfp6": {POS: NUM, "morph": "Case=Loc|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "NFfp7": {POS: NUM, "morph": "Case=Ins|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "NFfs1": {POS: NUM, "morph": "Case=Nom|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "NFfs2": {POS: NUM, "morph": "Case=Gen|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "NFfs3": {POS: NUM, "morph": "Case=Dat|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "NFfs4": {POS: NUM, "morph": "Case=Acc|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "NFfs5": {POS: NUM, "morph": "Case=Voc|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "NFfs6": {POS: NUM, "morph": "Case=Loc|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "NFfs7": {POS: NUM, "morph": "Case=Ins|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "NFip1": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFip2": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFip3": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFip4": {POS: 
NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFip5": {POS: NUM, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFip6": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFip7": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFis1": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFis2": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFis3": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFis4": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFis5": {POS: NUM, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFis6": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFis7": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFmp1": {POS: NUM, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFmp2": {POS: NUM, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFmp3": {POS: NUM, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFmp4": {POS: NUM, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFmp5": {POS: NUM, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFmp6": {POS: NUM, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFmp7": {POS: NUM, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Mix|Number=Plur"}, + "NFms1": {POS: NUM, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFms2": {POS: NUM, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFms3": {POS: NUM, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFms4": {POS: NUM, "morph": 
"Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFms5": {POS: NUM, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFms6": {POS: NUM, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFms7": {POS: NUM, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Mix|Number=Sing"}, + "NFnp1": {POS: NUM, "morph": "Case=Nom|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "NFnp2": {POS: NUM, "morph": "Case=Gen|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "NFnp3": {POS: NUM, "morph": "Case=Dat|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "NFnp4": {POS: NUM, "morph": "Case=Acc|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "NFnp5": {POS: NUM, "morph": "Case=Voc|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "NFnp6": {POS: NUM, "morph": "Case=Loc|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "NFnp7": {POS: NUM, "morph": "Case=Ins|Gender=Neut|MorphPos=Mix|Number=Plur"}, + "NFns1": {POS: NUM, "morph": "Case=Nom|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "NFns2": {POS: NUM, "morph": "Case=Gen|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "NFns3": {POS: NUM, "morph": "Case=Dat|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "NFns4": {POS: NUM, "morph": "Case=Acc|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "NFns5": {POS: NUM, "morph": "Case=Voc|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "NFns6": {POS: NUM, "morph": "Case=Loc|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "NFns7": {POS: NUM, "morph": "Case=Ins|Gender=Neut|MorphPos=Mix|Number=Sing"}, + "NNfp1": {POS: NUM, "morph": "Case=Nom|Gender=Fem|MorphPos=Num|Number=Plur"}, + "NNfp2": {POS: NUM, "morph": "Case=Gen|Gender=Fem|MorphPos=Num|Number=Plur"}, + "NNfp3": {POS: NUM, "morph": "Case=Dat|Gender=Fem|MorphPos=Num|Number=Plur"}, + "NNfp4": {POS: NUM, "morph": "Case=Acc|Gender=Fem|MorphPos=Num|Number=Plur"}, + "NNfp5": {POS: NUM, "morph": "Case=Voc|Gender=Fem|MorphPos=Num|Number=Plur"}, + "NNfp6": {POS: NUM, "morph": "Case=Loc|Gender=Fem|MorphPos=Num|Number=Plur"}, + "NNfp7": {POS: NUM, "morph": 
"Case=Ins|Gender=Fem|MorphPos=Num|Number=Plur"}, + "NNip1": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNip2": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNip3": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNip4": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNip5": {POS: NUM, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNip6": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNip7": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNmp1": {POS: NUM, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNmp2": {POS: NUM, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNmp3": {POS: NUM, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNmp4": {POS: NUM, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNmp5": {POS: NUM, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNmp6": {POS: NUM, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNmp7": {POS: NUM, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Num|Number=Plur"}, + "NNnp1": {POS: NUM, "morph": "Case=Nom|Gender=Neut|MorphPos=Num|Number=Plur"}, + "NNnp2": {POS: NUM, "morph": "Case=Gen|Gender=Neut|MorphPos=Num|Number=Plur"}, + "NNnp3": {POS: NUM, "morph": "Case=Dat|Gender=Neut|MorphPos=Num|Number=Plur"}, + "NNnp4": {POS: NUM, "morph": "Case=Acc|Gender=Neut|MorphPos=Num|Number=Plur"}, + "NNnp5": {POS: NUM, "morph": "Case=Voc|Gender=Neut|MorphPos=Num|Number=Plur"}, + "NNnp6": {POS: NUM, "morph": "Case=Loc|Gender=Neut|MorphPos=Num|Number=Plur"}, + "NNnp7": {POS: NUM, "morph": "Case=Ins|Gender=Neut|MorphPos=Num|Number=Plur"}, + "NSfp1": {POS: NUM, "morph": 
"Case=Nom|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "NSfp2": {POS: NUM, "morph": "Case=Gen|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "NSfp3": {POS: NUM, "morph": "Case=Dat|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "NSfp4": {POS: NUM, "morph": "Case=Acc|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "NSfp5": {POS: NUM, "morph": "Case=Voc|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "NSfp6": {POS: NUM, "morph": "Case=Loc|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "NSfp7": {POS: NUM, "morph": "Case=Ins|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "NSfs1": {POS: NUM, "morph": "Case=Nom|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "NSfs2": {POS: NUM, "morph": "Case=Gen|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "NSfs3": {POS: NUM, "morph": "Case=Dat|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "NSfs4": {POS: NUM, "morph": "Case=Acc|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "NSfs5": {POS: NUM, "morph": "Case=Voc|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "NSfs6": {POS: NUM, "morph": "Case=Loc|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "NSfs7": {POS: NUM, "morph": "Case=Ins|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "NSip1": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "NSip2": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "NSip3": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "NSip4": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "NSip5": {POS: NUM, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "NSip6": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "NSip7": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "NSis1": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "NSis2": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "NSis3": {POS: NUM, "morph": 
"Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "NSis4": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "NSis5": {POS: NUM, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "NSis6": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "NSis7": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "NUfp1": {POS: NUM, "morph": "Case=Nom|Gender=Fem|MorphPos=Def|Number=Plur"}, + "NUfp2": {POS: NUM, "morph": "Case=Gen|Gender=Fem|MorphPos=Def|Number=Plur"}, + "NUfp3": {POS: NUM, "morph": "Case=Dat|Gender=Fem|MorphPos=Def|Number=Plur"}, + "NUfp4": {POS: NUM, "morph": "Case=Acc|Gender=Fem|MorphPos=Def|Number=Plur"}, + "NUfp5": {POS: NUM, "morph": "Case=Voc|Gender=Fem|MorphPos=Def|Number=Plur"}, + "NUfp6": {POS: NUM, "morph": "Case=Loc|Gender=Fem|MorphPos=Def|Number=Plur"}, + "NUfp7": {POS: NUM, "morph": "Case=Ins|Gender=Fem|MorphPos=Def|Number=Plur"}, + "NUip1": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUip2": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUip3": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUip4": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUip5": {POS: NUM, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUip6": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUip7": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUis1": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Def|Number=Sing"}, + "NUis2": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Def|Number=Sing"}, + "NUis3": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Def|Number=Sing"}, + "NUis4": {POS: NUM, "morph": 
"Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Def|Number=Sing"}, + "NUis5": {POS: NUM, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Def|Number=Sing"}, + "NUis6": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Def|Number=Sing"}, + "NUis7": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Def|Number=Sing"}, + "NUmp1": {POS: NUM, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUmp2": {POS: NUM, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUmp3": {POS: NUM, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUmp4": {POS: NUM, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUmp5": {POS: NUM, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUmp6": {POS: NUM, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUmp7": {POS: NUM, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Def|Number=Plur"}, + "NUnp1": {POS: NUM, "morph": "Case=Nom|Gender=Neut|MorphPos=Def|Number=Plur"}, + "NUnp2": {POS: NUM, "morph": "Case=Gen|Gender=Neut|MorphPos=Def|Number=Plur"}, + "NUnp3": {POS: NUM, "morph": "Case=Dat|Gender=Neut|MorphPos=Def|Number=Plur"}, + "NUnp4": {POS: NUM, "morph": "Case=Acc|Gender=Neut|MorphPos=Def|Number=Plur"}, + "NUnp5": {POS: NUM, "morph": "Case=Voc|Gender=Neut|MorphPos=Def|Number=Plur"}, + "NUnp6": {POS: NUM, "morph": "Case=Loc|Gender=Neut|MorphPos=Def|Number=Plur"}, + "NUnp7": {POS: NUM, "morph": "Case=Ins|Gender=Neut|MorphPos=Def|Number=Plur"}, + "NUns1": {POS: NUM, "morph": "Case=Nom|Gender=Neut|MorphPos=Def|Number=Sing"}, + "NUns2": {POS: NUM, "morph": "Case=Gen|Gender=Neut|MorphPos=Def|Number=Sing"}, + "NUns3": {POS: NUM, "morph": "Case=Dat|Gender=Neut|MorphPos=Def|Number=Sing"}, + "NUns4": {POS: NUM, "morph": "Case=Acc|Gender=Neut|MorphPos=Def|Number=Sing"}, + "NUns5": {POS: NUM, "morph": "Case=Voc|Gender=Neut|MorphPos=Def|Number=Sing"}, + "NUns6": {POS: NUM, 
"morph": "Case=Loc|Gender=Neut|MorphPos=Def|Number=Sing"}, + "NUns7": {POS: NUM, "morph": "Case=Ins|Gender=Neut|MorphPos=Def|Number=Sing"}, + "O": {POS: CCONJ, "morph": "_"}, + "OY": {POS: CCONJ, "morph": "Mood=Cnd"}, + "PAfp1": {POS: PRON, "morph": "Case=Nom|Gender=Fem|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAfp2": {POS: PRON, "morph": "Case=Gen|Gender=Fem|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAfp3": {POS: PRON, "morph": "Case=Dat|Gender=Fem|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAfp4": {POS: PRON, "morph": "Case=Acc|Gender=Fem|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAfp5": {POS: PRON, "morph": "Case=Voc|Gender=Fem|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAfp6": {POS: PRON, "morph": "Case=Loc|Gender=Fem|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAfp7": {POS: PRON, "morph": "Case=Ins|Gender=Fem|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAfs1": {POS: PRON, "morph": "Case=Nom|Gender=Fem|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAfs2": {POS: PRON, "morph": "Case=Gen|Gender=Fem|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAfs3": {POS: PRON, "morph": "Case=Dat|Gender=Fem|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAfs4": {POS: PRON, "morph": "Case=Acc|Gender=Fem|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAfs5": {POS: PRON, "morph": "Case=Voc|Gender=Fem|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAfs6": {POS: PRON, "morph": "Case=Loc|Gender=Fem|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAfs7": {POS: PRON, "morph": "Case=Ins|Gender=Fem|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAip1": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAip2": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAip3": {POS: PRON, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAip4": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAip5": {POS: PRON, "morph": 
"Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAip6": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAip7": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAis1": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAis2": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAis3": {POS: PRON, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAis4": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAis5": {POS: PRON, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAis6": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAis7": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAmp1": {POS: PRON, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAmp2": {POS: PRON, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAmp3": {POS: PRON, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAmp4": {POS: PRON, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAmp5": {POS: PRON, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAmp6": {POS: PRON, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAmp7": {POS: PRON, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAms1": {POS: PRON, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAms2": {POS: PRON, "morph": 
"Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAms3": {POS: PRON, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAms4": {POS: PRON, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAms5": {POS: PRON, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAms6": {POS: PRON, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAms7": {POS: PRON, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAnp1": {POS: PRON, "morph": "Case=Nom|Gender=Neut|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAnp2": {POS: PRON, "morph": "Case=Gen|Gender=Neut|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAnp3": {POS: PRON, "morph": "Case=Dat|Gender=Neut|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAnp4": {POS: PRON, "morph": "Case=Acc|Gender=Neut|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAnp5": {POS: PRON, "morph": "Case=Voc|Gender=Neut|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAnp6": {POS: PRON, "morph": "Case=Loc|Gender=Neut|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAnp7": {POS: PRON, "morph": "Case=Ins|Gender=Neut|MorphPos=Adj|Number=Plur|PronType=Prs"}, + "PAns1": {POS: PRON, "morph": "Case=Nom|Gender=Neut|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAns2": {POS: PRON, "morph": "Case=Gen|Gender=Neut|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAns3": {POS: PRON, "morph": "Case=Dat|Gender=Neut|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAns4": {POS: PRON, "morph": "Case=Acc|Gender=Neut|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAns5": {POS: PRON, "morph": "Case=Voc|Gender=Neut|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAns6": {POS: PRON, "morph": "Case=Loc|Gender=Neut|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PAns7": {POS: PRON, "morph": "Case=Ins|Gender=Neut|MorphPos=Adj|Number=Sing|PronType=Prs"}, + "PD": {POS: PRON, "morph": 
"MorphPos=Adv|PronType=Prs"}, + "PFfp1": {POS: PRON, "morph": "Case=Nom|Gender=Fem|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFfp2": {POS: PRON, "morph": "Case=Gen|Gender=Fem|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFfp3": {POS: PRON, "morph": "Case=Dat|Gender=Fem|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFfp4": {POS: PRON, "morph": "Case=Acc|Gender=Fem|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFfp5": {POS: PRON, "morph": "Case=Voc|Gender=Fem|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFfp6": {POS: PRON, "morph": "Case=Loc|Gender=Fem|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFfp7": {POS: PRON, "morph": "Case=Ins|Gender=Fem|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFfs1": {POS: PRON, "morph": "Case=Nom|Gender=Fem|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFfs2": {POS: PRON, "morph": "Case=Gen|Gender=Fem|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFfs3": {POS: PRON, "morph": "Case=Dat|Gender=Fem|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFfs4": {POS: PRON, "morph": "Case=Acc|Gender=Fem|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFfs5": {POS: PRON, "morph": "Case=Voc|Gender=Fem|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFfs6": {POS: PRON, "morph": "Case=Loc|Gender=Fem|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFfs7": {POS: PRON, "morph": "Case=Ins|Gender=Fem|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFip1": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFip2": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFip3": {POS: PRON, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFip4": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFip5": {POS: PRON, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFip6": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + 
"PFip7": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFis1": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFis2": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFis2g": {POS: PRON, "morph": "AdpType=Preppron|Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFis3": {POS: PRON, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFis4": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFis4g": {POS: PRON, "morph": "AdpType=Preppron|Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFis5": {POS: PRON, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFis6": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFis7": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFmp1": {POS: PRON, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFmp2": {POS: PRON, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFmp3": {POS: PRON, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFmp4": {POS: PRON, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFmp5": {POS: PRON, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFmp6": {POS: PRON, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFmp7": {POS: PRON, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFms1": {POS: PRON, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFms2": {POS: 
PRON, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFms2g": {POS: PRON, "morph": "AdpType=Preppron|Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFms3": {POS: PRON, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFms4": {POS: PRON, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFms4g": {POS: PRON, "morph": "AdpType=Preppron|Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFms5": {POS: PRON, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFms6": {POS: PRON, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFms7": {POS: PRON, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFnp1": {POS: PRON, "morph": "Case=Nom|Gender=Neut|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFnp2": {POS: PRON, "morph": "Case=Gen|Gender=Neut|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFnp3": {POS: PRON, "morph": "Case=Dat|Gender=Neut|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFnp4": {POS: PRON, "morph": "Case=Acc|Gender=Neut|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFnp5": {POS: PRON, "morph": "Case=Voc|Gender=Neut|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFnp6": {POS: PRON, "morph": "Case=Loc|Gender=Neut|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFnp7": {POS: PRON, "morph": "Case=Ins|Gender=Neut|MorphPos=Mix|Number=Plur|PronType=Prs"}, + "PFns1": {POS: PRON, "morph": "Case=Nom|Gender=Neut|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFns2": {POS: PRON, "morph": "Case=Gen|Gender=Neut|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFns2g": {POS: PRON, "morph": "AdpType=Preppron|Case=Gen|Gender=Neut|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFns3": {POS: PRON, "morph": "Case=Dat|Gender=Neut|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFns4": {POS: PRON, "morph": 
"Case=Acc|Gender=Neut|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFns4g": {POS: PRON, "morph": "AdpType=Preppron|Case=Acc|Gender=Neut|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFns5": {POS: PRON, "morph": "Case=Voc|Gender=Neut|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFns6": {POS: PRON, "morph": "Case=Loc|Gender=Neut|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PFns7": {POS: PRON, "morph": "Case=Ins|Gender=Neut|MorphPos=Mix|Number=Sing|PronType=Prs"}, + "PPhp1": {POS: PRON, "morph": "Case=Nom|MorphPos=Pron|Number=Plur|PronType=Prs"}, + "PPhp2": {POS: PRON, "morph": "Case=Gen|MorphPos=Pron|Number=Plur|PronType=Prs"}, + "PPhp3": {POS: PRON, "morph": "Case=Dat|MorphPos=Pron|Number=Plur|PronType=Prs"}, + "PPhp4": {POS: PRON, "morph": "Case=Acc|MorphPos=Pron|Number=Plur|PronType=Prs"}, + "PPhp5": {POS: PRON, "morph": "Case=Voc|MorphPos=Pron|Number=Plur|PronType=Prs"}, + "PPhp6": {POS: PRON, "morph": "Case=Loc|MorphPos=Pron|Number=Plur|PronType=Prs"}, + "PPhp7": {POS: PRON, "morph": "Case=Ins|MorphPos=Pron|Number=Plur|PronType=Prs"}, + "PPhs1": {POS: PRON, "morph": "Case=Nom|MorphPos=Pron|Number=Sing|PronType=Prs"}, + "PPhs2": {POS: PRON, "morph": "Case=Gen|MorphPos=Pron|Number=Sing|PronType=Prs"}, + "PPhs3": {POS: PRON, "morph": "Case=Dat|MorphPos=Pron|Number=Sing|PronType=Prs"}, + "PPhs4": {POS: PRON, "morph": "Case=Acc|MorphPos=Pron|Number=Sing|PronType=Prs"}, + "PPhs5": {POS: PRON, "morph": "Case=Voc|MorphPos=Pron|Number=Sing|PronType=Prs"}, + "PPhs6": {POS: PRON, "morph": "Case=Loc|MorphPos=Pron|Number=Sing|PronType=Prs"}, + "PPhs7": {POS: PRON, "morph": "Case=Ins|MorphPos=Pron|Number=Sing|PronType=Prs"}, + "PSfp1": {POS: PRON, "morph": "Case=Nom|Gender=Fem|MorphPos=Noun|Number=Plur|PronType=Prs"}, + "PSfp2": {POS: PRON, "morph": "Case=Gen|Gender=Fem|MorphPos=Noun|Number=Plur|PronType=Prs"}, + "PSfp3": {POS: PRON, "morph": "Case=Dat|Gender=Fem|MorphPos=Noun|Number=Plur|PronType=Prs"}, + "PSfp4": {POS: PRON, "morph": 
"Case=Acc|Gender=Fem|MorphPos=Noun|Number=Plur|PronType=Prs"}, + "PSfp5": {POS: PRON, "morph": "Case=Voc|Gender=Fem|MorphPos=Noun|Number=Plur|PronType=Prs"}, + "PSfp6": {POS: PRON, "morph": "Case=Loc|Gender=Fem|MorphPos=Noun|Number=Plur|PronType=Prs"}, + "PSfp7": {POS: PRON, "morph": "Case=Ins|Gender=Fem|MorphPos=Noun|Number=Plur|PronType=Prs"}, + "PSfs1": {POS: PRON, "morph": "Case=Nom|Gender=Fem|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSfs2": {POS: PRON, "morph": "Case=Gen|Gender=Fem|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSfs3": {POS: PRON, "morph": "Case=Dat|Gender=Fem|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSfs4": {POS: PRON, "morph": "Case=Acc|Gender=Fem|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSfs5": {POS: PRON, "morph": "Case=Voc|Gender=Fem|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSfs6": {POS: PRON, "morph": "Case=Loc|Gender=Fem|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSfs7": {POS: PRON, "morph": "Case=Ins|Gender=Fem|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSns1": {POS: PRON, "morph": "Case=Nom|Gender=Neut|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSns2": {POS: PRON, "morph": "Case=Gen|Gender=Neut|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSns3": {POS: PRON, "morph": "Case=Dat|Gender=Neut|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSns4": {POS: PRON, "morph": "Case=Acc|Gender=Neut|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSns5": {POS: PRON, "morph": "Case=Voc|Gender=Neut|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSns6": {POS: PRON, "morph": "Case=Loc|Gender=Neut|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PSns7": {POS: PRON, "morph": "Case=Ins|Gender=Neut|MorphPos=Noun|Number=Sing|PronType=Prs"}, + "PUfp1": {POS: PRON, "morph": "Case=Nom|Gender=Fem|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUfp2": {POS: PRON, "morph": "Case=Gen|Gender=Fem|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUfp3": {POS: PRON, "morph": "Case=Dat|Gender=Fem|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUfp4": {POS: PRON, "morph": 
"Case=Acc|Gender=Fem|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUfp5": {POS: PRON, "morph": "Case=Voc|Gender=Fem|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUfp6": {POS: PRON, "morph": "Case=Loc|Gender=Fem|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUfp7": {POS: PRON, "morph": "Case=Ins|Gender=Fem|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUfs1": {POS: PRON, "morph": "Case=Nom|Gender=Fem|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUfs2": {POS: PRON, "morph": "Case=Gen|Gender=Fem|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUfs3": {POS: PRON, "morph": "Case=Dat|Gender=Fem|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUfs4": {POS: PRON, "morph": "Case=Acc|Gender=Fem|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUfs5": {POS: PRON, "morph": "Case=Voc|Gender=Fem|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUfs6": {POS: PRON, "morph": "Case=Loc|Gender=Fem|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUfs7": {POS: PRON, "morph": "Case=Ins|Gender=Fem|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUip1": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUip2": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUip3": {POS: PRON, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUip4": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUip5": {POS: PRON, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUip6": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUip7": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUis1": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUis2": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUis3": {POS: PRON, 
"morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUis4": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUis5": {POS: PRON, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUis6": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUis7": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUmp1": {POS: PRON, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUmp2": {POS: PRON, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUmp3": {POS: PRON, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUmp4": {POS: PRON, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUmp5": {POS: PRON, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUmp6": {POS: PRON, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUmp7": {POS: PRON, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUms1": {POS: PRON, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUms2": {POS: PRON, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUms3": {POS: PRON, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUms4": {POS: PRON, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUms5": {POS: PRON, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUms6": {POS: PRON, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUms7": {POS: PRON, "morph": 
"Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUnp1": {POS: PRON, "morph": "Case=Nom|Gender=Neut|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUnp2": {POS: PRON, "morph": "Case=Gen|Gender=Neut|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUnp3": {POS: PRON, "morph": "Case=Dat|Gender=Neut|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUnp4": {POS: PRON, "morph": "Case=Acc|Gender=Neut|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUnp5": {POS: PRON, "morph": "Case=Voc|Gender=Neut|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUnp6": {POS: PRON, "morph": "Case=Loc|Gender=Neut|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUnp7": {POS: PRON, "morph": "Case=Ins|Gender=Neut|MorphPos=Def|Number=Plur|PronType=Prs"}, + "PUns1": {POS: PRON, "morph": "Case=Nom|Gender=Neut|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUns2": {POS: PRON, "morph": "Case=Gen|Gender=Neut|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUns3": {POS: PRON, "morph": "Case=Dat|Gender=Neut|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUns4": {POS: PRON, "morph": "Case=Acc|Gender=Neut|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUns5": {POS: PRON, "morph": "Case=Voc|Gender=Neut|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUns6": {POS: PRON, "morph": "Case=Loc|Gender=Neut|MorphPos=Def|Number=Sing|PronType=Prs"}, + "PUns7": {POS: PRON, "morph": "Case=Ins|Gender=Neut|MorphPos=Def|Number=Sing|PronType=Prs"}, + "Q": {POS: X, "morph": "Hyph=Yes"}, + "R": {POS: PRON, "morph": "PronType=Prs|Reflex=Yes"}, + "SAfp1": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "SAfp2": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "SAfp3": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "SAfp4": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "SAfp5": {POS: NOUN, "morph": "Case=Voc|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "SAfp6": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "SAfp7": 
{POS: NOUN, "morph": "Case=Ins|Gender=Fem|MorphPos=Adj|Number=Plur"}, + "SAfs1": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "SAfs2": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "SAfs3": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "SAfs4": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "SAfs5": {POS: NOUN, "morph": "Case=Voc|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "SAfs6": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "SAfs7": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|MorphPos=Adj|Number=Sing"}, + "SAip1": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAip2": {POS: NOUN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAip3": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAip4": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAip5": {POS: NOUN, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAip6": {POS: NOUN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAip7": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAis1": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAis2": {POS: NOUN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAis3": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAis4": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAis5": {POS: NOUN, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAis6": {POS: NOUN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAis7": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAmp1": {POS: NOUN, "morph": 
"Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAmp2": {POS: NOUN, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAmp3": {POS: NOUN, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAmp4": {POS: NOUN, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAmp5": {POS: NOUN, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAmp6": {POS: NOUN, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAmp7": {POS: NOUN, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Plur"}, + "SAms1": {POS: NOUN, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAms2": {POS: NOUN, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAms3": {POS: NOUN, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAms4": {POS: NOUN, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAms5": {POS: NOUN, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAms6": {POS: NOUN, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAms7": {POS: NOUN, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Adj|Number=Sing"}, + "SAnp1": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "SAnp2": {POS: NOUN, "morph": "Case=Gen|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "SAnp3": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "SAnp4": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "SAnp5": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "SAnp6": {POS: NOUN, "morph": "Case=Loc|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "SAnp7": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|MorphPos=Adj|Number=Plur"}, + "SAns1": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "SAns2": {POS: NOUN, "morph": 
"Case=Gen|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "SAns3": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "SAns4": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "SAns5": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "SAns6": {POS: NOUN, "morph": "Case=Loc|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "SAns7": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|MorphPos=Adj|Number=Sing"}, + "SFfp1": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "SFfp2": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "SFfp3": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "SFfp4": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "SFfp5": {POS: NOUN, "morph": "Case=Voc|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "SFfp6": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "SFfp7": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|MorphPos=Mix|Number=Plur"}, + "SFfs1": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "SFfs2": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "SFfs3": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "SFfs4": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "SFfs5": {POS: NOUN, "morph": "Case=Voc|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "SFfs6": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "SFfs7": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|MorphPos=Mix|Number=Sing"}, + "SSfp1": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "SSfp2": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "SSfp3": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "SSfp4": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "SSfp5": {POS: NOUN, "morph": 
"Case=Voc|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "SSfp6": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "SSfp7": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|MorphPos=Noun|Number=Plur"}, + "SSfs1": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "SSfs2": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "SSfs3": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "SSfs4": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "SSfs5": {POS: NOUN, "morph": "Case=Voc|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "SSfs6": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "SSfs7": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|MorphPos=Noun|Number=Sing"}, + "SSip1": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSip2": {POS: NOUN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSip3": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSip4": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSip5": {POS: NOUN, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSip6": {POS: NOUN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSip7": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSis1": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSis2": {POS: NOUN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSis3": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSis4": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSis5": {POS: NOUN, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSis6": {POS: NOUN, "morph": 
"Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSis7": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSmp1": {POS: NOUN, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSmp2": {POS: NOUN, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSmp3": {POS: NOUN, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSmp4": {POS: NOUN, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSmp5": {POS: NOUN, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSmp6": {POS: NOUN, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSmp7": {POS: NOUN, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Noun|Number=Plur"}, + "SSms1": {POS: NOUN, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSms2": {POS: NOUN, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSms3": {POS: NOUN, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSms4": {POS: NOUN, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSms5": {POS: NOUN, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSms6": {POS: NOUN, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSms7": {POS: NOUN, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Noun|Number=Sing"}, + "SSnp1": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|MorphPos=Noun|Number=Plur"}, + "SSnp2": {POS: NOUN, "morph": "Case=Gen|Gender=Neut|MorphPos=Noun|Number=Plur"}, + "SSnp3": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|MorphPos=Noun|Number=Plur"}, + "SSnp4": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|MorphPos=Noun|Number=Plur"}, + "SSnp5": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|MorphPos=Noun|Number=Plur"}, + "SSnp6": {POS: NOUN, "morph": 
"Case=Loc|Gender=Neut|MorphPos=Noun|Number=Plur"}, + "SSnp7": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|MorphPos=Noun|Number=Plur"}, + "SSns1": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|MorphPos=Noun|Number=Sing"}, + "SSns2": {POS: NOUN, "morph": "Case=Gen|Gender=Neut|MorphPos=Noun|Number=Sing"}, + "SSns3": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|MorphPos=Noun|Number=Sing"}, + "SSns4": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|MorphPos=Noun|Number=Sing"}, + "SSns5": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|MorphPos=Noun|Number=Sing"}, + "SSns6": {POS: NOUN, "morph": "Case=Loc|Gender=Neut|MorphPos=Noun|Number=Sing"}, + "SSns7": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|MorphPos=Noun|Number=Sing"}, + "SUfp1": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|MorphPos=Def|Number=Plur"}, + "SUfp2": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|MorphPos=Def|Number=Plur"}, + "SUfp3": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|MorphPos=Def|Number=Plur"}, + "SUfp4": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|MorphPos=Def|Number=Plur"}, + "SUfp5": {POS: NOUN, "morph": "Case=Voc|Gender=Fem|MorphPos=Def|Number=Plur"}, + "SUfp6": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|MorphPos=Def|Number=Plur"}, + "SUfp7": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|MorphPos=Def|Number=Plur"}, + "SUfs1": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|MorphPos=Def|Number=Sing"}, + "SUfs2": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|MorphPos=Def|Number=Sing"}, + "SUfs3": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|MorphPos=Def|Number=Sing"}, + "SUfs4": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|MorphPos=Def|Number=Sing"}, + "SUfs5": {POS: NOUN, "morph": "Case=Voc|Gender=Fem|MorphPos=Def|Number=Sing"}, + "SUfs6": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|MorphPos=Def|Number=Sing"}, + "SUfs7": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|MorphPos=Def|Number=Sing"}, + "SUip1": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUip2": {POS: NOUN, "morph": 
"Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUip3": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUip4": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUip5": {POS: NOUN, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUip6": {POS: NOUN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUip7": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUis1": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUis2": {POS: NOUN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUis3": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUis4": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUis5": {POS: NOUN, "morph": "Animacy=Inan|Case=Voc|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUis6": {POS: NOUN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUis7": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUmp1": {POS: NOUN, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUmp2": {POS: NOUN, "morph": "Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUmp3": {POS: NOUN, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUmp4": {POS: NOUN, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUmp5": {POS: NOUN, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUmp6": {POS: NOUN, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUmp7": {POS: NOUN, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Def|Number=Plur"}, + "SUms1": {POS: NOUN, "morph": "Animacy=Anim|Case=Nom|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUms2": {POS: NOUN, "morph": 
"Animacy=Anim|Case=Gen|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUms3": {POS: NOUN, "morph": "Animacy=Anim|Case=Dat|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUms4": {POS: NOUN, "morph": "Animacy=Anim|Case=Acc|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUms5": {POS: NOUN, "morph": "Animacy=Anim|Case=Voc|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUms6": {POS: NOUN, "morph": "Animacy=Anim|Case=Loc|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUms7": {POS: NOUN, "morph": "Animacy=Anim|Case=Ins|Gender=Masc|MorphPos=Def|Number=Sing"}, + "SUnp1": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|MorphPos=Def|Number=Plur"}, + "SUnp2": {POS: NOUN, "morph": "Case=Gen|Gender=Neut|MorphPos=Def|Number=Plur"}, + "SUnp3": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|MorphPos=Def|Number=Plur"}, + "SUnp4": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|MorphPos=Def|Number=Plur"}, + "SUnp5": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|MorphPos=Def|Number=Plur"}, + "SUnp6": {POS: NOUN, "morph": "Case=Loc|Gender=Neut|MorphPos=Def|Number=Plur"}, + "SUnp7": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|MorphPos=Def|Number=Plur"}, + "SUns1": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|MorphPos=Def|Number=Sing"}, + "SUns2": {POS: NOUN, "morph": "Case=Gen|Gender=Neut|MorphPos=Def|Number=Sing"}, + "SUns3": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|MorphPos=Def|Number=Sing"}, + "SUns4": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|MorphPos=Def|Number=Sing"}, + "SUns5": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|MorphPos=Def|Number=Sing"}, + "SUns6": {POS: NOUN, "morph": "Case=Loc|Gender=Neut|MorphPos=Def|Number=Sing"}, + "SUns7": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|MorphPos=Def|Number=Sing"}, + "T": {POS: PART, "morph": "_"}, + "TY": {POS: PART, "morph": "Mood=Cnd"}, + "VBepa-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBepa+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + 
"VBepb-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBepb+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBepc-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBepc+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBesa-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBesa+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBesb-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBesb+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBesc-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBesc+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBjpa-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=1|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBjpa+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=1|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBjpb-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=2|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBjpb+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=2|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBjpc-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=3|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBjpc+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBjsa-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBjsa+": {POS: VERB, 
"morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBjsb-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=2|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBjsb+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=2|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VBjsc-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Tense=Fut|VerbForm=Fin"}, + "VBjsc+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Tense=Fut|VerbForm=Fin"}, + "VHd-": {POS: VERB, "morph": "Aspect=Perf|Polarity=Neg|VerbForm=Conv"}, + "VHd+": {POS: VERB, "morph": "Aspect=Perf|Polarity=Pos|VerbForm=Conv"}, + "VHe-": {POS: VERB, "morph": "Aspect=Imp|Polarity=Neg|VerbForm=Conv"}, + "VHe+": {POS: VERB, "morph": "Aspect=Imp|Polarity=Pos|VerbForm=Conv"}, + "VHj-": {POS: VERB, "morph": "Aspect=Imp,Perf|Polarity=Neg|VerbForm=Conv"}, + "VHj+": {POS: VERB, "morph": "Aspect=Imp,Perf|Polarity=Pos|VerbForm=Conv"}, + "VId-": {POS: VERB, "morph": "Aspect=Perf|Polarity=Neg|VerbForm=Inf"}, + "VId+": {POS: VERB, "morph": "Aspect=Perf|Polarity=Pos|VerbForm=Inf"}, + "VIe-": {POS: VERB, "morph": "Aspect=Imp|Polarity=Neg|VerbForm=Inf"}, + "VIe+": {POS: VERB, "morph": "Aspect=Imp|Polarity=Pos|VerbForm=Inf"}, + "VIj-": {POS: VERB, "morph": "Aspect=Imp,Perf|Polarity=Neg|VerbForm=Inf"}, + "VIj+": {POS: VERB, "morph": "Aspect=Imp,Perf|Polarity=Pos|VerbForm=Inf"}, + "VKdpa-": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKdpa+": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKdpb-": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKdpb+": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKdpc-": {POS: VERB, "morph": 
"Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKdpc+": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKdsa-": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKdsa+": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKdsb-": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKdsb+": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKdsc-": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKdsc+": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKe-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKepa-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKepa+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKepb-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKepb+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKepc-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKepc+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKesa-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKesa+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKesb-": {POS: VERB, "morph": 
"Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKesb+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKesc-": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKesc+": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKjpa-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=1|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKjpa+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=1|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKjpb-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=2|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKjpb+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=2|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKjpc-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=3|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKjpc+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKjsa-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKjsa+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKjsb-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=2|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKjsb+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=2|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VKjsc-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Tense=Pres|VerbForm=Fin"}, + "VKjsc+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Tense=Pres|VerbForm=Fin"}, + "VLdpah-": {POS: VERB, "morph": "Aspect=Perf|Number=Plur|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdpah+": {POS: 
VERB, "morph": "Aspect=Perf|Number=Plur|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdpbh-": {POS: VERB, "morph": "Aspect=Perf|Number=Plur|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdpbh+": {POS: VERB, "morph": "Aspect=Perf|Number=Plur|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdpcf-": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdpcf+": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdpci-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdpci+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdpcm-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Perf|Gender=Masc|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdpcm+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Perf|Gender=Masc|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdpcn-": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdpcn+": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdsaf-": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdsaf+": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdsai-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdsai+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdsam-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Perf|Gender=Masc|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdsam+": {POS: VERB, 
"morph": "Animacy=Anim|Aspect=Perf|Gender=Masc|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdsan-": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdsan+": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdsbf-": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdsbf+": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdsbi-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdsbi+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdsbm-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Perf|Gender=Masc|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdsbm+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Perf|Gender=Masc|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdsbn-": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdsbn+": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdscf-": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdscf+": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdsci-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdsci+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdscm-": {POS: VERB, "morph": 
"Animacy=Anim|Aspect=Perf|Gender=Masc|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdscm+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Perf|Gender=Masc|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLdscn-": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLdscn+": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLepah-": {POS: VERB, "morph": "Aspect=Imp|Number=Plur|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLepah+": {POS: VERB, "morph": "Aspect=Imp|Number=Plur|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLepbh-": {POS: VERB, "morph": "Aspect=Imp|Number=Plur|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLepbh+": {POS: VERB, "morph": "Aspect=Imp|Number=Plur|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLepcf-": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLepcf+": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLepci-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLepci+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLepcm-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp|Gender=Masc|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLepcm+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp|Gender=Masc|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLepcn-": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLepcn+": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLesaf-": {POS: VERB, "morph": 
"Aspect=Imp|Gender=Fem|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLesaf+": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLesai-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLesai+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLesam-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp|Gender=Masc|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLesam+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp|Gender=Masc|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLesan-": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLesan+": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLesbf-": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLesbf+": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLesbi-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLesbi+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLesbm-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp|Gender=Masc|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLesbm+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp|Gender=Masc|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLesbn-": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLesbn+": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLescf-": 
{POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLescf+": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLesci-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLesci+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLescm-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp|Gender=Masc|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLescm+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp|Gender=Masc|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLescn-": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLescn+": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjpah-": {POS: VERB, "morph": "Aspect=Imp,Perf|Number=Plur|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjpah+": {POS: VERB, "morph": "Aspect=Imp,Perf|Number=Plur|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjpbh-": {POS: VERB, "morph": "Aspect=Imp,Perf|Number=Plur|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjpbh+": {POS: VERB, "morph": "Aspect=Imp,Perf|Number=Plur|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjpcf-": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Fem|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjpcf+": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Fem|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjpci-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp,Perf|Gender=Masc|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjpci+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp,Perf|Gender=Masc|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjpcm-": {POS: VERB, 
"morph": "Animacy=Anim|Aspect=Imp,Perf|Gender=Masc|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjpcm+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp,Perf|Gender=Masc|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjpcn-": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Neut|Number=Plur|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjpcn+": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Neut|Number=Plur|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjsaf-": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Fem|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjsaf+": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Fem|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjsai-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjsai+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjsam-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjsam+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjsan-": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Neut|Number=Sing|Person=1|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjsan+": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Neut|Number=Sing|Person=1|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjsbf-": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Fem|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjsbf+": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Fem|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjsbi-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjsbi+": {POS: VERB, "morph": 
"Animacy=Inan|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjsbm-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjsbm+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjsbn-": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Neut|Number=Sing|Person=2|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjsbn+": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Neut|Number=Sing|Person=2|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjscf-": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Fem|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjscf+": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Fem|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjsci-": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjsci+": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjscm-": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjscm+": {POS: VERB, "morph": "Animacy=Anim|Aspect=Imp,Perf|Gender=Masc|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VLjscn-": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Neut|Number=Sing|Person=3|Polarity=Neg|Tense=Past|VerbForm=Part"}, + "VLjscn+": {POS: VERB, "morph": "Aspect=Imp,Perf|Gender=Neut|Number=Sing|Person=3|Polarity=Pos|Tense=Past|VerbForm=Part"}, + "VMdpa-": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Plur|Person=1|Polarity=Neg|VerbForm=Fin"}, + "VMdpa+": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Plur|Person=1|Polarity=Pos|VerbForm=Fin"}, + "VMdpb-": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Plur|Person=2|Polarity=Neg|VerbForm=Fin"}, + 
"VMdpb+": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Plur|Person=2|Polarity=Pos|VerbForm=Fin"}, + "VMdsb-": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Sing|Person=2|Polarity=Neg|VerbForm=Fin"}, + "VMdsb+": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Sing|Person=2|Polarity=Pos|VerbForm=Fin"}, + "VMepa-": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Plur|Person=1|Polarity=Neg|VerbForm=Fin"}, + "VMepa+": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Plur|Person=1|Polarity=Pos|VerbForm=Fin"}, + "VMepb-": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Plur|Person=2|Polarity=Neg|VerbForm=Fin"}, + "VMepb+": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Plur|Person=2|Polarity=Pos|VerbForm=Fin"}, + "VMesb-": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Sing|Person=2|Polarity=Neg|VerbForm=Fin"}, + "VMesb+": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Sing|Person=2|Polarity=Pos|VerbForm=Fin"}, + "VMjpa-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Imp|Number=Plur|Person=1|Polarity=Neg|VerbForm=Fin"}, + "VMjpa+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Imp|Number=Plur|Person=1|Polarity=Pos|VerbForm=Fin"}, + "VMjpb-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Imp|Number=Plur|Person=2|Polarity=Neg|VerbForm=Fin"}, + "VMjpb+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Imp|Number=Plur|Person=2|Polarity=Pos|VerbForm=Fin"}, + "VMjsb-": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Imp|Number=Sing|Person=2|Polarity=Neg|VerbForm=Fin"}, + "VMjsb+": {POS: VERB, "morph": "Aspect=Imp,Perf|Mood=Imp|Number=Sing|Person=2|Polarity=Pos|VerbForm=Fin"}, + "W": {POS: X, "morph": "Abbr=Yes"}, + "Y": {POS: AUX, "morph": "Mood=Cnd"}, +} From 9c08d9baa31622e9e9daff37a9774774e42d8778 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 3 Feb 2020 13:10:46 +0100 Subject: [PATCH 33/49] Remove old sections [ci skip] (closes #4961) --- website/docs/usage/facts-figures.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/website/docs/usage/facts-figures.md 
b/website/docs/usage/facts-figures.md index f6f08ab52..e2549ecfc 100644 --- a/website/docs/usage/facts-figures.md +++ b/website/docs/usage/facts-figures.md @@ -5,8 +5,6 @@ next: /usage/spacy-101 menu: - ['Feature Comparison', 'comparison'] - ['Benchmarks', 'benchmarks'] - - ['Powered by spaCy', 'powered-by'] - - ['Other Libraries', 'other-libraries'] --- ## Feature comparison {#comparison} From 5d8cb60e43194519e4cd5e7d5fd94dcb34573857 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Tue, 11 Feb 2020 02:30:54 +0100 Subject: [PATCH 34/49] Update lower pin for srsly to 1.0.1 (#4976) --- requirements.txt | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1786ee186..4f0579313 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ thinc==7.4.0.dev0 blis>=0.4.0,<0.5.0 murmurhash>=0.28.0,<1.1.0 wasabi>=0.4.0,<1.1.0 -srsly>=0.1.0,<1.1.0 +srsly>=1.0.1,<1.1.0 catalogue>=0.0.7,<1.1.0 # Third party dependencies numpy>=1.15.0 diff --git a/setup.cfg b/setup.cfg index 2c8268517..55396e011 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,7 +47,7 @@ install_requires = thinc==7.4.0.dev0 blis>=0.4.0,<0.5.0 wasabi>=0.4.0,<1.1.0 - srsly>=0.1.0,<1.1.0 + srsly>=1.0.1,<1.1.0 catalogue>=0.0.7,<1.1.0 # Third-party dependencies tqdm>=4.38.0,<5.0.0 From 479e81bafc90f61556bab5583566e134a00f6aaa Mon Sep 17 00:00:00 2001 From: Julin S <48789920+ju-sh@users.noreply.github.com> Date: Tue, 11 Feb 2020 07:01:26 +0530 Subject: [PATCH 35/49] fix link (#4977) --- website/docs/usage/linguistic-features.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md index 3af7d9fd1..685619c88 100644 --- a/website/docs/usage/linguistic-features.md +++ b/website/docs/usage/linguistic-features.md @@ -327,7 +327,7 @@ displaCy in our [online demo](https://explosion.ai/demos/displacy).. 
### Disabling the parser {#disabling} In the [default models](/models), the parser is loaded and enabled as part of -the [standard processing pipeline](/usage/processing-pipelin). If you don't need +the [standard processing pipeline](/usage/processing-pipelines). If you don't need any of the syntactic information, you should disable the parser. Disabling the parser will make spaCy load and run much faster. If you want to load the parser, but need to disable it for specific documents, you can also control its use on From e1f777b15171fef086c20a5d2415f9f2154e1544 Mon Sep 17 00:00:00 2001 From: Antti Ajanki Date: Tue, 11 Feb 2020 03:32:43 +0200 Subject: [PATCH 36/49] Improvements for Finnish tokenizer (#4985) * don't split on a colon. Colon is used to attach suffixes for abbreviations * tokenize on any of LIST_HYPHENS (except a single hyphen), not just on -- * simplify infix rules by merging similar rules --- spacy/lang/fi/punctuation.py | 9 ++++----- spacy/lang/fi/tokenizer_exceptions.py | 3 +++ spacy/tests/lang/fi/test_tokenizer.py | 22 ++++++++++++++++++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/spacy/lang/fi/punctuation.py b/spacy/lang/fi/punctuation.py index 02eb1b200..a85c0b228 100644 --- a/spacy/lang/fi/punctuation.py +++ b/spacy/lang/fi/punctuation.py @@ -1,12 +1,13 @@ # coding: utf8 from __future__ import unicode_literals -from ..char_classes import LIST_ELLIPSES, LIST_ICONS +from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_HYPHENS from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER from ..punctuation import TOKENIZER_SUFFIXES _quotes = CONCAT_QUOTES.replace("'", "") +DASHES = "|".join(x for x in LIST_HYPHENS if x != "-") _infixes = ( LIST_ELLIPSES @@ -14,11 +15,9 @@ _infixes = ( + [ r"(?<=[{al}])\.(?=[{au}])".format(al=ALPHA_LOWER, au=ALPHA_UPPER), r"(?<=[{a}])[,!?](?=[{a}])".format(a=ALPHA), - r"(?<=[{a}])[:<>=](?=[{a}])".format(a=ALPHA), - r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA), 
r"(?<=[{a}])([{q}\)\]\(\[])(?=[{a}])".format(a=ALPHA, q=_quotes), - r"(?<=[{a}])--(?=[{a}])".format(a=ALPHA), - r"(?<=[{a}0-9])[:<>=/](?=[{a}])".format(a=ALPHA), + r"(?<=[{a}])(?:{d})(?=[{a}])".format(a=ALPHA, d=DASHES), + r"(?<=[{a}0-9])[<>=/](?=[{a}])".format(a=ALPHA), ] ) diff --git a/spacy/lang/fi/tokenizer_exceptions.py b/spacy/lang/fi/tokenizer_exceptions.py index d74deb22b..5469e345e 100644 --- a/spacy/lang/fi/tokenizer_exceptions.py +++ b/spacy/lang/fi/tokenizer_exceptions.py @@ -31,6 +31,9 @@ for exc_data in [ {ORTH: "myöh.", LEMMA: "myöhempi"}, {ORTH: "n.", LEMMA: "noin"}, {ORTH: "nimim.", LEMMA: "nimimerkki"}, + {ORTH: "n:o", LEMMA: "numero"}, + {ORTH: "N:o", LEMMA: "numero"}, + {ORTH: "nro", LEMMA: "numero"}, {ORTH: "ns.", LEMMA: "niin sanottu"}, {ORTH: "nyk.", LEMMA: "nykyinen"}, {ORTH: "oik.", LEMMA: "oikealla"}, diff --git a/spacy/tests/lang/fi/test_tokenizer.py b/spacy/tests/lang/fi/test_tokenizer.py index 17f6f0ccc..aab063982 100644 --- a/spacy/tests/lang/fi/test_tokenizer.py +++ b/spacy/tests/lang/fi/test_tokenizer.py @@ -19,6 +19,21 @@ HYPHENATED_TESTS = [ ) ] +ABBREVIATION_INFLECTION_TESTS = [ + ( + "VTT:ssa ennen v:ta 2010 suoritetut mittaukset", + ["VTT:ssa", "ennen", "v:ta", "2010", "suoritetut", "mittaukset"] + ), + ( + "ALV:n osuus on 24 %.", + ["ALV:n", "osuus", "on", "24", "%", "."] + ), + ( + "Hiihtäjä oli kilpailun 14:s.", + ["Hiihtäjä", "oli", "kilpailun", "14:s", "."] + ) +] + @pytest.mark.parametrize("text,expected_tokens", ABBREVIATION_TESTS) def test_fi_tokenizer_abbreviations(fi_tokenizer, text, expected_tokens): @@ -32,3 +47,10 @@ def test_fi_tokenizer_hyphenated_words(fi_tokenizer, text, expected_tokens): tokens = fi_tokenizer(text) token_list = [token.text for token in tokens if not token.is_space] assert expected_tokens == token_list + + +@pytest.mark.parametrize("text,expected_tokens", ABBREVIATION_INFLECTION_TESTS) +def test_fi_tokenizer_abbreviation_inflections(fi_tokenizer, text, expected_tokens): + tokens = 
fi_tokenizer(text) + token_list = [token.text for token in tokens if not token.is_space] + assert expected_tokens == token_list From 1c0184258878d78b6058f6f6e3638da214f207b6 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Tue, 11 Feb 2020 23:42:17 +0100 Subject: [PATCH 37/49] add pyx and pxd files to the distribution (#5000) --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 78655a5f4..1947b9140 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ recursive-include include *.h -recursive-include spacy *.txt +recursive-include spacy *.txt *.pyx *.pxd include LICENSE include README.md include bin/spacy From 842dfddbb96e598d8e2b27b305d4d3dfa4d69d83 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Tue, 11 Feb 2020 23:44:56 +0100 Subject: [PATCH 38/49] Standardize Greek tag map setup (#4997) * Rename `tag_map.py` to `tag_map_fine.py` to indicate that it's not the default tag map * Remove duplicate generic UD tag map and load `../tag_map.py` instead --- spacy/lang/el/__init__.py | 2 +- spacy/lang/el/{tag_map.py => tag_map_fine.py} | 0 spacy/lang/el/tag_map_general.py | 27 ------------------- 3 files changed, 1 insertion(+), 28 deletions(-) rename spacy/lang/el/{tag_map.py => tag_map_fine.py} (100%) delete mode 100644 spacy/lang/el/tag_map_general.py diff --git a/spacy/lang/el/__init__.py b/spacy/lang/el/__init__.py index 16863e6d7..6d551cc4e 100644 --- a/spacy/lang/el/__init__.py +++ b/spacy/lang/el/__init__.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS -from .tag_map_general import TAG_MAP +from ..tag_map import TAG_MAP from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS from .lemmatizer import GreekLemmatizer diff --git a/spacy/lang/el/tag_map.py b/spacy/lang/el/tag_map_fine.py similarity index 100% rename from spacy/lang/el/tag_map.py rename to spacy/lang/el/tag_map_fine.py diff --git 
a/spacy/lang/el/tag_map_general.py b/spacy/lang/el/tag_map_general.py deleted file mode 100644 index 42e64a013..000000000 --- a/spacy/lang/el/tag_map_general.py +++ /dev/null @@ -1,27 +0,0 @@ -# coding: utf8 -from __future__ import unicode_literals - -from ...symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ -from ...symbols import PUNCT, NUM, AUX, X, ADJ, VERB, PART, SPACE, CCONJ - - -TAG_MAP = { - "ADJ": {POS: ADJ}, - "ADV": {POS: ADV}, - "INTJ": {POS: INTJ}, - "NOUN": {POS: NOUN}, - "PROPN": {POS: PROPN}, - "VERB": {POS: VERB}, - "ADP": {POS: ADP}, - "CCONJ": {POS: CCONJ}, - "SCONJ": {POS: SCONJ}, - "PART": {POS: PART}, - "PUNCT": {POS: PUNCT}, - "SYM": {POS: SYM}, - "NUM": {POS: NUM}, - "PRON": {POS: PRON}, - "AUX": {POS: AUX}, - "SPACE": {POS: SPACE}, - "DET": {POS: DET}, - "X": {POS: X}, -} From 99a543367dc35b12aad00c4cd845ddd1f4870056 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Tue, 11 Feb 2020 23:45:41 +0100 Subject: [PATCH 39/49] Set GPU before loading any models in train CLI (#4989) Set the GPU before loading any existing models in the train CLI so that you can start with a base model and train on GPU. 
--- spacy/cli/train.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 7de1d445d..0a9285863 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -14,6 +14,7 @@ import contextlib import random from .._ml import create_default_optimizer +from ..util import use_gpu as set_gpu from ..attrs import PROB, IS_OOV, CLUSTER, LANG from ..gold import GoldCorpus from ..compat import path2str @@ -147,6 +148,18 @@ def train( disabled_pipes = None pipes_added = False msg.text("Training pipeline: {}".format(pipeline)) + if use_gpu >= 0: + activated_gpu = None + try: + activated_gpu = set_gpu(use_gpu) + except Exception as e: + msg.warn("Exception: {}".format(e)) + if activated_gpu is not None: + msg.text("Using GPU: {}".format(use_gpu)) + else: + msg.warn("Unable to activate GPU: {}".format(use_gpu)) + msg.text("Using CPU only") + use_gpu = -1 if base_model: msg.text("Starting with base model '{}'".format(base_model)) nlp = util.load_model(base_model) From 979a3fd1f51b7f3982f6d8c58ff327b122893913 Mon Sep 17 00:00:00 2001 From: nlptechbook <60931109+nlptechbook@users.noreply.github.com> Date: Sat, 15 Feb 2020 09:44:55 -0500 Subject: [PATCH 40/49] Update universe.json (#5022) e-book is available from https://nostarch.com/NLPPython --- website/meta/universe.json | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/website/meta/universe.json b/website/meta/universe.json index cf5978edc..e0e48a916 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -999,6 +999,17 @@ "author": "Graphbrain", "category": ["standalone"] }, + { + "type": "education", + "id": "nostarch-nlp-python", + "title": "Natural Language Processing Using Python", + "slogan": "No Starch Press, 2020", + "description": "Natural Language Processing Using Python is an introduction to natural language processing (NLP), the task of converting human language into data that a computer can process. 
The book uses spaCy, a leading Python library for NLP, to guide readers through common NLP tasks related to generating and understanding human language with code. It addresses problems like understanding a user's intent, continuing a conversation with a human, and maintaining the state of a conversation.", + "cover": "https://nostarch.com/sites/default/files/styles/uc_product_full/public/NaturalLanguageProcessing_final_v01.jpg", + "url": "https://nostarch.com/NLPPython", + "author": "Yuli Vasiliev", + "category": ["books"] + }, { "type": "education", "id": "oreilly-python-ds", From ff8e71f46d8ee52a0ec94d973c2bcb87f57c563d Mon Sep 17 00:00:00 2001 From: Christos Aridas Date: Sat, 15 Feb 2020 16:49:09 +0200 Subject: [PATCH 41/49] Update streamlit app (#5017) * Update streamlit app [ci skip] * Add all labels by default * Tidy up and auto-format Co-authored-by: Ines Montani --- examples/streamlit_spacy.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/streamlit_spacy.py b/examples/streamlit_spacy.py index 1afa1cd32..a2da123c2 100644 --- a/examples/streamlit_spacy.py +++ b/examples/streamlit_spacy.py @@ -26,12 +26,12 @@ DEFAULT_TEXT = "Mark Zuckerberg is the CEO of Facebook." HTML_WRAPPER = """
{}
""" -@st.cache(ignore_hash=True) +@st.cache(allow_output_mutation=True) def load_model(name): return spacy.load(name) -@st.cache(ignore_hash=True) +@st.cache(allow_output_mutation=True) def process_text(model_name, text): nlp = load_model(model_name) return nlp(text) @@ -79,7 +79,9 @@ if "ner" in nlp.pipe_names: st.header("Named Entities") st.sidebar.header("Named Entities") label_set = nlp.get_pipe("ner").labels - labels = st.sidebar.multiselect("Entity labels", label_set, label_set) + labels = st.sidebar.multiselect( + "Entity labels", options=label_set, default=list(label_set) + ) html = displacy.render(doc, style="ent", options={"ents": labels}) # Newlines seem to mess with the rendering html = html.replace("\n", " ") From a27c77ce62193fdd777353bbf93b20dc9eda142e Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Sat, 15 Feb 2020 15:50:17 +0100 Subject: [PATCH 42/49] add message when cli train script throws exception (#5009) * add message when cli train script throws exception * fix formatting --- spacy/cli/train.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 0a9285863..82d4da38e 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -508,6 +508,8 @@ def train( "score = {}".format(best_score, current_score) ) break + except Exception as e: + msg.warn("Aborting and saving the final best model. 
Encountered exception: {}".format(e)) finally: best_pipes = nlp.pipe_names if disabled_pipes: From 257246017572433af7825d561de573dae73828f0 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Sun, 16 Feb 2020 17:16:41 +0100 Subject: [PATCH 43/49] add tok2vec parameters to train script to facilitate init_tok2vec (#5021) --- spacy/cli/pretrain.py | 14 ++++++++++---- spacy/cli/train.py | 45 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py index c1aade2b2..aaec1ea75 100644 --- a/spacy/cli/pretrain.py +++ b/spacy/cli/pretrain.py @@ -34,7 +34,7 @@ from .train import _load_pretrained_tok2vec vectors_model=("Name or path to spaCy model with vectors to learn from"), output_dir=("Directory to write models to on each epoch", "positional", None, str), width=("Width of CNN layers", "option", "cw", int), - depth=("Depth of CNN layers", "option", "cd", int), + conv_depth=("Depth of CNN layers", "option", "cd", int), cnn_window=("Window size for CNN layers", "option", "cW", int), cnn_pieces=("Maxout size for CNN layers. 
1 for Mish", "option", "cP", int), use_chars=("Whether to use character-based embedding", "flag", "chr", bool), @@ -84,7 +84,7 @@ def pretrain( vectors_model, output_dir, width=96, - depth=4, + conv_depth=4, bilstm_depth=0, cnn_pieces=3, sa_depth=0, @@ -132,9 +132,15 @@ def pretrain( msg.info("Using GPU" if has_gpu else "Not using GPU") output_dir = Path(output_dir) + if output_dir.exists() and [p for p in output_dir.iterdir()]: + msg.warn( + "Output directory is not empty", + "It is better to use an empty directory or refer to a new output path, " + "then the new directory will be created for you.", + ) if not output_dir.exists(): output_dir.mkdir() - msg.good("Created output directory") + msg.good("Created output directory: {}".format(output_dir)) srsly.write_json(output_dir / "config.json", config) msg.good("Saved settings to config.json") @@ -162,7 +168,7 @@ def pretrain( Tok2Vec( width, embed_rows, - conv_depth=depth, + conv_depth=conv_depth, pretrained_vectors=pretrained_vectors, bilstm_depth=bilstm_depth, # Requires PyTorch. Experimental. subword_features=not use_chars, # Set to False for Chinese etc diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 82d4da38e..5af93a8f3 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -33,6 +33,13 @@ from .. import about pipeline=("Comma-separated names of pipeline components", "option", "p", str), replace_components=("Replace components from base model", "flag", "R", bool), vectors=("Model to load vectors from", "option", "v", str), + width=("Width of CNN layers of Tok2Vec component", "option", "cw", int), + conv_depth=("Depth of CNN layers of Tok2Vec component", "option", "cd", int), + cnn_window=("Window size for CNN layers of Tok2Vec component", "option", "cW", int), + cnn_pieces=("Maxout size for CNN layers of Tok2Vec component. 
1 for Mish", "option", "cP", int), + use_chars=("Whether to use character-based embedding of Tok2Vec component", "flag", "chr", bool), + bilstm_depth=("Depth of BiLSTM layers of Tok2Vec component (requires PyTorch)", "option", "lstm", int), + embed_rows=("Number of embedding rows of Tok2Vec component", "option", "er", int), n_iter=("Number of iterations", "option", "n", int), n_early_stopping=("Maximum number of training epochs without dev accuracy improvement", "option", "ne", int), n_examples=("Number of examples", "option", "ns", int), @@ -64,6 +71,13 @@ def train( pipeline="tagger,parser,ner", replace_components=False, vectors=None, + width=96, + conv_depth=4, + cnn_window=1, + cnn_pieces=3, + use_chars=False, + bilstm_depth=0, + embed_rows=2000, n_iter=30, n_early_stopping=None, n_examples=0, @@ -116,6 +130,7 @@ def train( ) if not output_path.exists(): output_path.mkdir() + msg.good("Created output directory: {}".format(output_path)) # Take dropout and batch size as generators of values -- dropout # starts high and decays sharply, to force the optimizer to explore. 
@@ -250,7 +265,15 @@ def train( optimizer = create_default_optimizer(Model.ops) else: # Start with a blank model, call begin_training - optimizer = nlp.begin_training(lambda: corpus.train_tuples, device=use_gpu) + cfg = {"device": use_gpu} + cfg["conv_depth"] = conv_depth + cfg["token_vector_width"] = width + cfg["bilstm_depth"] = bilstm_depth + cfg["cnn_maxout_pieces"] = cnn_pieces + cfg["embed_size"] = embed_rows + cfg["conv_window"] = cnn_window + cfg["subword_features"] = not use_chars + optimizer = nlp.begin_training(lambda: corpus.train_tuples, **cfg) nlp._optimizer = None @@ -375,13 +398,19 @@ def train( if not batch: continue docs, golds = zip(*batch) - nlp.update( - docs, - golds, - sgd=optimizer, - drop=next(dropout_rates), - losses=losses, - ) + try: + nlp.update( + docs, + golds, + sgd=optimizer, + drop=next(dropout_rates), + losses=losses, + ) + except ValueError as e: + msg.warn("Error during training") + if init_tok2vec: + msg.warn("Did you provide the same parameters during 'train' as during 'pretrain'?") + msg.fail("Original error message: {}".format(e), exits=1) if raw_text: # If raw text is available, perform 'rehearsal' updates, # which use unlabelled data to reduce overfitting. From 5b102963bf67b6f49fe1c88d1e6fe9f337e6a621 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Sun, 16 Feb 2020 17:17:09 +0100 Subject: [PATCH 44/49] Require HEAD for is_parsed in Doc.from_array() (#5011) Modify flag settings so that `DEP` is not sufficient to set `is_parsed` and only run `set_children_from_heads()` if `HEAD` is provided. Then the combination `[SENT_START, DEP]` will set deps and not clobber sent starts with a lot of one-word sentences. 
--- spacy/tests/doc/test_doc_api.py | 35 ++++++++++++++++++++++++++++++++- spacy/tokens/doc.pyx | 2 +- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index 86c7fbf72..52f856d3e 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -7,7 +7,7 @@ import numpy from spacy.tokens import Doc, Span from spacy.vocab import Vocab from spacy.errors import ModelsWarning -from spacy.attrs import ENT_TYPE, ENT_IOB +from spacy.attrs import ENT_TYPE, ENT_IOB, SENT_START, HEAD, DEP from ..util import get_doc @@ -274,6 +274,39 @@ def test_doc_is_nered(en_vocab): assert new_doc.is_nered +def test_doc_from_array_sent_starts(en_vocab): + words = ["I", "live", "in", "New", "York", ".", "I", "like", "cats", "."] + heads = [0, 0, 0, 0, 0, 0, 6, 6, 6, 6] + deps = ["ROOT", "dep", "dep", "dep", "dep", "dep", "ROOT", "dep", "dep", "dep", "dep"] + doc = Doc(en_vocab, words=words) + for i, (dep, head) in enumerate(zip(deps, heads)): + doc[i].dep_ = dep + doc[i].head = doc[head] + if head == i: + doc[i].is_sent_start = True + doc.is_parsed + + attrs = [SENT_START, HEAD] + arr = doc.to_array(attrs) + new_doc = Doc(en_vocab, words=words) + with pytest.raises(ValueError): + new_doc.from_array(attrs, arr) + + attrs = [SENT_START, DEP] + arr = doc.to_array(attrs) + new_doc = Doc(en_vocab, words=words) + new_doc.from_array(attrs, arr) + assert [t.is_sent_start for t in doc] == [t.is_sent_start for t in new_doc] + assert not new_doc.is_parsed + + attrs = [HEAD, DEP] + arr = doc.to_array(attrs) + new_doc = Doc(en_vocab, words=words) + new_doc.from_array(attrs, arr) + assert [t.is_sent_start for t in doc] == [t.is_sent_start for t in new_doc] + assert new_doc.is_parsed + + def test_doc_lang(en_vocab): doc = Doc(en_vocab, words=["Hello", "world"]) assert doc.lang_ == "en" diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 4aee21153..04e02fd98 100644 --- a/spacy/tokens/doc.pyx +++ 
b/spacy/tokens/doc.pyx @@ -813,7 +813,7 @@ cdef class Doc: if attr_ids[j] != TAG: Token.set_struct_attr(token, attr_ids[j], array[i, j]) # Set flags - self.is_parsed = bool(self.is_parsed or HEAD in attrs or DEP in attrs) + self.is_parsed = bool(self.is_parsed or HEAD in attrs) self.is_tagged = bool(self.is_tagged or TAG in attrs or POS in attrs) # If document is parsed, set children if self.is_parsed: From 0c47a53b5ece01d5740eea7203400b0f90ce2f15 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Sun, 16 Feb 2020 17:19:41 +0100 Subject: [PATCH 45/49] Use int only in key2row for better performance (#4990) Cast all keys and rows to `int` in `vectors.key2row` for more efficient access and serialization. --- spacy/vectors.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 6b26bf123..c6526b89d 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -283,7 +283,11 @@ cdef class Vectors: DOCS: https://spacy.io/api/vectors#add """ - key = get_string_id(key) + # use int for all keys and rows in key2row for more efficient access + # and serialization + key = int(get_string_id(key)) + if row is not None: + row = int(row) if row is None and key in self.key2row: row = self.key2row[key] elif row is None: From 3b22eb651be9e80160efa4fcdbd453a71b6de857 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Sun, 16 Feb 2020 17:20:36 +0100 Subject: [PATCH 46/49] Sync Span __eq__ and __hash__ (#5005) * Sync Span __eq__ and __hash__ Use the same tuple for `__eq__` and `__hash__`, including all attributes except `vector` and `vector_norm`. * Update entity comparison in tests Update `assert_docs_equal()` test util to compare `Span` properties for ents rather than `Span` objects. 
--- spacy/tests/doc/test_span.py | 9 +++++++++ spacy/tests/util.py | 6 +++++- spacy/tokens/span.pyx | 13 +++++++++---- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py index 01bb93c50..917f22e9c 100644 --- a/spacy/tests/doc/test_span.py +++ b/spacy/tests/doc/test_span.py @@ -279,3 +279,12 @@ def test_filter_spans(doc): assert len(filtered[1]) == 5 assert filtered[0].start == 1 and filtered[0].end == 4 assert filtered[1].start == 5 and filtered[1].end == 10 + + +def test_span_eq_hash(doc, doc_not_parsed): + assert doc[0:2] == doc[0:2] + assert doc[0:2] != doc[1:3] + assert doc[0:2] != doc_not_parsed[0:2] + assert hash(doc[0:2]) == hash(doc[0:2]) + assert hash(doc[0:2]) != hash(doc[1:3]) + assert hash(doc[0:2]) != hash(doc_not_parsed[0:2]) diff --git a/spacy/tests/util.py b/spacy/tests/util.py index 175480fe7..9ee5b89f8 100644 --- a/spacy/tests/util.py +++ b/spacy/tests/util.py @@ -95,7 +95,11 @@ def assert_docs_equal(doc1, doc2): assert [t.ent_type for t in doc1] == [t.ent_type for t in doc2] assert [t.ent_iob for t in doc1] == [t.ent_iob for t in doc2] - assert [ent for ent in doc1.ents] == [ent for ent in doc2.ents] + for ent1, ent2 in zip(doc1.ents, doc2.ents): + assert ent1.start == ent2.start + assert ent1.end == ent2.end + assert ent1.label == ent2.label + assert ent1.kb_id == ent2.kb_id def assert_packed_msg_equal(b1, b2): diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 24857790b..35c70f236 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -127,22 +127,27 @@ cdef class Span: return False else: return True - # Eq + # < if op == 0: return self.start_char < other.start_char + # <= elif op == 1: return self.start_char <= other.start_char + # == elif op == 2: - return self.start_char == other.start_char and self.end_char == other.end_char + return (self.doc, self.start_char, self.end_char, self.label, self.kb_id) == (other.doc, other.start_char, 
other.end_char, other.label, other.kb_id) + # != elif op == 3: - return self.start_char != other.start_char or self.end_char != other.end_char + return (self.doc, self.start_char, self.end_char, self.label, self.kb_id) != (other.doc, other.start_char, other.end_char, other.label, other.kb_id) + # > elif op == 4: return self.start_char > other.start_char + # >= elif op == 5: return self.start_char >= other.start_char def __hash__(self): - return hash((self.doc, self.label, self.start_char, self.end_char)) + return hash((self.doc, self.start_char, self.end_char, self.label, self.kb_id)) def __len__(self): """Get the number of tokens in the span. From 72c964bcf408f34ecf7e9da94404213edec3e9e6 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Sun, 16 Feb 2020 17:21:18 +0100 Subject: [PATCH 47/49] define pretrained_dims which is used by build_text_classifier (#5004) --- spacy/language.py | 1 + spacy/pipeline/pipes.pyx | 1 + 2 files changed, 2 insertions(+) diff --git a/spacy/language.py b/spacy/language.py index 5544b6341..869fa09a7 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -608,6 +608,7 @@ class Language(object): link_vectors_to_models(self.vocab) if self.vocab.vectors.data.shape[1]: cfg["pretrained_vectors"] = self.vocab.vectors.name + cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1] if sgd is None: sgd = create_default_optimizer(Model.ops) self._optimizer = sgd diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index b4fecf5cb..3b190debe 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -1044,6 +1044,7 @@ class TextCategorizer(Pipe): self.add_label(cat) if self.model is True: self.cfg["pretrained_vectors"] = kwargs.get("pretrained_vectors") + self.cfg["pretrained_dims"] = kwargs.get("pretrained_dims") self.require_labels() self.model = self.Model(len(self.labels), **self.cfg) link_vectors_to_models(self.vocab) From f6ed07b85c0b9204b5d388eb91da5cee30d5b842 Mon Sep 17 00:00:00 2001 From: Kabir Khan 
Date: Sun, 16 Feb 2020 09:17:47 -0800 Subject: [PATCH 48/49] Use nlp.pipe in EntityRuler for phrase patterns in add_patterns (#4931) * Fix ent_ids and labels properties when id attribute used in patterns * use set for labels * sort end_ids for comparison in entity_ruler tests * fixing entity_ruler ent_ids test * add to set * Run make_doc optimistically if using phrase matcher patterns. * remove unused coveragerc I was testing with * format * Refactor EntityRuler.add_patterns to use nlp.pipe for phrase patterns. Improves speed substantially. * Removing old add_patterns function * Fixing spacing * Make sure token_patterns loaded as well, before generator was being emptied in from_disk --- spacy/pipeline/entityruler.py | 41 +++++++++++++++++++++-- website/docs/usage/rule-based-matching.md | 27 +++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 1c8429049..c3ef429e9 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -8,7 +8,7 @@ from ..language import component from ..errors import Errors from ..compat import basestring_ from ..util import ensure_path, to_disk, from_disk -from ..tokens import Span +from ..tokens import Doc, Span from ..matcher import Matcher, PhraseMatcher DEFAULT_ENT_ID_SEP = "||" @@ -162,6 +162,7 @@ class EntityRuler(object): @property def patterns(self): """Get all patterns that were added to the entity ruler. + RETURNS (list): The original patterns, one dictionary per pattern. 
DOCS: https://spacy.io/api/entityruler#patterns @@ -194,6 +195,7 @@ class EntityRuler(object): DOCS: https://spacy.io/api/entityruler#add_patterns """ + # disable the nlp components after this one in case they hadn't been initialized / deserialised yet try: current_index = self.nlp.pipe_names.index(self.name) @@ -203,7 +205,33 @@ class EntityRuler(object): except ValueError: subsequent_pipes = [] with self.nlp.disable_pipes(subsequent_pipes): + token_patterns = [] + phrase_pattern_labels = [] + phrase_pattern_texts = [] + phrase_pattern_ids = [] + for entry in patterns: + if isinstance(entry["pattern"], basestring_): + phrase_pattern_labels.append(entry["label"]) + phrase_pattern_texts.append(entry["pattern"]) + phrase_pattern_ids.append(entry.get("id")) + elif isinstance(entry["pattern"], list): + token_patterns.append(entry) + + phrase_patterns = [] + for label, pattern, ent_id in zip( + phrase_pattern_labels, + self.nlp.pipe(phrase_pattern_texts), + phrase_pattern_ids + ): + phrase_pattern = { + "label": label, "pattern": pattern, "id": ent_id + } + if ent_id: + phrase_pattern["id"] = ent_id + phrase_patterns.append(phrase_pattern) + + for entry in token_patterns + phrase_patterns: label = entry["label"] if "id" in entry: ent_label = label @@ -212,8 +240,8 @@ class EntityRuler(object): self._ent_ids[key] = (ent_label, entry["id"]) pattern = entry["pattern"] - if isinstance(pattern, basestring_): - self.phrase_patterns[label].append(self.nlp(pattern)) + if isinstance(pattern, Doc): + self.phrase_patterns[label].append(pattern) elif isinstance(pattern, list): self.token_patterns[label].append(pattern) else: @@ -226,6 +254,8 @@ class EntityRuler(object): def _split_label(self, label): """Split Entity label into ent_label and ent_id if it contains self.ent_id_sep + label (str): The value of label in a pattern entry + RETURNS (tuple): ent_label, ent_id """ if self.ent_id_sep in label: @@ -239,6 +269,9 @@ class EntityRuler(object): def _create_label(self, label, 
ent_id): """Join Entity label with ent_id if the pattern has an `id` attribute + label (str): The label to set for ent.label_ + ent_id (str): The entity ID to join to the label + RETURNS (str): The ent_label joined with configured `ent_id_sep` """ if isinstance(ent_id, basestring_): @@ -250,6 +283,7 @@ class EntityRuler(object): patterns_bytes (bytes): The bytestring to load. **kwargs: Other config paramters, mostly for consistency. + RETURNS (EntityRuler): The loaded entity ruler. DOCS: https://spacy.io/api/entityruler#from_bytes @@ -292,6 +326,7 @@ class EntityRuler(object): path (unicode / Path): The JSONL file to load. **kwargs: Other config paramters, mostly for consistency. + RETURNS (EntityRuler): The loaded entity ruler. DOCS: https://spacy.io/api/entityruler#from_disk diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index cae4f074a..f8866aec1 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -1096,6 +1096,33 @@ with the patterns. When you load the model back in, all pipeline components will be restored and deserialized – including the entity ruler. This lets you ship powerful model packages with binary weights _and_ rules included! +### Using a large number of phrase patterns {#entityruler-large-phrase-patterns new="2.2.4"} + +When using a large number of **phrase patterns** (roughly > 10000) it's useful to understand how the `add_patterns` function of the EntityRuler works. For each **phrase pattern**, +the EntityRuler calls the nlp object to construct a doc object. This happens in case you try +to add the EntityRuler at the end of an existing pipeline with, for example, a POS tagger and want to +extract matches based on the pattern's POS signature. + +In this case you would pass a config value of `phrase_matcher_attr="POS"` for the EntityRuler. 
+ +Running the full language pipeline across every pattern in a large list scales linearly and can therefore take a long time on large amounts of phrase patterns. + +As of spaCy 2.2.4 the `add_patterns` function has been refactored to use nlp.pipe on all phrase patterns resulting in about a 10x-20x speed up with 5,000-100,000 phrase patterns respectively. + +Even with this speedup (but especially if you're using an older version) the `add_patterns` function can still take a long time. + +An easy workaround to make this function run faster is disabling the other language pipes +while adding the phrase patterns. + +```python +entityruler = EntityRuler(nlp) +patterns = [{"label": "TEST", "pattern": str(i)} for i in range(100000)] + +other_pipes = [p for p in nlp.pipe_names if p != "tagger"] +with nlp.disable_pipes(*other_pipes): + entityruler.add_patterns(patterns) +``` + ## Combining models and rules {#models-rules} You can combine statistical and rule-based components in a variety of ways. From c7e4fe9c5c979aad5327888c84f77db7a3da38cb Mon Sep 17 00:00:00 2001 From: Jan Jessewitsch <61113983+Jan-711@users.noreply.github.com> Date: Mon, 17 Feb 2020 18:59:22 +0100 Subject: [PATCH 49/49] Fix/Improve german stop words (#5024) * Fix german stop words Two stop words ("einige" and "einigen") are sticking together. Remove three nouns that may serve as stop words in a specific context (e.g. religious or news) but are not applicable for general use. 
* Create Jan-711.md --- .github/contributors/Jan-711.md | 106 ++++++++++++++++++++++++++++++++ spacy/lang/de/stop_words.py | 9 ++- 2 files changed, 110 insertions(+), 5 deletions(-) create mode 100644 .github/contributors/Jan-711.md diff --git a/.github/contributors/Jan-711.md b/.github/contributors/Jan-711.md new file mode 100644 index 000000000..60297640c --- /dev/null +++ b/.github/contributors/Jan-711.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. 
With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. 
Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statement below. Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. 
+ +## Contributor Details + +| Field | Entry | +|------------------------------- | -------------------- | +| Name | Jan Jessewitsch | +| Company name (if applicable) | | +| Title or role (if applicable) | | +| Date | 16.02.2020 | +| GitHub username | Jan-711 | +| Website (optional) | | diff --git a/spacy/lang/de/stop_words.py b/spacy/lang/de/stop_words.py index cf3204d5e..69134124f 100644 --- a/spacy/lang/de/stop_words.py +++ b/spacy/lang/de/stop_words.py @@ -22,14 +22,14 @@ dort drei drin dritte dritten dritter drittes du durch durchaus dürfen dürft durfte durften eben ebenso ehrlich eigen eigene eigenen eigener eigenes ein einander eine -einem einen einer eines einigeeinigen einiger einiges einmal einmaleins elf en +einem einen einer eines einige einigen einiger einiges einmal einmaleins elf en ende endlich entweder er erst erste ersten erster erstes es etwa etwas euch früher fünf fünfte fünften fünfter fünftes für gab ganz ganze ganzen ganzer ganzes gar gedurft gegen gegenüber gehabt gehen geht gekannt gekonnt gemacht gemocht gemusst genug gerade gern gesagt geschweige -gewesen gewollt geworden gibt ging gleich gott gross groß grosse große grossen +gewesen gewollt geworden gibt ging gleich gross groß grosse große grossen großen grosser großer grosses großes gut gute guter gutes habe haben habt hast hat hatte hätte hatten hätten heisst heißt her heute hier @@ -47,9 +47,8 @@ kleines kommen kommt können könnt konnte könnte konnten kurz lang lange leicht leider lieber los machen macht machte mag magst man manche manchem manchen mancher manches mehr -mein meine meinem meinen meiner meines mensch menschen mich mir mit mittel -mochte möchte mochten mögen möglich mögt morgen muss muß müssen musst müsst -musste mussten +mein meine meinem meinen meiner meines mich mir mit mittel mochte möchte mochten +mögen möglich mögt morgen muss muß müssen musst müsst musste mussten na nach nachdem nahm natürlich neben nein neue neuen neun neunte neunten neunter neuntes nicht nichts 
nie niemand niemandem niemanden noch nun nur