diff --git a/.buildkite/sdist.yml b/.buildkite/sdist.yml
deleted file mode 100644
index 9b94e3752..000000000
--- a/.buildkite/sdist.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-steps:
- -
- command: "fab env clean make test sdist"
- label: ":dizzy: :python:"
- artifact_paths: "dist/*.tar.gz"
- - wait
- - trigger: "spacy-sdist-against-models"
- label: ":dizzy: :hammer:"
- build:
- env:
- SPACY_VERSION: "{$SPACY_VERSION}"
diff --git a/.buildkite/train.yml b/.buildkite/train.yml
deleted file mode 100644
index b257db87c..000000000
--- a/.buildkite/train.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-steps:
- -
- command: "fab env clean make test wheel"
- label: ":dizzy: :python:"
- artifact_paths: "dist/*.whl"
- - wait
- - trigger: "spacy-train-from-wheel"
- label: ":dizzy: :train:"
- build:
- env:
- SPACY_VERSION: "{$SPACY_VERSION}"
diff --git a/.github/contributors/tiangolo.md b/.github/contributors/tiangolo.md
new file mode 100644
index 000000000..5fd253fe9
--- /dev/null
+++ b/.github/contributors/tiangolo.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+ * you hereby assign to us joint ownership, and to the extent that such
+ assignment is or becomes invalid, ineffective or unenforceable, you hereby
+ grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+ royalty-free, unrestricted license to exercise all rights under those
+ copyrights. This includes, at our option, the right to sublicense these same
+ rights to third parties through multiple levels of sublicensees or other
+ licensing arrangements;
+
+ * you agree that each of us can do all things in relation to your
+ contribution as if each of us were the sole owners, and if one of us makes
+ a derivative work of your contribution, the one who makes the derivative
+ work (or has it made) will be the sole owner of that derivative work;
+
+ * you agree that you will not assert any moral rights in your contribution
+ against us, our licensees or transferees;
+
+ * you agree that we may register a copyright in your contribution and
+ exercise all ownership rights associated with it; and
+
+ * you agree that neither of us has any duty to consult with, obtain the
+ consent of, pay or render an accounting to the other for any use or
+ distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+ * make, have made, use, sell, offer to sell, import, and otherwise transfer
+ your contribution in whole or in part, alone or in combination with or
+ included in any product, work or materials arising out of the project to
+ which your contribution was submitted, and
+
+ * at our option, to sublicense these same rights to third parties through
+ multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+ * Each contribution that you submit is and shall be an original work of
+ authorship and you can legally grant the rights set out in this SCA;
+
+ * to the best of your knowledge, each contribution will not violate any
+ third party's copyrights, trademarks, patents, or other intellectual
+ property rights; and
+
+ * each contribution shall be in compliance with U.S. export control laws and
+ other applicable export and import laws. You agree to notify us if you
+ become aware of any circumstance which would make any of the foregoing
+ representations inaccurate in any respect. We may publicly disclose your
+ participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+ * [ ] I am signing on behalf of myself as an individual and no other person
+ or entity, including my employer, has or will have rights with respect to my
+ contributions.
+
+ * [ ] I am signing on behalf of my employer or a legal entity and I have the
+ actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field | Entry |
+|------------------------------- | -------------------- |
+| Name | Sebastián Ramírez |
+| Company name (if applicable) | |
+| Title or role (if applicable) | |
+| Date | 2020-07-01 |
+| GitHub username | tiangolo |
+| Website (optional) | |
diff --git a/.gitignore b/.gitignore
index eb6be73dd..4dbcd67f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,8 +18,7 @@ website/.npm
website/logs
*.log
npm-debug.log*
-website/www/
-website/_deploy.sh
+quickstart-training-generator.js
# Cython / C extensions
cythonize.json
@@ -44,12 +43,14 @@ __pycache__/
.env*
.~env/
.venv
+env3.6/
venv/
env3.*/
.dev
.denv
.pypyenv
.pytest_cache/
+.mypy_cache/
# Distribution / packaging
env/
@@ -119,3 +120,6 @@ Desktop.ini
# Pycharm project files
*.idea
+
+# IPython
+.ipynb_checkpoints/
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index e3ce53024..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-language: python
-sudo: false
-cache: pip
-dist: trusty
-group: edge
-python:
- - "2.7"
-os:
- - linux
-install:
- - "pip install -r requirements.txt"
- - "python setup.py build_ext --inplace"
- - "pip install -e ."
-script:
- - "cat /proc/cpuinfo | grep flags | head -n 1"
- - "python -m pytest --tb=native spacy"
-branches:
- except:
- - spacy.io
-notifications:
- slack:
- secure: F8GvqnweSdzImuLL64TpfG0i5rYl89liyr9tmFVsHl4c0DNiDuGhZivUz0M1broS8svE3OPOllLfQbACG/4KxD890qfF9MoHzvRDlp7U+RtwMV/YAkYn8MGWjPIbRbX0HpGdY7O2Rc9Qy4Kk0T8ZgiqXYIqAz2Eva9/9BlSmsJQ=
- email: false
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3c2b56cd3..0abde2abf 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -5,7 +5,7 @@
Thanks for your interest in contributing to spaCy 🎉 The project is maintained
by [@honnibal](https://github.com/honnibal) and [@ines](https://github.com/ines),
and we'll do our best to help you get started. This page will give you a quick
-overview of how things are organised and most importantly, how to get involved.
+overview of how things are organized and most importantly, how to get involved.
## Table of contents
@@ -43,33 +43,33 @@ can also submit a [regression test](#fixing-bugs) straight away. When you're
opening an issue to report the bug, simply refer to your pull request in the
issue body. A few more tips:
-- **Describing your issue:** Try to provide as many details as possible. What
- exactly goes wrong? _How_ is it failing? Is there an error?
- "XY doesn't work" usually isn't that helpful for tracking down problems. Always
- remember to include the code you ran and if possible, extract only the relevant
- parts and don't just dump your entire script. This will make it easier for us to
- reproduce the error.
+- **Describing your issue:** Try to provide as many details as possible. What
+ exactly goes wrong? _How_ is it failing? Is there an error?
+ "XY doesn't work" usually isn't that helpful for tracking down problems. Always
+ remember to include the code you ran and if possible, extract only the relevant
+ parts and don't just dump your entire script. This will make it easier for us to
+ reproduce the error.
-- **Getting info about your spaCy installation and environment:** If you're
- using spaCy v1.7+, you can use the command line interface to print details and
- even format them as Markdown to copy-paste into GitHub issues:
- `python -m spacy info --markdown`.
+- **Getting info about your spaCy installation and environment:** If you're
+ using spaCy v1.7+, you can use the command line interface to print details and
+ even format them as Markdown to copy-paste into GitHub issues:
+ `python -m spacy info --markdown`.
-- **Checking the model compatibility:** If you're having problems with a
- [statistical model](https://spacy.io/models), it may be because the
- model is incompatible with your spaCy installation. In spaCy v2.0+, you can check
- this on the command line by running `python -m spacy validate`.
+- **Checking the model compatibility:** If you're having problems with a
+ [statistical model](https://spacy.io/models), it may be because the
+ model is incompatible with your spaCy installation. In spaCy v2.0+, you can check
+ this on the command line by running `python -m spacy validate`.
-- **Sharing a model's output, like dependencies and entities:** spaCy v2.0+
- comes with [built-in visualizers](https://spacy.io/usage/visualizers) that
- you can run from within your script or a Jupyter notebook. For some issues, it's
- helpful to **include a screenshot** of the visualization. You can simply drag and
- drop the image into GitHub's editor and it will be uploaded and included.
+- **Sharing a model's output, like dependencies and entities:** spaCy v2.0+
+ comes with [built-in visualizers](https://spacy.io/usage/visualizers) that
+ you can run from within your script or a Jupyter notebook. For some issues, it's
+ helpful to **include a screenshot** of the visualization. You can simply drag and
+ drop the image into GitHub's editor and it will be uploaded and included.
-- **Sharing long blocks of code or logs:** If you need to include long code,
- logs or tracebacks, you can wrap them in `<details>` and `</details>`. This
- [collapses the content](https://developer.mozilla.org/en/docs/Web/HTML/Element/details)
- so it only becomes visible on click, making the issue easier to read and follow.
+- **Sharing long blocks of code or logs:** If you need to include long code,
+ logs or tracebacks, you can wrap them in `<details>` and `</details>`. This
+ [collapses the content](https://developer.mozilla.org/en/docs/Web/HTML/Element/details)
+ so it only becomes visible on click, making the issue easier to read and follow.
### Issue labels
@@ -94,39 +94,39 @@ shipped in the core library, and what could be provided in other packages. Our
philosophy is to prefer a smaller core library. We generally ask the following
questions:
-- **What would this feature look like if implemented in a separate package?**
- Some features would be very difficult to implement externally – for example,
- changes to spaCy's built-in methods. In contrast, a library of word
- alignment functions could easily live as a separate package that depended on
- spaCy — there's little difference between writing `import word_aligner` and
- `import spacy.word_aligner`. spaCy v2.0+ makes it easy to implement
- [custom pipeline components](https://spacy.io/usage/processing-pipelines#custom-components),
- and add your own attributes, properties and methods to the `Doc`, `Token` and
- `Span`. If you're looking to implement a new spaCy feature, starting with a
- custom component package is usually the best strategy. You won't have to worry
- about spaCy's internals and you can test your module in an isolated
- environment. And if it works well, we can always integrate it into the core
- library later.
+- **What would this feature look like if implemented in a separate package?**
+ Some features would be very difficult to implement externally – for example,
+ changes to spaCy's built-in methods. In contrast, a library of word
+ alignment functions could easily live as a separate package that depended on
+ spaCy — there's little difference between writing `import word_aligner` and
+ `import spacy.word_aligner`. spaCy v2.0+ makes it easy to implement
+ [custom pipeline components](https://spacy.io/usage/processing-pipelines#custom-components),
+ and add your own attributes, properties and methods to the `Doc`, `Token` and
+ `Span`. If you're looking to implement a new spaCy feature, starting with a
+ custom component package is usually the best strategy. You won't have to worry
+ about spaCy's internals and you can test your module in an isolated
+ environment. And if it works well, we can always integrate it into the core
+ library later.
-- **Would the feature be easier to implement if it relied on "heavy" dependencies spaCy doesn't currently require?**
- Python has a very rich ecosystem. Libraries like scikit-learn, SciPy, Gensim or
- TensorFlow/Keras do lots of useful things — but we don't want to have them as
- dependencies. If the feature requires functionality in one of these libraries,
- it's probably better to break it out into a different package.
+- **Would the feature be easier to implement if it relied on "heavy" dependencies spaCy doesn't currently require?**
+ Python has a very rich ecosystem. Libraries like scikit-learn, SciPy, Gensim or
+ TensorFlow/Keras do lots of useful things — but we don't want to have them as
+ dependencies. If the feature requires functionality in one of these libraries,
+ it's probably better to break it out into a different package.
-- **Is the feature orthogonal to the current spaCy functionality, or overlapping?**
- spaCy strongly prefers to avoid having 6 different ways of doing the same thing.
- As better techniques are developed, we prefer to drop support for "the old way".
- However, it's rare that one approach _entirely_ dominates another. It's very
- common that there's still a use-case for the "obsolete" approach. For instance,
- [WordNet](https://wordnet.princeton.edu/) is still very useful — but word
- vectors are better for most use-cases, and the two approaches to lexical
- semantics do a lot of the same things. spaCy therefore only supports word
- vectors, and support for WordNet is currently left for other packages.
+- **Is the feature orthogonal to the current spaCy functionality, or overlapping?**
+ spaCy strongly prefers to avoid having 6 different ways of doing the same thing.
+ As better techniques are developed, we prefer to drop support for "the old way".
+ However, it's rare that one approach _entirely_ dominates another. It's very
+ common that there's still a use-case for the "obsolete" approach. For instance,
+ [WordNet](https://wordnet.princeton.edu/) is still very useful — but word
+ vectors are better for most use-cases, and the two approaches to lexical
+ semantics do a lot of the same things. spaCy therefore only supports word
+ vectors, and support for WordNet is currently left for other packages.
-- **Do you need the feature to get basic things done?** We do want spaCy to be
- at least somewhat self-contained. If we keep needing some feature in our
- recipes, that does provide some argument for bringing it "in house".
+- **Do you need the feature to get basic things done?** We do want spaCy to be
+ at least somewhat self-contained. If we keep needing some feature in our
+ recipes, that does provide some argument for bringing it "in house".
### Getting started
@@ -195,7 +195,7 @@ modules in `.py` files, not Cython modules in `.pyx` and `.pxd` files.**
### Code formatting
[`black`](https://github.com/ambv/black) is an opinionated Python code
-formatter, optimised to produce readable code and small diffs. You can run
+formatter, optimized to produce readable code and small diffs. You can run
`black` from the command-line, or via your code editor. For example, if you're
using [Visual Studio Code](https://code.visualstudio.com/), you can add the
following to your `settings.json` to use `black` for formatting and auto-format
@@ -203,10 +203,10 @@ your files on save:
```json
{
- "python.formatting.provider": "black",
- "[python]": {
- "editor.formatOnSave": true
- }
+ "python.formatting.provider": "black",
+ "[python]": {
+ "editor.formatOnSave": true
+ }
}
```
@@ -216,7 +216,7 @@ list of available editor integrations.
#### Disabling formatting
There are a few cases where auto-formatting doesn't improve readability – for
-example, in some of the the language data files like the `tag_map.py`, or in
+example, in some of the language data files like the `tag_map.py`, or in
the tests that construct `Doc` objects from lists of words and other labels.
Wrapping a block in `# fmt: off` and `# fmt: on` lets you disable formatting
for that particular code. Here's an example:
@@ -280,29 +280,13 @@ except: # noqa: E722
### Python conventions
-All Python code must be written in an **intersection of Python 2 and Python 3**.
-This is easy in Cython, but somewhat ugly in Python. Logic that deals with
-Python or platform compatibility should only live in
-[`spacy.compat`](spacy/compat.py). To distinguish them from the builtin
-functions, replacement functions are suffixed with an underscore, for example
-`unicode_`. If you need to access the user's version or platform information,
-for example to show more specific error messages, you can use the `is_config()`
-helper function.
-
-```python
-from .compat import unicode_, is_config
-
-compatible_unicode = unicode_('hello world')
-if is_config(windows=True, python2=True):
- print("You are using Python 2 on Windows.")
-```
-
+All Python code must be **compatible with Python 3.6+**.
Code that interacts with the file-system should accept objects that follow the
`pathlib.Path` API, without assuming that the object inherits from `pathlib.Path`.
If the function is user-facing and takes a path as an argument, it should check
whether the path is provided as a string. Strings should be converted to
`pathlib.Path` objects. Serialization and deserialization functions should always
-accept **file-like objects**, as it makes the library io-agnostic. Working on
+accept **file-like objects**, as it makes the library IO-agnostic. Working on
buffers makes the code more general, easier to test, and compatible with Python
3's asynchronous IO.
@@ -400,7 +384,7 @@ of Python and C++, with additional complexity and syntax from numpy. The
many "traps for new players". Working in Cython is very rewarding once you're
over the initial learning curve. As with C and C++, the first way you write
something in Cython will often be the performance-optimal approach. In contrast,
-Python optimisation generally requires a lot of experimentation. Is it faster to
+Python optimization generally requires a lot of experimentation. Is it faster to
have an `if item in my_dict` check, or to use `.get()`? What about `try`/`except`?
Does this numpy operation create a copy? There's no way to guess the answers to
these questions, and you'll usually be dissatisfied with your results — so
@@ -413,10 +397,10 @@ Python. If it's not fast enough the first time, just switch to Cython.
### Resources to get you started
-- [PEP 8 Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/) (python.org)
-- [Official Cython documentation](http://docs.cython.org/en/latest/) (cython.org)
-- [Writing C in Cython](https://explosion.ai/blog/writing-c-in-cython) (explosion.ai)
-- [Multi-threading spaCy’s parser and named entity recogniser](https://explosion.ai/blog/multithreading-with-cython) (explosion.ai)
+- [PEP 8 Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/) (python.org)
+- [Official Cython documentation](http://docs.cython.org/en/latest/) (cython.org)
+- [Writing C in Cython](https://explosion.ai/blog/writing-c-in-cython) (explosion.ai)
+- [Multi-threading spaCy’s parser and named entity recognizer](https://explosion.ai/blog/multithreading-with-cython) (explosion.ai)
## Adding tests
@@ -428,7 +412,7 @@ name. For example, tests for the `Tokenizer` can be found in
all test files and test functions need to be prefixed with `test_`.
When adding tests, make sure to use descriptive names, keep the code short and
-concise and only test for one behaviour at a time. Try to `parametrize` test
+concise and only test for one behavior at a time. Try to `parametrize` test
cases wherever possible, use our pre-defined fixtures for spaCy components and
avoid unnecessary imports.
@@ -456,25 +440,25 @@ simply click on the "Suggest edits" button at the bottom of a page.
We're very excited about all the new possibilities for **community extensions**
and plugins in spaCy v2.0, and we can't wait to see what you build with it!
-- An extension or plugin should add substantial functionality, be
- **well-documented** and **open-source**. It should be available for users to download
- and install as a Python package – for example via [PyPi](http://pypi.python.org).
+- An extension or plugin should add substantial functionality, be
+ **well-documented** and **open-source**. It should be available for users to download
+ and install as a Python package – for example via [PyPI](https://pypi.org).
-- Extensions that write to `Doc`, `Token` or `Span` attributes should be wrapped
- as [pipeline components](https://spacy.io/usage/processing-pipelines#custom-components)
- that users can **add to their processing pipeline** using `nlp.add_pipe()`.
+- Extensions that write to `Doc`, `Token` or `Span` attributes should be wrapped
+ as [pipeline components](https://spacy.io/usage/processing-pipelines#custom-components)
+ that users can **add to their processing pipeline** using `nlp.add_pipe()`.
-- When publishing your extension on GitHub, **tag it** with the topics
- [`spacy`](https://github.com/topics/spacy?o=desc&s=stars) and
- [`spacy-extensions`](https://github.com/topics/spacy-extension?o=desc&s=stars)
- to make it easier to find. Those are also the topics we're linking to from the
- spaCy website. If you're sharing your project on Twitter, feel free to tag
- [@spacy_io](https://twitter.com/spacy_io) so we can check it out.
+- When publishing your extension on GitHub, **tag it** with the topics
+ [`spacy`](https://github.com/topics/spacy?o=desc&s=stars) and
+ [`spacy-extension`](https://github.com/topics/spacy-extension?o=desc&s=stars)
+ to make it easier to find. Those are also the topics we're linking to from the
+ spaCy website. If you're sharing your project on Twitter, feel free to tag
+ [@spacy_io](https://twitter.com/spacy_io) so we can check it out.
-- Once your extension is published, you can open an issue on the
- [issue tracker](https://github.com/explosion/spacy/issues) to suggest it for the
- [resources directory](https://spacy.io/usage/resources#extensions) on the
- website.
+- Once your extension is published, you can open an issue on the
+ [issue tracker](https://github.com/explosion/spacy/issues) to suggest it for the
+ [resources directory](https://spacy.io/usage/resources#extensions) on the
+ website.
📖 **For more tips and best practices, see the [checklist for developing spaCy extensions](https://spacy.io/usage/processing-pipelines#extensions).**
diff --git a/MANIFEST.in b/MANIFEST.in
index 9819c7b70..b4887cdb8 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,9 +1,9 @@
recursive-include include *.h
-recursive-include spacy *.txt *.pyx *.pxd
+recursive-include spacy *.pyx *.pxd *.txt *.cfg *.jinja
include LICENSE
include README.md
-include bin/spacy
include pyproject.toml
recursive-exclude spacy/lang *.json
recursive-include spacy/lang *.json.gz
+recursive-include spacy/cli *.json *.yml
recursive-include licenses *
diff --git a/Makefile b/Makefile
index 6c0a59ba8..c4e77d101 100644
--- a/Makefile
+++ b/Makefile
@@ -1,29 +1,57 @@
SHELL := /bin/bash
-PYVER := 3.6
+
+ifndef SPACY_EXTRAS
+override SPACY_EXTRAS = spacy-lookups-data jieba pkuseg==0.0.25 sudachipy sudachidict_core
+endif
+
+ifndef PYVER
+override PYVER = 3.6
+endif
+
VENV := ./env$(PYVER)
version := $(shell "bin/get-version.sh")
+package := $(shell "bin/get-package.sh")
-dist/spacy-$(version).pex : wheelhouse/spacy-$(version).stamp
- $(VENV)/bin/pex -f ./wheelhouse --no-index --disable-cache -m spacy -o $@ spacy==$(version) jsonschema spacy-lookups-data jieba pkuseg==0.0.25 sudachipy sudachidict_core
+ifndef SPACY_BIN
+override SPACY_BIN = $(package)-$(version).pex
+endif
+
+# No quotes here: make keeps them literally, so $(WHEELHOUSE)/... targets would never match real files.
+ifndef WHEELHOUSE
+override WHEELHOUSE = ./wheelhouse
+endif
+
+
+dist/$(SPACY_BIN) : $(WHEELHOUSE)/spacy-$(PYVER)-$(version).stamp
+ $(VENV)/bin/pex \
+ -f $(WHEELHOUSE) \
+ --no-index \
+ --disable-cache \
+ -m spacy \
+ -o $@ \
+ $(package)==$(version) \
+ $(SPACY_EXTRAS)
chmod a+rx $@
cp $@ dist/spacy.pex
-dist/pytest.pex : wheelhouse/pytest-*.whl
- $(VENV)/bin/pex -f ./wheelhouse --no-index --disable-cache -m pytest -o $@ pytest pytest-timeout mock
+dist/pytest.pex : $(WHEELHOUSE)/pytest-*.whl
+ $(VENV)/bin/pex -f $(WHEELHOUSE) --no-index --disable-cache -m pytest -o $@ pytest pytest-timeout mock
chmod a+rx $@
-wheelhouse/spacy-$(version).stamp : $(VENV)/bin/pex setup.py spacy/*.py* spacy/*/*.py*
- $(VENV)/bin/pip wheel . -w ./wheelhouse
- $(VENV)/bin/pip wheel jsonschema spacy-lookups-data jieba pkuseg==0.0.25 sudachipy sudachidict_core -w ./wheelhouse
+$(WHEELHOUSE)/spacy-$(PYVER)-$(version).stamp : $(VENV)/bin/pex setup.py spacy/*.py* spacy/*/*.py*
+ $(VENV)/bin/pip wheel . -w $(WHEELHOUSE)
+ $(VENV)/bin/pip wheel $(SPACY_EXTRAS) -w $(WHEELHOUSE)
+
touch $@
-wheelhouse/pytest-%.whl : $(VENV)/bin/pex
- $(VENV)/bin/pip wheel pytest pytest-timeout mock -w ./wheelhouse
+$(WHEELHOUSE)/pytest-%.whl : $(VENV)/bin/pex
+ $(VENV)/bin/pip wheel pytest pytest-timeout mock -w $(WHEELHOUSE)
$(VENV)/bin/pex :
python$(PYVER) -m venv $(VENV)
$(VENV)/bin/pip install -U pip setuptools pex wheel
+ $(VENV)/bin/pip install numpy
.PHONY : clean test
@@ -33,6 +61,6 @@ test : dist/spacy-$(version).pex dist/pytest.pex
clean : setup.py
rm -rf dist/*
- rm -rf ./wheelhouse
+ rm -rf $(WHEELHOUSE)/*
rm -rf $(VENV)
python setup.py clean --all
diff --git a/README.md b/README.md
index 4b5f3d0fa..cef2a1fdd 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,6 @@ It's commercial open-source software, released under the MIT license.
[Check out the release notes here.](https://github.com/explosion/spaCy/releases)
[![Azure Pipelines]()](https://dev.azure.com/explosion-ai/public/_build?definitionId=8)
-[![Travis Build Status]()](https://travis-ci.org/explosion/spaCy)
[![Current Release Version](https://img.shields.io/github/release/explosion/spacy.svg?style=flat-square&logo=github)](https://github.com/explosion/spaCy/releases)
[![pypi Version](https://img.shields.io/pypi/v/spacy.svg?style=flat-square&logo=pypi&logoColor=white)](https://pypi.org/project/spacy/)
[![conda Version](https://img.shields.io/conda/vn/conda-forge/spacy.svg?style=flat-square&logo=conda-forge&logoColor=white)](https://anaconda.org/conda-forge/spacy)
@@ -50,9 +49,8 @@ It's commercial open-source software, released under the MIT license.
## 💬 Where to ask questions
-The spaCy project is maintained by [@honnibal](https://github.com/honnibal) and
-[@ines](https://github.com/ines), along with core contributors
-[@svlandeg](https://github.com/svlandeg) and
+The spaCy project is maintained by [@honnibal](https://github.com/honnibal),
+[@ines](https://github.com/ines), [@svlandeg](https://github.com/svlandeg) and
[@adrianeboyd](https://github.com/adrianeboyd). Please understand that we won't
be able to provide individual support via email. We also believe that help is
much more valuable if it's shared publicly, so that more people can benefit from
@@ -98,12 +96,19 @@ For detailed installation instructions, see the
- **Operating system**: macOS / OS X · Linux · Windows (Cygwin, MinGW, Visual
Studio)
-- **Python version**: Python 2.7, 3.5+ (only 64 bit)
+- **Python version**: Python 3.6+ (only 64 bit)
- **Package managers**: [pip] · [conda] (via `conda-forge`)
[pip]: https://pypi.org/project/spacy/
[conda]: https://anaconda.org/conda-forge/spacy
+> ⚠️ **Important note for Python 3.8:** We can't yet ship pre-compiled binary
+> wheels for spaCy that work on Python 3.8, as we're still waiting for our CI
+> providers and other tooling to support it. This means that in order to run
+> spaCy on Python 3.8, you'll need [a compiler installed](#source) and compile
+> the library and its Cython dependencies locally. If this is causing problems
+> for you, the easiest solution is to **use Python 3.7** in the meantime.
+
### pip
Using pip, spaCy releases are available as source packages and binary wheels (as
@@ -188,7 +193,7 @@ pip install https://github.com/explosion/spacy-models/releases/download/en_core_
### Loading and using models
-To load a model, use `spacy.load()` with the model name, a shortcut link or a
+To load a model, use `spacy.load()` with the model name or a
path to the model data directory.
```python
@@ -263,9 +268,7 @@ and git preinstalled.
Install a version of the
[Visual C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/)
or [Visual Studio Express](https://visualstudio.microsoft.com/vs/express/) that
-matches the version that was used to compile your Python interpreter. For
-official distributions these are VS 2008 (Python 2.7), VS 2010 (Python 3.4) and
-VS 2015 (Python 3.5).
+matches the version that was used to compile your Python interpreter.
## Run tests
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 147d2e903..4dfb51296 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -27,7 +27,7 @@ jobs:
inputs:
versionSpec: '3.7'
- script: |
- pip install flake8
+ pip install flake8==3.5.0
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823 --show-source --statistics
displayName: 'flake8'
@@ -35,12 +35,6 @@ jobs:
dependsOn: 'Validate'
strategy:
matrix:
- Python35Linux:
- imageName: 'ubuntu-16.04'
- python.version: '3.5'
- Python35Windows:
- imageName: 'vs2017-win2016'
- python.version: '3.5'
Python36Linux:
imageName: 'ubuntu-16.04'
python.version: '3.6'
@@ -58,7 +52,7 @@ jobs:
# imageName: 'vs2017-win2016'
# python.version: '3.7'
# Python37Mac:
- # imageName: 'macos-10.13'
+ # imageName: 'macos-10.14'
# python.version: '3.7'
Python38Linux:
imageName: 'ubuntu-16.04'
diff --git a/bin/cythonize.py b/bin/cythonize.py
deleted file mode 100755
index 4814f8df0..000000000
--- a/bin/cythonize.py
+++ /dev/null
@@ -1,169 +0,0 @@
-#!/usr/bin/env python
-""" cythonize.py
-
-Cythonize pyx files into C++ files as needed.
-
-Usage: cythonize.py [root]
-
-Checks pyx files to see if they have been changed relative to their
-corresponding C++ files. If they have, then runs cython on these files to
-recreate the C++ files.
-
-Additionally, checks pxd files and setup.py if they have been changed. If
-they have, rebuilds everything.
-
-Change detection based on file hashes stored in JSON format.
-
-For now, this script should be run by developers when changing Cython files
-and the resulting C++ files checked in, so that end-users (and Python-only
-developers) do not get the Cython dependencies.
-
-Based upon:
-
-https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py
-https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py
-
-Note: this script does not check any of the dependent C++ libraries.
-"""
-from __future__ import print_function
-
-import os
-import sys
-import json
-import hashlib
-import subprocess
-import argparse
-
-
-HASH_FILE = "cythonize.json"
-
-
-def process_pyx(fromfile, tofile, language_level="-2"):
- print("Processing %s" % fromfile)
- try:
- from Cython.Compiler.Version import version as cython_version
- from distutils.version import LooseVersion
-
- if LooseVersion(cython_version) < LooseVersion("0.19"):
- raise Exception("Require Cython >= 0.19")
-
- except ImportError:
- pass
-
- flags = ["--fast-fail", language_level]
- if tofile.endswith(".cpp"):
- flags += ["--cplus"]
-
- try:
- try:
- r = subprocess.call(
- ["cython"] + flags + ["-o", tofile, fromfile], env=os.environ
- ) # See Issue #791
- if r != 0:
- raise Exception("Cython failed")
- except OSError:
- # There are ways of installing Cython that don't result in a cython
- # executable on the path, see gh-2397.
- r = subprocess.call(
- [
- sys.executable,
- "-c",
- "import sys; from Cython.Compiler.Main import "
- "setuptools_main as main; sys.exit(main())",
- ]
- + flags
- + ["-o", tofile, fromfile]
- )
- if r != 0:
- raise Exception("Cython failed")
- except OSError:
- raise OSError("Cython needs to be installed")
-
-
-def preserve_cwd(path, func, *args):
- orig_cwd = os.getcwd()
- try:
- os.chdir(path)
- func(*args)
- finally:
- os.chdir(orig_cwd)
-
-
-def load_hashes(filename):
- try:
- return json.load(open(filename))
- except (ValueError, IOError):
- return {}
-
-
-def save_hashes(hash_db, filename):
- with open(filename, "w") as f:
- f.write(json.dumps(hash_db))
-
-
-def get_hash(path):
- return hashlib.md5(open(path, "rb").read()).hexdigest()
-
-
-def hash_changed(base, path, db):
- full_path = os.path.normpath(os.path.join(base, path))
- return not get_hash(full_path) == db.get(full_path)
-
-
-def hash_add(base, path, db):
- full_path = os.path.normpath(os.path.join(base, path))
- db[full_path] = get_hash(full_path)
-
-
-def process(base, filename, db):
- root, ext = os.path.splitext(filename)
- if ext in [".pyx", ".cpp"]:
- if hash_changed(base, filename, db) or not os.path.isfile(
- os.path.join(base, root + ".cpp")
- ):
- preserve_cwd(base, process_pyx, root + ".pyx", root + ".cpp")
- hash_add(base, root + ".cpp", db)
- hash_add(base, root + ".pyx", db)
-
-
-def check_changes(root, db):
- res = False
- new_db = {}
-
- setup_filename = "setup.py"
- hash_add(".", setup_filename, new_db)
- if hash_changed(".", setup_filename, db):
- res = True
-
- for base, _, files in os.walk(root):
- for filename in files:
- if filename.endswith(".pxd"):
- hash_add(base, filename, new_db)
- if hash_changed(base, filename, db):
- res = True
-
- if res:
- db.clear()
- db.update(new_db)
- return res
-
-
-def run(root):
- db = load_hashes(HASH_FILE)
-
- try:
- check_changes(root, db)
- for base, _, files in os.walk(root):
- for filename in files:
- process(base, filename, db)
- finally:
- save_hashes(db, HASH_FILE)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(
- description="Cythonize pyx files into C++ files as needed"
- )
- parser.add_argument("root", help="root directory")
- args = parser.parse_args()
- run(args.root)
diff --git a/bin/get-package.sh b/bin/get-package.sh
new file mode 100755
index 000000000..d60b930b4
--- /dev/null
+++ b/bin/get-package.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# Print the package name: the value assigned to __title__ in spacy/about.py.
+set -e
+# spacy/about.py is resolved relative to the current working directory.
+package=$(grep "__title__ = " spacy/about.py)
+package=${package/__title__ = }  # drop the assignment prefix
+package=${package/\'/}  # each substitution removes one quote character, so
+package=${package/\'/}  # two passes per quote style strip the opening and
+package=${package/\"/}  # closing quote whichever style about.py uses
+package=${package/\"/}
+# Quote the expansion so the name is printed without word splitting/globbing.
+echo "$package"
diff --git a/bin/load_reddit.py b/bin/load_reddit.py
deleted file mode 100644
index afddd3798..000000000
--- a/bin/load_reddit.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import bz2
-import re
-import srsly
-import sys
-import random
-import datetime
-import plac
-from pathlib import Path
-
-_unset = object()
-
-
-class Reddit(object):
- """Stream cleaned comments from Reddit."""
-
- pre_format_re = re.compile(r"^[`*~]")
- post_format_re = re.compile(r"[`*~]$")
- url_re = re.compile(r"\[([^]]+)\]\(%%URL\)")
- link_re = re.compile(r"\[([^]]+)\]\(https?://[^\)]+\)")
-
- def __init__(self, file_path, meta_keys={"subreddit": "section"}):
- """
- file_path (unicode / Path): Path to archive or directory of archives.
- meta_keys (dict): Meta data key included in the Reddit corpus, mapped
- to display name in Prodigy meta.
- RETURNS (Reddit): The Reddit loader.
- """
- self.meta = meta_keys
- file_path = Path(file_path)
- if not file_path.exists():
- raise IOError("Can't find file path: {}".format(file_path))
- if not file_path.is_dir():
- self.files = [file_path]
- else:
- self.files = list(file_path.iterdir())
-
- def __iter__(self):
- for file_path in self.iter_files():
- with bz2.open(str(file_path)) as f:
- for line in f:
- line = line.strip()
- if not line:
- continue
- comment = srsly.json_loads(line)
- if self.is_valid(comment):
- text = self.strip_tags(comment["body"])
- yield {"text": text}
-
- def get_meta(self, item):
- return {name: item.get(key, "n/a") for key, name in self.meta.items()}
-
- def iter_files(self):
- for file_path in self.files:
- yield file_path
-
- def strip_tags(self, text):
- text = self.link_re.sub(r"\1", text)
- text = text.replace(">", ">").replace("<", "<")
- text = self.pre_format_re.sub("", text)
- text = self.post_format_re.sub("", text)
- text = re.sub(r"\s+", " ", text)
- return text.strip()
-
- def is_valid(self, comment):
- return (
- comment["body"] is not None
- and comment["body"] != "[deleted]"
- and comment["body"] != "[removed]"
- )
-
-
-def main(path):
- reddit = Reddit(path)
- for comment in reddit:
- print(srsly.json_dumps(comment))
-
-
-if __name__ == "__main__":
- import socket
-
- try:
- BrokenPipeError
- except NameError:
- BrokenPipeError = socket.error
- try:
- plac.call(main)
- except BrokenPipeError:
- import os, sys
-
- # Python flushes standard streams on exit; redirect remaining output
- # to devnull to avoid another BrokenPipeError at shutdown
- devnull = os.open(os.devnull, os.O_WRONLY)
- os.dup2(devnull, sys.stdout.fileno())
- sys.exit(1) # Python exits with error code 1 on EPIPE
diff --git a/bin/spacy b/bin/spacy
deleted file mode 100644
index 11359669c..000000000
--- a/bin/spacy
+++ /dev/null
@@ -1,2 +0,0 @@
-#! /bin/sh
-python -m spacy "$@"
diff --git a/bin/train_word_vectors.py b/bin/train_word_vectors.py
deleted file mode 100644
index 663ce060d..000000000
--- a/bin/train_word_vectors.py
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/usr/bin/env python
-from __future__ import print_function, unicode_literals, division
-
-import logging
-from pathlib import Path
-from collections import defaultdict
-from gensim.models import Word2Vec
-import plac
-import spacy
-
-logger = logging.getLogger(__name__)
-
-
-class Corpus(object):
- def __init__(self, directory, nlp):
- self.directory = directory
- self.nlp = nlp
-
- def __iter__(self):
- for text_loc in iter_dir(self.directory):
- with text_loc.open("r", encoding="utf-8") as file_:
- text = file_.read()
-
- # This is to keep the input to the blank model (which doesn't
- # sentencize) from being too long. It works particularly well with
- # the output of [WikiExtractor](https://github.com/attardi/wikiextractor)
- paragraphs = text.split('\n\n')
- for par in paragraphs:
- yield [word.orth_ for word in self.nlp(par)]
-
-
-def iter_dir(loc):
- dir_path = Path(loc)
- for fn_path in dir_path.iterdir():
- if fn_path.is_dir():
- for sub_path in fn_path.iterdir():
- yield sub_path
- else:
- yield fn_path
-
-
-@plac.annotations(
- lang=("ISO language code"),
- in_dir=("Location of input directory"),
- out_loc=("Location of output file"),
- n_workers=("Number of workers", "option", "n", int),
- size=("Dimension of the word vectors", "option", "d", int),
- window=("Context window size", "option", "w", int),
- min_count=("Min count", "option", "m", int),
- negative=("Number of negative samples", "option", "g", int),
- nr_iter=("Number of iterations", "option", "i", int),
-)
-def main(
- lang,
- in_dir,
- out_loc,
- negative=5,
- n_workers=4,
- window=5,
- size=128,
- min_count=10,
- nr_iter=5,
-):
- logging.basicConfig(
- format="%(asctime)s : %(levelname)s : %(message)s", level=logging.INFO
- )
- nlp = spacy.blank(lang)
- corpus = Corpus(in_dir, nlp)
- model = Word2Vec(
- sentences=corpus,
- size=size,
- window=window,
- min_count=min_count,
- workers=n_workers,
- sample=1e-5,
- negative=negative,
- )
- model.save(out_loc)
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/bin/ud/__init__.py b/bin/ud/__init__.py
deleted file mode 100644
index 119c46ba4..000000000
--- a/bin/ud/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .conll17_ud_eval import main as ud_evaluate # noqa: F401
-from .ud_train import main as ud_train # noqa: F401
diff --git a/bin/ud/conll17_ud_eval.py b/bin/ud/conll17_ud_eval.py
deleted file mode 100644
index 88acfabac..000000000
--- a/bin/ud/conll17_ud_eval.py
+++ /dev/null
@@ -1,614 +0,0 @@
-#!/usr/bin/env python
-# flake8: noqa
-
-# CoNLL 2017 UD Parsing evaluation script.
-#
-# Compatible with Python 2.7 and 3.2+, can be used either as a module
-# or a standalone executable.
-#
-# Copyright 2017 Institute of Formal and Applied Linguistics (UFAL),
-# Faculty of Mathematics and Physics, Charles University, Czech Republic.
-#
-# Changelog:
-# - [02 Jan 2017] Version 0.9: Initial release
-# - [25 Jan 2017] Version 0.9.1: Fix bug in LCS alignment computation
-# - [10 Mar 2017] Version 1.0: Add documentation and test
-# Compare HEADs correctly using aligned words
-# Allow evaluation with errorneous spaces in forms
-# Compare forms in LCS case insensitively
-# Detect cycles and multiple root nodes
-# Compute AlignedAccuracy
-
-# Command line usage
-# ------------------
-# conll17_ud_eval.py [-v] [-w weights_file] gold_conllu_file system_conllu_file
-#
-# - if no -v is given, only the CoNLL17 UD Shared Task evaluation LAS metrics
-# is printed
-# - if -v is given, several metrics are printed (as precision, recall, F1 score,
-# and in case the metric is computed on aligned words also accuracy on these):
-# - Tokens: how well do the gold tokens match system tokens
-# - Sentences: how well do the gold sentences match system sentences
-# - Words: how well can the gold words be aligned to system words
-# - UPOS: using aligned words, how well does UPOS match
-# - XPOS: using aligned words, how well does XPOS match
-# - Feats: using aligned words, how well does FEATS match
-# - AllTags: using aligned words, how well does UPOS+XPOS+FEATS match
-# - Lemmas: using aligned words, how well does LEMMA match
-# - UAS: using aligned words, how well does HEAD match
-# - LAS: using aligned words, how well does HEAD+DEPREL(ignoring subtypes) match
-# - if weights_file is given (with lines containing deprel-weight pairs),
-# one more metric is shown:
-# - WeightedLAS: as LAS, but each deprel (ignoring subtypes) has different weight
-
-# API usage
-# ---------
-# - load_conllu(file)
-# - loads CoNLL-U file from given file object to an internal representation
-# - the file object should return str on both Python 2 and Python 3
-# - raises UDError exception if the given file cannot be loaded
-# - evaluate(gold_ud, system_ud)
-# - evaluate the given gold and system CoNLL-U files (loaded with load_conllu)
-# - raises UDError if the concatenated tokens of gold and system file do not match
-# - returns a dictionary with the metrics described above, each metrics having
-# four fields: precision, recall, f1 and aligned_accuracy (when using aligned
-# words, otherwise this is None)
-
-# Description of token matching
-# -----------------------------
-# In order to match tokens of gold file and system file, we consider the text
-# resulting from concatenation of gold tokens and text resulting from
-# concatenation of system tokens. These texts should match -- if they do not,
-# the evaluation fails.
-#
-# If the texts do match, every token is represented as a range in this original
-# text, and tokens are equal only if their range is the same.
-
-# Description of word matching
-# ----------------------------
-# When matching words of gold file and system file, we first match the tokens.
-# The words which are also tokens are matched as tokens, but words in multi-word
-# tokens have to be handled differently.
-#
-# To handle multi-word tokens, we start by finding "multi-word spans".
-# Multi-word span is a span in the original text such that
-# - it contains at least one multi-word token
-# - all multi-word tokens in the span (considering both gold and system ones)
-# are completely inside the span (i.e., they do not "stick out")
-# - the multi-word span is as small as possible
-#
-# For every multi-word span, we align the gold and system words completely
-# inside this span using LCS on their FORMs. The words not intersecting
-# (even partially) any multi-word span are then aligned as tokens.
-
-
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import io
-import sys
-import unittest
-
-# CoNLL-U column names
-ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10)
-
-# UD Error is used when raising exceptions in this module
-class UDError(Exception):
- pass
-
-# Load given CoNLL-U file into internal representation
-def load_conllu(file, check_parse=True):
- # Internal representation classes
- class UDRepresentation:
- def __init__(self):
- # Characters of all the tokens in the whole file.
- # Whitespace between tokens is not included.
- self.characters = []
- # List of UDSpan instances with start&end indices into `characters`.
- self.tokens = []
- # List of UDWord instances.
- self.words = []
- # List of UDSpan instances with start&end indices into `characters`.
- self.sentences = []
- class UDSpan:
- def __init__(self, start, end, characters):
- self.start = start
- # Note that self.end marks the first position **after the end** of span,
- # so we can use characters[start:end] or range(start, end).
- self.end = end
- self.characters = characters
-
- @property
- def text(self):
- return ''.join(self.characters[self.start:self.end])
-
- def __str__(self):
- return self.text
-
- def __repr__(self):
- return self.text
- class UDWord:
- def __init__(self, span, columns, is_multiword):
- # Span of this word (or MWT, see below) within ud_representation.characters.
- self.span = span
- # 10 columns of the CoNLL-U file: ID, FORM, LEMMA,...
- self.columns = columns
- # is_multiword==True means that this word is part of a multi-word token.
- # In that case, self.span marks the span of the whole multi-word token.
- self.is_multiword = is_multiword
- # Reference to the UDWord instance representing the HEAD (or None if root).
- self.parent = None
- # Let's ignore language-specific deprel subtypes.
- self.columns[DEPREL] = columns[DEPREL].split(':')[0]
-
- ud = UDRepresentation()
-
- # Load the CoNLL-U file
- index, sentence_start = 0, None
- linenum = 0
- while True:
- line = file.readline()
- linenum += 1
- if not line:
- break
- line = line.rstrip("\r\n")
-
- # Handle sentence start boundaries
- if sentence_start is None:
- # Skip comments
- if line.startswith("#"):
- continue
- # Start a new sentence
- ud.sentences.append(UDSpan(index, 0, ud.characters))
- sentence_start = len(ud.words)
- if not line:
- # Add parent UDWord links and check there are no cycles
- def process_word(word):
- if word.parent == "remapping":
- raise UDError("There is a cycle in a sentence")
- if word.parent is None:
- head = int(word.columns[HEAD])
- if head > len(ud.words) - sentence_start:
- raise UDError("Line {}: HEAD '{}' points outside of the sentence".format(
- linenum, word.columns[HEAD]))
- if head:
- parent = ud.words[sentence_start + head - 1]
- word.parent = "remapping"
- process_word(parent)
- word.parent = parent
-
- for word in ud.words[sentence_start:]:
- process_word(word)
-
- # Check there is a single root node
- if check_parse:
- if len([word for word in ud.words[sentence_start:] if word.parent is None]) != 1:
- raise UDError("There are multiple roots in a sentence")
-
- # End the sentence
- ud.sentences[-1].end = index
- sentence_start = None
- continue
-
- # Read next token/word
- columns = line.split("\t")
- if len(columns) != 10:
- raise UDError("The CoNLL-U line {} does not contain 10 tab-separated columns: '{}'".format(linenum, line))
-
- # Skip empty nodes
- if "." in columns[ID]:
- continue
-
- # Delete spaces from FORM so gold.characters == system.characters
- # even if one of them tokenizes the space.
- columns[FORM] = columns[FORM].replace(" ", "")
- if not columns[FORM]:
- raise UDError("There is an empty FORM in the CoNLL-U file -- line %d" % linenum)
-
- # Save token
- ud.characters.extend(columns[FORM])
- ud.tokens.append(UDSpan(index, index + len(columns[FORM]), ud.characters))
- index += len(columns[FORM])
-
- # Handle multi-word tokens to save word(s)
- if "-" in columns[ID]:
- try:
- start, end = map(int, columns[ID].split("-"))
- except:
- raise UDError("Cannot parse multi-word token ID '{}'".format(columns[ID]))
-
- for _ in range(start, end + 1):
- word_line = file.readline().rstrip("\r\n")
- word_columns = word_line.split("\t")
- if len(word_columns) != 10:
- print(columns)
- raise UDError("The CoNLL-U line {} does not contain 10 tab-separated columns: '{}'".format(linenum, word_line))
- ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True))
- # Basic tokens/words
- else:
- try:
- word_id = int(columns[ID])
- except:
- raise UDError("Cannot parse word ID '{}'".format(columns[ID]))
- if word_id != len(ud.words) - sentence_start + 1:
- raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(columns[ID], columns[FORM], len(ud.words) - sentence_start + 1))
-
- try:
- head_id = int(columns[HEAD])
- except:
- raise UDError("Cannot parse HEAD '{}'".format(columns[HEAD]))
- if head_id < 0:
- raise UDError("HEAD cannot be negative")
-
- ud.words.append(UDWord(ud.tokens[-1], columns, is_multiword=False))
-
- if sentence_start is not None:
- raise UDError("The CoNLL-U file does not end with empty line")
-
- return ud
-
-# Evaluate the gold and system treebanks (loaded using load_conllu).
-def evaluate(gold_ud, system_ud, deprel_weights=None, check_parse=True):
- class Score:
- def __init__(self, gold_total, system_total, correct, aligned_total=None, undersegmented=None, oversegmented=None):
- self.precision = correct / system_total if system_total else 0.0
- self.recall = correct / gold_total if gold_total else 0.0
- self.f1 = 2 * correct / (system_total + gold_total) if system_total + gold_total else 0.0
- self.aligned_accuracy = correct / aligned_total if aligned_total else aligned_total
- self.undersegmented = undersegmented
- self.oversegmented = oversegmented
- self.under_perc = len(undersegmented) / gold_total if gold_total and undersegmented else 0.0
- self.over_perc = len(oversegmented) / gold_total if gold_total and oversegmented else 0.0
- class AlignmentWord:
- def __init__(self, gold_word, system_word):
- self.gold_word = gold_word
- self.system_word = system_word
- self.gold_parent = None
- self.system_parent_gold_aligned = None
- class Alignment:
- def __init__(self, gold_words, system_words):
- self.gold_words = gold_words
- self.system_words = system_words
- self.matched_words = []
- self.matched_words_map = {}
- def append_aligned_words(self, gold_word, system_word):
- self.matched_words.append(AlignmentWord(gold_word, system_word))
- self.matched_words_map[system_word] = gold_word
- def fill_parents(self):
- # We represent root parents in both gold and system data by '0'.
- # For gold data, we represent non-root parent by corresponding gold word.
- # For system data, we represent non-root parent by either gold word aligned
- # to parent system nodes, or by None if no gold words is aligned to the parent.
- for words in self.matched_words:
- words.gold_parent = words.gold_word.parent if words.gold_word.parent is not None else 0
- words.system_parent_gold_aligned = self.matched_words_map.get(words.system_word.parent, None) \
- if words.system_word.parent is not None else 0
-
- def lower(text):
- if sys.version_info < (3, 0) and isinstance(text, str):
- return text.decode("utf-8").lower()
- return text.lower()
-
- def spans_score(gold_spans, system_spans):
- correct, gi, si = 0, 0, 0
- undersegmented = []
- oversegmented = []
- combo = 0
- previous_end_si_earlier = False
- previous_end_gi_earlier = False
- while gi < len(gold_spans) and si < len(system_spans):
- previous_si = system_spans[si-1] if si > 0 else None
- previous_gi = gold_spans[gi-1] if gi > 0 else None
- if system_spans[si].start < gold_spans[gi].start:
- # avoid counting the same mistake twice
- if not previous_end_si_earlier:
- combo += 1
- oversegmented.append(str(previous_gi).strip())
- si += 1
- elif gold_spans[gi].start < system_spans[si].start:
- # avoid counting the same mistake twice
- if not previous_end_gi_earlier:
- combo += 1
- undersegmented.append(str(previous_si).strip())
- gi += 1
- else:
- correct += gold_spans[gi].end == system_spans[si].end
- if gold_spans[gi].end < system_spans[si].end:
- undersegmented.append(str(system_spans[si]).strip())
- previous_end_gi_earlier = True
- previous_end_si_earlier = False
- elif gold_spans[gi].end > system_spans[si].end:
- oversegmented.append(str(gold_spans[gi]).strip())
- previous_end_si_earlier = True
- previous_end_gi_earlier = False
- else:
- previous_end_gi_earlier = False
- previous_end_si_earlier = False
- si += 1
- gi += 1
-
- return Score(len(gold_spans), len(system_spans), correct, None, undersegmented, oversegmented)
-
- def alignment_score(alignment, key_fn, weight_fn=lambda w: 1):
- gold, system, aligned, correct = 0, 0, 0, 0
-
- for word in alignment.gold_words:
- gold += weight_fn(word)
-
- for word in alignment.system_words:
- system += weight_fn(word)
-
- for words in alignment.matched_words:
- aligned += weight_fn(words.gold_word)
-
- if key_fn is None:
- # Return score for whole aligned words
- return Score(gold, system, aligned)
-
- for words in alignment.matched_words:
- if key_fn(words.gold_word, words.gold_parent) == key_fn(words.system_word, words.system_parent_gold_aligned):
- correct += weight_fn(words.gold_word)
-
- return Score(gold, system, correct, aligned)
-
- def beyond_end(words, i, multiword_span_end):
- if i >= len(words):
- return True
- if words[i].is_multiword:
- return words[i].span.start >= multiword_span_end
- return words[i].span.end > multiword_span_end
-
- def extend_end(word, multiword_span_end):
- if word.is_multiword and word.span.end > multiword_span_end:
- return word.span.end
- return multiword_span_end
-
- def find_multiword_span(gold_words, system_words, gi, si):
- # We know gold_words[gi].is_multiword or system_words[si].is_multiword.
- # Find the start of the multiword span (gs, ss), so the multiword span is minimal.
- # Initialize multiword_span_end characters index.
- if gold_words[gi].is_multiword:
- multiword_span_end = gold_words[gi].span.end
- if not system_words[si].is_multiword and system_words[si].span.start < gold_words[gi].span.start:
- si += 1
- else: # if system_words[si].is_multiword
- multiword_span_end = system_words[si].span.end
- if not gold_words[gi].is_multiword and gold_words[gi].span.start < system_words[si].span.start:
- gi += 1
- gs, ss = gi, si
-
- # Find the end of the multiword span
- # (so both gi and si are pointing to the word following the multiword span end).
- while not beyond_end(gold_words, gi, multiword_span_end) or \
- not beyond_end(system_words, si, multiword_span_end):
- if gi < len(gold_words) and (si >= len(system_words) or
- gold_words[gi].span.start <= system_words[si].span.start):
- multiword_span_end = extend_end(gold_words[gi], multiword_span_end)
- gi += 1
- else:
- multiword_span_end = extend_end(system_words[si], multiword_span_end)
- si += 1
- return gs, ss, gi, si
-
- def compute_lcs(gold_words, system_words, gi, si, gs, ss):
- lcs = [[0] * (si - ss) for i in range(gi - gs)]
- for g in reversed(range(gi - gs)):
- for s in reversed(range(si - ss)):
- if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]):
- lcs[g][s] = 1 + (lcs[g+1][s+1] if g+1 < gi-gs and s+1 < si-ss else 0)
- lcs[g][s] = max(lcs[g][s], lcs[g+1][s] if g+1 < gi-gs else 0)
- lcs[g][s] = max(lcs[g][s], lcs[g][s+1] if s+1 < si-ss else 0)
- return lcs
-
- def align_words(gold_words, system_words):
- alignment = Alignment(gold_words, system_words)
-
- gi, si = 0, 0
- while gi < len(gold_words) and si < len(system_words):
- if gold_words[gi].is_multiword or system_words[si].is_multiword:
- # A: Multi-word tokens => align via LCS within the whole "multiword span".
- gs, ss, gi, si = find_multiword_span(gold_words, system_words, gi, si)
-
- if si > ss and gi > gs:
- lcs = compute_lcs(gold_words, system_words, gi, si, gs, ss)
-
- # Store aligned words
- s, g = 0, 0
- while g < gi - gs and s < si - ss:
- if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]):
- alignment.append_aligned_words(gold_words[gs+g], system_words[ss+s])
- g += 1
- s += 1
- elif lcs[g][s] == (lcs[g+1][s] if g+1 < gi-gs else 0):
- g += 1
- else:
- s += 1
- else:
- # B: No multi-word token => align according to spans.
- if (gold_words[gi].span.start, gold_words[gi].span.end) == (system_words[si].span.start, system_words[si].span.end):
- alignment.append_aligned_words(gold_words[gi], system_words[si])
- gi += 1
- si += 1
- elif gold_words[gi].span.start <= system_words[si].span.start:
- gi += 1
- else:
- si += 1
-
- alignment.fill_parents()
-
- return alignment
-
- # Check that underlying character sequences do match
- if gold_ud.characters != system_ud.characters:
- index = 0
- while gold_ud.characters[index] == system_ud.characters[index]:
- index += 1
-
- raise UDError(
- "The concatenation of tokens in gold file and in system file differ!\n" +
- "First 20 differing characters in gold file: '{}' and system file: '{}'".format(
- "".join(gold_ud.characters[index:index + 20]),
- "".join(system_ud.characters[index:index + 20])
- )
- )
-
- # Align words
- alignment = align_words(gold_ud.words, system_ud.words)
-
- # Compute the F1-scores
- if check_parse:
- result = {
- "Tokens": spans_score(gold_ud.tokens, system_ud.tokens),
- "Sentences": spans_score(gold_ud.sentences, system_ud.sentences),
- "Words": alignment_score(alignment, None),
- "UPOS": alignment_score(alignment, lambda w, parent: w.columns[UPOS]),
- "XPOS": alignment_score(alignment, lambda w, parent: w.columns[XPOS]),
- "Feats": alignment_score(alignment, lambda w, parent: w.columns[FEATS]),
- "AllTags": alignment_score(alignment, lambda w, parent: (w.columns[UPOS], w.columns[XPOS], w.columns[FEATS])),
- "Lemmas": alignment_score(alignment, lambda w, parent: w.columns[LEMMA]),
- "UAS": alignment_score(alignment, lambda w, parent: parent),
- "LAS": alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL])),
- }
- else:
- result = {
- "Tokens": spans_score(gold_ud.tokens, system_ud.tokens),
- "Sentences": spans_score(gold_ud.sentences, system_ud.sentences),
- "Words": alignment_score(alignment, None),
- "Feats": alignment_score(alignment, lambda w, parent: w.columns[FEATS]),
- "Lemmas": alignment_score(alignment, lambda w, parent: w.columns[LEMMA]),
- }
-
-
- # Add WeightedLAS if weights are given
- if deprel_weights is not None:
- def weighted_las(word):
- return deprel_weights.get(word.columns[DEPREL], 1.0)
- result["WeightedLAS"] = alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL]), weighted_las)
-
- return result
-
-def load_deprel_weights(weights_file):
- if weights_file is None:
- return None
-
- deprel_weights = {}
- for line in weights_file:
- # Ignore comments and empty lines
- if line.startswith("#") or not line.strip():
- continue
-
- columns = line.rstrip("\r\n").split()
- if len(columns) != 2:
- raise ValueError("Expected two columns in the UD Relations weights file on line '{}'".format(line))
-
- deprel_weights[columns[0]] = float(columns[1])
-
- return deprel_weights
-
-def load_conllu_file(path):
- _file = open(path, mode="r", **({"encoding": "utf-8"} if sys.version_info >= (3, 0) else {}))
- return load_conllu(_file)
-
-def evaluate_wrapper(args):
- # Load CoNLL-U files
- gold_ud = load_conllu_file(args.gold_file)
- system_ud = load_conllu_file(args.system_file)
-
- # Load weights if requested
- deprel_weights = load_deprel_weights(args.weights)
-
- return evaluate(gold_ud, system_ud, deprel_weights)
-
-def main():
- # Parse arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("gold_file", type=str,
- help="Name of the CoNLL-U file with the gold data.")
- parser.add_argument("system_file", type=str,
- help="Name of the CoNLL-U file with the predicted data.")
- parser.add_argument("--weights", "-w", type=argparse.FileType("r"), default=None,
- metavar="deprel_weights_file",
- help="Compute WeightedLAS using given weights for Universal Dependency Relations.")
- parser.add_argument("--verbose", "-v", default=0, action="count",
- help="Print all metrics.")
- args = parser.parse_args()
-
- # Use verbose if weights are supplied
- if args.weights is not None and not args.verbose:
- args.verbose = 1
-
- # Evaluate
- evaluation = evaluate_wrapper(args)
-
- # Print the evaluation
- if not args.verbose:
- print("LAS F1 Score: {:.2f}".format(100 * evaluation["LAS"].f1))
- else:
- metrics = ["Tokens", "Sentences", "Words", "UPOS", "XPOS", "Feats", "AllTags", "Lemmas", "UAS", "LAS"]
- if args.weights is not None:
- metrics.append("WeightedLAS")
-
- print("Metrics | Precision | Recall | F1 Score | AligndAcc")
- print("-----------+-----------+-----------+-----------+-----------")
- for metric in metrics:
- print("{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}".format(
- metric,
- 100 * evaluation[metric].precision,
- 100 * evaluation[metric].recall,
- 100 * evaluation[metric].f1,
- "{:10.2f}".format(100 * evaluation[metric].aligned_accuracy) if evaluation[metric].aligned_accuracy is not None else ""
- ))
-
-if __name__ == "__main__":
- main()
-
-# Tests, which can be executed with `python -m unittest conll17_ud_eval`.
-class TestAlignment(unittest.TestCase):
- @staticmethod
- def _load_words(words):
- """Prepare fake CoNLL-U files with fake HEAD to prevent multiple roots errors."""
- lines, num_words = [], 0
- for w in words:
- parts = w.split(" ")
- if len(parts) == 1:
- num_words += 1
- lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, parts[0], int(num_words>1)))
- else:
- lines.append("{}-{}\t{}\t_\t_\t_\t_\t_\t_\t_\t_".format(num_words + 1, num_words + len(parts) - 1, parts[0]))
- for part in parts[1:]:
- num_words += 1
- lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, part, int(num_words>1)))
- return load_conllu((io.StringIO if sys.version_info >= (3, 0) else io.BytesIO)("\n".join(lines+["\n"])))
-
- def _test_exception(self, gold, system):
- self.assertRaises(UDError, evaluate, self._load_words(gold), self._load_words(system))
-
- def _test_ok(self, gold, system, correct):
- metrics = evaluate(self._load_words(gold), self._load_words(system))
- gold_words = sum((max(1, len(word.split(" ")) - 1) for word in gold))
- system_words = sum((max(1, len(word.split(" ")) - 1) for word in system))
- self.assertEqual((metrics["Words"].precision, metrics["Words"].recall, metrics["Words"].f1),
- (correct / system_words, correct / gold_words, 2 * correct / (gold_words + system_words)))
-
- def test_exception(self):
- self._test_exception(["a"], ["b"])
-
- def test_equal(self):
- self._test_ok(["a"], ["a"], 1)
- self._test_ok(["a", "b", "c"], ["a", "b", "c"], 3)
-
- def test_equal_with_multiword(self):
- self._test_ok(["abc a b c"], ["a", "b", "c"], 3)
- self._test_ok(["a", "bc b c", "d"], ["a", "b", "c", "d"], 4)
- self._test_ok(["abcd a b c d"], ["ab a b", "cd c d"], 4)
- self._test_ok(["abc a b c", "de d e"], ["a", "bcd b c d", "e"], 5)
-
- def test_alignment(self):
- self._test_ok(["abcd"], ["a", "b", "c", "d"], 0)
- self._test_ok(["abc", "d"], ["a", "b", "c", "d"], 1)
- self._test_ok(["a", "bc", "d"], ["a", "b", "c", "d"], 2)
- self._test_ok(["a", "bc b c", "d"], ["a", "b", "cd"], 2)
- self._test_ok(["abc a BX c", "def d EX f"], ["ab a b", "cd c d", "ef e f"], 4)
- self._test_ok(["ab a b", "cd bc d"], ["a", "bc", "d"], 2)
- self._test_ok(["a", "bc b c", "d"], ["ab AX BX", "cd CX a"], 1)
diff --git a/bin/ud/run_eval.py b/bin/ud/run_eval.py
deleted file mode 100644
index 2da476721..000000000
--- a/bin/ud/run_eval.py
+++ /dev/null
@@ -1,293 +0,0 @@
-import spacy
-import time
-import re
-import plac
-import operator
-import datetime
-from pathlib import Path
-import xml.etree.ElementTree as ET
-
-import conll17_ud_eval
-from ud_train import write_conllu
-from spacy.lang.lex_attrs import word_shape
-from spacy.util import get_lang_class
-
-# All languages in spaCy - in UD format (note that Norwegian is 'no' instead of 'nb')
-ALL_LANGUAGES = ("af, ar, bg, bn, ca, cs, da, de, el, en, es, et, fa, fi, fr,"
- "ga, he, hi, hr, hu, id, is, it, ja, kn, ko, lt, lv, mr, no,"
- "nl, pl, pt, ro, ru, si, sk, sl, sq, sr, sv, ta, te, th, tl,"
- "tr, tt, uk, ur, vi, zh")
-
-# Non-parsing tasks that will be evaluated (works for default models)
-EVAL_NO_PARSE = ['Tokens', 'Words', 'Lemmas', 'Sentences', 'Feats']
-
-# Tasks that will be evaluated if check_parse=True (does not work for default models)
-EVAL_PARSE = ['Tokens', 'Words', 'Lemmas', 'Sentences', 'Feats', 'UPOS', 'XPOS', 'AllTags', 'UAS', 'LAS']
-
-# Minimum frequency an error should have to be printed
-PRINT_FREQ = 20
-
-# Maximum number of errors printed per category
-PRINT_TOTAL = 10
-
-space_re = re.compile("\s+")
-
-
-def load_model(modelname, add_sentencizer=False):
- """ Load a specific spaCy model """
- loading_start = time.time()
- nlp = spacy.load(modelname)
- if add_sentencizer:
- nlp.add_pipe(nlp.create_pipe('sentencizer'))
- loading_end = time.time()
- loading_time = loading_end - loading_start
- if add_sentencizer:
- return nlp, loading_time, modelname + '_sentencizer'
- return nlp, loading_time, modelname
-
-
-def load_default_model_sentencizer(lang):
- """ Load a generic spaCy model and add the sentencizer for sentence tokenization"""
- loading_start = time.time()
- lang_class = get_lang_class(lang)
- nlp = lang_class()
- nlp.add_pipe(nlp.create_pipe('sentencizer'))
- loading_end = time.time()
- loading_time = loading_end - loading_start
- return nlp, loading_time, lang + "_default_" + 'sentencizer'
-
-
-def split_text(text):
- return [space_re.sub(" ", par.strip()) for par in text.split("\n\n")]
-
-
-def get_freq_tuples(my_list, print_total_threshold):
- """ Turn a list of errors into frequency-sorted tuples thresholded by a certain total number """
- d = {}
- for token in my_list:
- d.setdefault(token, 0)
- d[token] += 1
- return sorted(d.items(), key=operator.itemgetter(1), reverse=True)[:print_total_threshold]
-
-
-def _contains_blinded_text(stats_xml):
- """ Heuristic to determine whether the treebank has blinded texts or not """
- tree = ET.parse(stats_xml)
- root = tree.getroot()
- total_tokens = int(root.find('size/total/tokens').text)
- unique_forms = int(root.find('forms').get('unique'))
-
- # assume the corpus is largely blinded when there are less than 1% unique tokens
- return (unique_forms / total_tokens) < 0.01
-
-
-def fetch_all_treebanks(ud_dir, languages, corpus, best_per_language):
- """" Fetch the txt files for all treebanks for a given set of languages """
- all_treebanks = dict()
- treebank_size = dict()
- for l in languages:
- all_treebanks[l] = []
- treebank_size[l] = 0
-
- for treebank_dir in ud_dir.iterdir():
- if treebank_dir.is_dir():
- for txt_path in treebank_dir.iterdir():
- if txt_path.name.endswith('-ud-' + corpus + '.txt'):
- file_lang = txt_path.name.split('_')[0]
- if file_lang in languages:
- gold_path = treebank_dir / txt_path.name.replace('.txt', '.conllu')
- stats_xml = treebank_dir / "stats.xml"
- # ignore treebanks where the texts are not publicly available
- if not _contains_blinded_text(stats_xml):
- if not best_per_language:
- all_treebanks[file_lang].append(txt_path)
- # check the tokens in the gold annotation to keep only the biggest treebank per language
- else:
- with gold_path.open(mode='r', encoding='utf-8') as gold_file:
- gold_ud = conll17_ud_eval.load_conllu(gold_file)
- gold_tokens = len(gold_ud.tokens)
- if treebank_size[file_lang] < gold_tokens:
- all_treebanks[file_lang] = [txt_path]
- treebank_size[file_lang] = gold_tokens
-
- return all_treebanks
-
-
-def run_single_eval(nlp, loading_time, print_name, text_path, gold_ud, tmp_output_path, out_file, print_header,
- check_parse, print_freq_tasks):
- """" Run an evaluation of a model nlp on a certain specified treebank """
- with text_path.open(mode='r', encoding='utf-8') as f:
- flat_text = f.read()
-
- # STEP 1: tokenize text
- tokenization_start = time.time()
- texts = split_text(flat_text)
- docs = list(nlp.pipe(texts))
- tokenization_end = time.time()
- tokenization_time = tokenization_end - tokenization_start
-
- # STEP 2: record stats and timings
- tokens_per_s = int(len(gold_ud.tokens) / tokenization_time)
-
- print_header_1 = ['date', 'text_path', 'gold_tokens', 'model', 'loading_time', 'tokenization_time', 'tokens_per_s']
- print_string_1 = [str(datetime.date.today()), text_path.name, len(gold_ud.tokens),
- print_name, "%.2f" % loading_time, "%.2f" % tokenization_time, tokens_per_s]
-
- # STEP 3: evaluate predicted tokens and features
- with tmp_output_path.open(mode="w", encoding="utf8") as tmp_out_file:
- write_conllu(docs, tmp_out_file)
- with tmp_output_path.open(mode="r", encoding="utf8") as sys_file:
- sys_ud = conll17_ud_eval.load_conllu(sys_file, check_parse=check_parse)
- tmp_output_path.unlink()
- scores = conll17_ud_eval.evaluate(gold_ud, sys_ud, check_parse=check_parse)
-
- # STEP 4: format the scoring results
- eval_headers = EVAL_PARSE
- if not check_parse:
- eval_headers = EVAL_NO_PARSE
-
- for score_name in eval_headers:
- score = scores[score_name]
- print_string_1.extend(["%.2f" % score.precision,
- "%.2f" % score.recall,
- "%.2f" % score.f1])
- print_string_1.append("-" if score.aligned_accuracy is None else "%.2f" % score.aligned_accuracy)
- print_string_1.append("-" if score.undersegmented is None else "%.4f" % score.under_perc)
- print_string_1.append("-" if score.oversegmented is None else "%.4f" % score.over_perc)
-
- print_header_1.extend([score_name + '_p', score_name + '_r', score_name + '_F', score_name + '_acc',
- score_name + '_under', score_name + '_over'])
-
- if score_name in print_freq_tasks:
- print_header_1.extend([score_name + '_word_under_ex', score_name + '_shape_under_ex',
- score_name + '_word_over_ex', score_name + '_shape_over_ex'])
-
- d_under_words = get_freq_tuples(score.undersegmented, PRINT_TOTAL)
- d_under_shapes = get_freq_tuples([word_shape(x) for x in score.undersegmented], PRINT_TOTAL)
- d_over_words = get_freq_tuples(score.oversegmented, PRINT_TOTAL)
- d_over_shapes = get_freq_tuples([word_shape(x) for x in score.oversegmented], PRINT_TOTAL)
-
- # saving to CSV with ; seperator so blinding ; in the example output
- print_string_1.append(
- str({k: v for k, v in d_under_words if v > PRINT_FREQ}).replace(";", "*SEMICOLON*"))
- print_string_1.append(
- str({k: v for k, v in d_under_shapes if v > PRINT_FREQ}).replace(";", "*SEMICOLON*"))
- print_string_1.append(
- str({k: v for k, v in d_over_words if v > PRINT_FREQ}).replace(";", "*SEMICOLON*"))
- print_string_1.append(
- str({k: v for k, v in d_over_shapes if v > PRINT_FREQ}).replace(";", "*SEMICOLON*"))
-
- # STEP 5: print the formatted results to CSV
- if print_header:
- out_file.write(';'.join(map(str, print_header_1)) + '\n')
- out_file.write(';'.join(map(str, print_string_1)) + '\n')
-
-
-def run_all_evals(models, treebanks, out_file, check_parse, print_freq_tasks):
- """" Run an evaluation for each language with its specified models and treebanks """
- print_header = True
-
- for tb_lang, treebank_list in treebanks.items():
- print()
- print("Language", tb_lang)
- for text_path in treebank_list:
- print(" Evaluating on", text_path)
-
- gold_path = text_path.parent / (text_path.stem + '.conllu')
- print(" Gold data from ", gold_path)
-
- # nested try blocks to ensure the code can continue with the next iteration after a failure
- try:
- with gold_path.open(mode='r', encoding='utf-8') as gold_file:
- gold_ud = conll17_ud_eval.load_conllu(gold_file)
-
- for nlp, nlp_loading_time, nlp_name in models[tb_lang]:
- try:
- print(" Benchmarking", nlp_name)
- tmp_output_path = text_path.parent / str('tmp_' + nlp_name + '.conllu')
- run_single_eval(nlp, nlp_loading_time, nlp_name, text_path, gold_ud, tmp_output_path, out_file,
- print_header, check_parse, print_freq_tasks)
- print_header = False
- except Exception as e:
- print(" Ran into trouble: ", str(e))
- except Exception as e:
- print(" Ran into trouble: ", str(e))
-
-
-@plac.annotations(
- out_path=("Path to output CSV file", "positional", None, Path),
- ud_dir=("Path to Universal Dependencies corpus", "positional", None, Path),
- check_parse=("Set flag to evaluate parsing performance", "flag", "p", bool),
- langs=("Enumeration of languages to evaluate (default: all)", "option", "l", str),
- exclude_trained_models=("Set flag to exclude trained models", "flag", "t", bool),
- exclude_multi=("Set flag to exclude the multi-language model as default baseline", "flag", "m", bool),
- hide_freq=("Set flag to avoid printing out more detailed high-freq tokenization errors", "flag", "f", bool),
- corpus=("Whether to run on train, dev or test", "option", "c", str),
- best_per_language=("Set flag to only keep the largest treebank for each language", "flag", "b", bool)
-)
-def main(out_path, ud_dir, check_parse=False, langs=ALL_LANGUAGES, exclude_trained_models=False, exclude_multi=False,
- hide_freq=False, corpus='train', best_per_language=False):
- """"
- Assemble all treebanks and models to run evaluations with.
- When setting check_parse to True, the default models will not be evaluated as they don't have parsing functionality
- """
- languages = [lang.strip() for lang in langs.split(",")]
-
- print_freq_tasks = []
- if not hide_freq:
- print_freq_tasks = ['Tokens']
-
- # fetching all relevant treebank from the directory
- treebanks = fetch_all_treebanks(ud_dir, languages, corpus, best_per_language)
-
- print()
- print("Loading all relevant models for", languages)
- models = dict()
-
- # multi-lang model
- multi = None
- if not exclude_multi and not check_parse:
- multi = load_model('xx_ent_wiki_sm', add_sentencizer=True)
-
- # initialize all models with the multi-lang model
- for lang in languages:
- models[lang] = [multi] if multi else []
- # add default models if we don't want to evaluate parsing info
- if not check_parse:
- # Norwegian is 'nb' in spaCy but 'no' in the UD corpora
- if lang == 'no':
- models['no'].append(load_default_model_sentencizer('nb'))
- else:
- models[lang].append(load_default_model_sentencizer(lang))
-
- # language-specific trained models
- if not exclude_trained_models:
- if 'de' in models:
- models['de'].append(load_model('de_core_news_sm'))
- models['de'].append(load_model('de_core_news_md'))
- if 'el' in models:
- models['el'].append(load_model('el_core_news_sm'))
- models['el'].append(load_model('el_core_news_md'))
- if 'en' in models:
- models['en'].append(load_model('en_core_web_sm'))
- models['en'].append(load_model('en_core_web_md'))
- models['en'].append(load_model('en_core_web_lg'))
- if 'es' in models:
- models['es'].append(load_model('es_core_news_sm'))
- models['es'].append(load_model('es_core_news_md'))
- if 'fr' in models:
- models['fr'].append(load_model('fr_core_news_sm'))
- models['fr'].append(load_model('fr_core_news_md'))
- if 'it' in models:
- models['it'].append(load_model('it_core_news_sm'))
- if 'nl' in models:
- models['nl'].append(load_model('nl_core_news_sm'))
- if 'pt' in models:
- models['pt'].append(load_model('pt_core_news_sm'))
-
- with out_path.open(mode='w', encoding='utf-8') as out_file:
- run_all_evals(models, treebanks, out_file, check_parse, print_freq_tasks)
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/bin/ud/ud_run_test.py b/bin/ud/ud_run_test.py
deleted file mode 100644
index 7cb270d84..000000000
--- a/bin/ud/ud_run_test.py
+++ /dev/null
@@ -1,335 +0,0 @@
-# flake8: noqa
-"""Train for CONLL 2017 UD treebank evaluation. Takes .conllu files, writes
-.conllu format for development data, allowing the official scorer to be used.
-"""
-from __future__ import unicode_literals
-
-import plac
-from pathlib import Path
-import re
-import sys
-import srsly
-
-import spacy
-import spacy.util
-from spacy.tokens import Token, Doc
-from spacy.gold import GoldParse
-from spacy.util import compounding, minibatch_by_words
-from spacy.syntax.nonproj import projectivize
-from spacy.matcher import Matcher
-
-# from spacy.morphology import Fused_begin, Fused_inside
-from spacy import displacy
-from collections import defaultdict, Counter
-from timeit import default_timer as timer
-
-Fused_begin = None
-Fused_inside = None
-
-import itertools
-import random
-import numpy.random
-
-from . import conll17_ud_eval
-
-from spacy import lang
-from spacy.lang import zh
-from spacy.lang import ja
-from spacy.lang import ru
-
-
-################
-# Data reading #
-################
-
-space_re = re.compile(r"\s+")
-
-
-def split_text(text):
- return [space_re.sub(" ", par.strip()) for par in text.split("\n\n")]
-
-
-##############
-# Evaluation #
-##############
-
-
-def read_conllu(file_):
- docs = []
- sent = []
- doc = []
- for line in file_:
- if line.startswith("# newdoc"):
- if doc:
- docs.append(doc)
- doc = []
- elif line.startswith("#"):
- continue
- elif not line.strip():
- if sent:
- doc.append(sent)
- sent = []
- else:
- sent.append(list(line.strip().split("\t")))
- if len(sent[-1]) != 10:
- print(repr(line))
- raise ValueError
- if sent:
- doc.append(sent)
- if doc:
- docs.append(doc)
- return docs
-
-
-def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):
- if text_loc.parts[-1].endswith(".conllu"):
- docs = []
- with text_loc.open(encoding="utf8") as file_:
- for conllu_doc in read_conllu(file_):
- for conllu_sent in conllu_doc:
- words = [line[1] for line in conllu_sent]
- docs.append(Doc(nlp.vocab, words=words))
- for name, component in nlp.pipeline:
- docs = list(component.pipe(docs))
- else:
- with text_loc.open("r", encoding="utf8") as text_file:
- texts = split_text(text_file.read())
- docs = list(nlp.pipe(texts))
- with sys_loc.open("w", encoding="utf8") as out_file:
- write_conllu(docs, out_file)
- with gold_loc.open("r", encoding="utf8") as gold_file:
- gold_ud = conll17_ud_eval.load_conllu(gold_file)
- with sys_loc.open("r", encoding="utf8") as sys_file:
- sys_ud = conll17_ud_eval.load_conllu(sys_file)
- scores = conll17_ud_eval.evaluate(gold_ud, sys_ud)
- return docs, scores
-
-
-def write_conllu(docs, file_):
- merger = Matcher(docs[0].vocab)
- merger.add("SUBTOK", None, [{"DEP": "subtok", "op": "+"}])
- for i, doc in enumerate(docs):
- matches = []
- if doc.is_parsed:
- matches = merger(doc)
- spans = [doc[start : end + 1] for _, start, end in matches]
- with doc.retokenize() as retokenizer:
- for span in spans:
- retokenizer.merge(span)
- file_.write("# newdoc id = {i}\n".format(i=i))
- for j, sent in enumerate(doc.sents):
- file_.write("# sent_id = {i}.{j}\n".format(i=i, j=j))
- file_.write("# text = {text}\n".format(text=sent.text))
- for k, token in enumerate(sent):
- file_.write(_get_token_conllu(token, k, len(sent)) + "\n")
- file_.write("\n")
- for word in sent:
- if word.head.i == word.i and word.dep_ == "ROOT":
- break
- else:
- print("Rootless sentence!")
- print(sent)
- print(i)
- for w in sent:
- print(w.i, w.text, w.head.text, w.head.i, w.dep_)
- raise ValueError
-
-
-def _get_token_conllu(token, k, sent_len):
- if token.check_morph(Fused_begin) and (k + 1 < sent_len):
- n = 1
- text = [token.text]
- while token.nbor(n).check_morph(Fused_inside):
- text.append(token.nbor(n).text)
- n += 1
- id_ = "%d-%d" % (k + 1, (k + n))
- fields = [id_, "".join(text)] + ["_"] * 8
- lines = ["\t".join(fields)]
- else:
- lines = []
- if token.head.i == token.i:
- head = 0
- else:
- head = k + (token.head.i - token.i) + 1
- fields = [
- str(k + 1),
- token.text,
- token.lemma_,
- token.pos_,
- token.tag_,
- "_",
- str(head),
- token.dep_.lower(),
- "_",
- "_",
- ]
- if token.check_morph(Fused_begin) and (k + 1 < sent_len):
- if k == 0:
- fields[1] = token.norm_[0].upper() + token.norm_[1:]
- else:
- fields[1] = token.norm_
- elif token.check_morph(Fused_inside):
- fields[1] = token.norm_
- elif token._.split_start is not None:
- split_start = token._.split_start
- split_end = token._.split_end
- split_len = (split_end.i - split_start.i) + 1
- n_in_split = token.i - split_start.i
- subtokens = guess_fused_orths(split_start.text, [""] * split_len)
- fields[1] = subtokens[n_in_split]
-
- lines.append("\t".join(fields))
- return "\n".join(lines)
-
-
-def guess_fused_orths(word, ud_forms):
- """The UD data 'fused tokens' don't necessarily expand to keys that match
- the form. We need orths that exact match the string. Here we make a best
- effort to divide up the word."""
- if word == "".join(ud_forms):
- # Happy case: we get a perfect split, with each letter accounted for.
- return ud_forms
- elif len(word) == sum(len(subtoken) for subtoken in ud_forms):
- # Unideal, but at least lengths match.
- output = []
- remain = word
- for subtoken in ud_forms:
- assert len(subtoken) >= 1
- output.append(remain[: len(subtoken)])
- remain = remain[len(subtoken) :]
- assert len(remain) == 0, (word, ud_forms, remain)
- return output
- else:
- # Let's say word is 6 long, and there are three subtokens. The orths
- # *must* equal the original string. Arbitrarily, split [4, 1, 1]
- first = word[: len(word) - (len(ud_forms) - 1)]
- output = [first]
- remain = word[len(first) :]
- for i in range(1, len(ud_forms)):
- assert remain
- output.append(remain[:1])
- remain = remain[1:]
- assert len(remain) == 0, (word, output, remain)
- return output
-
-
-def print_results(name, ud_scores):
- fields = {}
- if ud_scores is not None:
- fields.update(
- {
- "words": ud_scores["Words"].f1 * 100,
- "sents": ud_scores["Sentences"].f1 * 100,
- "tags": ud_scores["XPOS"].f1 * 100,
- "uas": ud_scores["UAS"].f1 * 100,
- "las": ud_scores["LAS"].f1 * 100,
- }
- )
- else:
- fields.update({"words": 0.0, "sents": 0.0, "tags": 0.0, "uas": 0.0, "las": 0.0})
- tpl = "\t".join(
- (name, "{las:.1f}", "{uas:.1f}", "{tags:.1f}", "{sents:.1f}", "{words:.1f}")
- )
- print(tpl.format(**fields))
- return fields
-
-
-def get_token_split_start(token):
- if token.text == "":
- assert token.i != 0
- i = -1
- while token.nbor(i).text == "":
- i -= 1
- return token.nbor(i)
- elif (token.i + 1) < len(token.doc) and token.nbor(1).text == "":
- return token
- else:
- return None
-
-
-def get_token_split_end(token):
- if (token.i + 1) == len(token.doc):
- return token if token.text == "" else None
- elif token.text != "" and token.nbor(1).text != "":
- return None
- i = 1
- while (token.i + i) < len(token.doc) and token.nbor(i).text == "":
- i += 1
- return token.nbor(i - 1)
-
-
-##################
-# Initialization #
-##################
-
-
-def load_nlp(experiments_dir, corpus):
- nlp = spacy.load(experiments_dir / corpus / "best-model")
- return nlp
-
-
-def initialize_pipeline(nlp, docs, golds, config, device):
- nlp.add_pipe(nlp.create_pipe("parser"))
- return nlp
-
-
-@plac.annotations(
- test_data_dir=(
- "Path to Universal Dependencies test data",
- "positional",
- None,
- Path,
- ),
- experiment_dir=("Parent directory with output model", "positional", None, Path),
- corpus=(
- "UD corpus to evaluate, e.g. UD_English, UD_Spanish, etc",
- "positional",
- None,
- str,
- ),
-)
-def main(test_data_dir, experiment_dir, corpus):
- Token.set_extension("split_start", getter=get_token_split_start)
- Token.set_extension("split_end", getter=get_token_split_end)
- Token.set_extension("begins_fused", default=False)
- Token.set_extension("inside_fused", default=False)
- lang.zh.Chinese.Defaults.use_jieba = False
- lang.ja.Japanese.Defaults.use_janome = False
- lang.ru.Russian.Defaults.use_pymorphy2 = False
-
- nlp = load_nlp(experiment_dir, corpus)
-
- treebank_code = nlp.meta["treebank"]
- for section in ("test", "dev"):
- if section == "dev":
- section_dir = "conll17-ud-development-2017-03-19"
- else:
- section_dir = "conll17-ud-test-2017-05-09"
- text_path = test_data_dir / "input" / section_dir / (treebank_code + ".txt")
- udpipe_path = (
- test_data_dir / "input" / section_dir / (treebank_code + "-udpipe.conllu")
- )
- gold_path = test_data_dir / "gold" / section_dir / (treebank_code + ".conllu")
-
- header = [section, "LAS", "UAS", "TAG", "SENT", "WORD"]
- print("\t".join(header))
- inputs = {"gold": gold_path, "udp": udpipe_path, "raw": text_path}
- for input_type in ("udp", "raw"):
- input_path = inputs[input_type]
- output_path = (
- experiment_dir / corpus / "{section}.conllu".format(section=section)
- )
-
- parsed_docs, test_scores = evaluate(nlp, input_path, gold_path, output_path)
-
- accuracy = print_results(input_type, test_scores)
- acc_path = (
- experiment_dir
- / corpus
- / "{section}-accuracy.json".format(section=section)
- )
- srsly.write_json(acc_path, accuracy)
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/bin/ud/ud_train.py b/bin/ud/ud_train.py
deleted file mode 100644
index 6353bd6e7..000000000
--- a/bin/ud/ud_train.py
+++ /dev/null
@@ -1,570 +0,0 @@
-# flake8: noqa
-"""Train for CONLL 2017 UD treebank evaluation. Takes .conllu files, writes
-.conllu format for development data, allowing the official scorer to be used.
-"""
-from __future__ import unicode_literals
-
-import plac
-from pathlib import Path
-import re
-import json
-import tqdm
-
-import spacy
-import spacy.util
-from bin.ud import conll17_ud_eval
-from spacy.tokens import Token, Doc
-from spacy.gold import GoldParse
-from spacy.util import compounding, minibatch, minibatch_by_words
-from spacy.syntax.nonproj import projectivize
-from spacy.matcher import Matcher
-from spacy import displacy
-from collections import defaultdict
-
-import random
-
-from spacy import lang
-from spacy.lang import zh
-from spacy.lang import ja
-
-try:
- import torch
-except ImportError:
- torch = None
-
-
-################
-# Data reading #
-################
-
-space_re = re.compile("\s+")
-
-
-def split_text(text):
- return [space_re.sub(" ", par.strip()) for par in text.split("\n\n")]
-
-
-def read_data(
- nlp,
- conllu_file,
- text_file,
- raw_text=True,
- oracle_segments=False,
- max_doc_length=None,
- limit=None,
-):
- """Read the CONLLU format into (Doc, GoldParse) tuples. If raw_text=True,
- include Doc objects created using nlp.make_doc and then aligned against
- the gold-standard sequences. If oracle_segments=True, include Doc objects
- created from the gold-standard segments. At least one must be True."""
- if not raw_text and not oracle_segments:
- raise ValueError("At least one of raw_text or oracle_segments must be True")
- paragraphs = split_text(text_file.read())
- conllu = read_conllu(conllu_file)
- # sd is spacy doc; cd is conllu doc
- # cs is conllu sent, ct is conllu token
- docs = []
- golds = []
- for doc_id, (text, cd) in enumerate(zip(paragraphs, conllu)):
- sent_annots = []
- for cs in cd:
- sent = defaultdict(list)
- for id_, word, lemma, pos, tag, morph, head, dep, _, space_after in cs:
- if "." in id_:
- continue
- if "-" in id_:
- continue
- id_ = int(id_) - 1
- head = int(head) - 1 if head != "0" else id_
- sent["words"].append(word)
- sent["tags"].append(tag)
- sent["morphology"].append(_parse_morph_string(morph))
- sent["morphology"][-1].add("POS_%s" % pos)
- sent["heads"].append(head)
- sent["deps"].append("ROOT" if dep == "root" else dep)
- sent["spaces"].append(space_after == "_")
- sent["entities"] = ["-"] * len(sent["words"])
- sent["heads"], sent["deps"] = projectivize(sent["heads"], sent["deps"])
- if oracle_segments:
- docs.append(Doc(nlp.vocab, words=sent["words"], spaces=sent["spaces"]))
- golds.append(GoldParse(docs[-1], **sent))
- assert golds[-1].morphology is not None
-
- sent_annots.append(sent)
- if raw_text and max_doc_length and len(sent_annots) >= max_doc_length:
- doc, gold = _make_gold(nlp, None, sent_annots)
- assert gold.morphology is not None
- sent_annots = []
- docs.append(doc)
- golds.append(gold)
- if limit and len(docs) >= limit:
- return docs, golds
-
- if raw_text and sent_annots:
- doc, gold = _make_gold(nlp, None, sent_annots)
- docs.append(doc)
- golds.append(gold)
- if limit and len(docs) >= limit:
- return docs, golds
- return docs, golds
-
-def _parse_morph_string(morph_string):
- if morph_string == '_':
- return set()
- output = []
- replacements = {'1': 'one', '2': 'two', '3': 'three'}
- for feature in morph_string.split('|'):
- key, value = feature.split('=')
- value = replacements.get(value, value)
- value = value.split(',')[0]
- output.append('%s_%s' % (key, value.lower()))
- return set(output)
-
-def read_conllu(file_):
- docs = []
- sent = []
- doc = []
- for line in file_:
- if line.startswith("# newdoc"):
- if doc:
- docs.append(doc)
- doc = []
- elif line.startswith("#"):
- continue
- elif not line.strip():
- if sent:
- doc.append(sent)
- sent = []
- else:
- sent.append(list(line.strip().split("\t")))
- if len(sent[-1]) != 10:
- print(repr(line))
- raise ValueError
- if sent:
- doc.append(sent)
- if doc:
- docs.append(doc)
- return docs
-
-
-def _make_gold(nlp, text, sent_annots, drop_deps=0.0):
- # Flatten the conll annotations, and adjust the head indices
- flat = defaultdict(list)
- sent_starts = []
- for sent in sent_annots:
- flat["heads"].extend(len(flat["words"])+head for head in sent["heads"])
- for field in ["words", "tags", "deps", "morphology", "entities", "spaces"]:
- flat[field].extend(sent[field])
- sent_starts.append(True)
- sent_starts.extend([False] * (len(sent["words"]) - 1))
- # Construct text if necessary
- assert len(flat["words"]) == len(flat["spaces"])
- if text is None:
- text = "".join(
- word + " " * space for word, space in zip(flat["words"], flat["spaces"])
- )
- doc = nlp.make_doc(text)
- flat.pop("spaces")
- gold = GoldParse(doc, **flat)
- gold.sent_starts = sent_starts
- for i in range(len(gold.heads)):
- if random.random() < drop_deps:
- gold.heads[i] = None
- gold.labels[i] = None
-
- return doc, gold
-
-
-#############################
-# Data transforms for spaCy #
-#############################
-
-
-def golds_to_gold_tuples(docs, golds):
- """Get out the annoying 'tuples' format used by begin_training, given the
- GoldParse objects."""
- tuples = []
- for doc, gold in zip(docs, golds):
- text = doc.text
- ids, words, tags, heads, labels, iob = zip(*gold.orig_annot)
- sents = [((ids, words, tags, heads, labels, iob), [])]
- tuples.append((text, sents))
- return tuples
-
-
-##############
-# Evaluation #
-##############
-
-
-def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):
- if text_loc.parts[-1].endswith(".conllu"):
- docs = []
- with text_loc.open(encoding="utf8") as file_:
- for conllu_doc in read_conllu(file_):
- for conllu_sent in conllu_doc:
- words = [line[1] for line in conllu_sent]
- docs.append(Doc(nlp.vocab, words=words))
- for name, component in nlp.pipeline:
- docs = list(component.pipe(docs))
- else:
- with text_loc.open("r", encoding="utf8") as text_file:
- texts = split_text(text_file.read())
- docs = list(nlp.pipe(texts))
- with sys_loc.open("w", encoding="utf8") as out_file:
- write_conllu(docs, out_file)
- with gold_loc.open("r", encoding="utf8") as gold_file:
- gold_ud = conll17_ud_eval.load_conllu(gold_file)
- with sys_loc.open("r", encoding="utf8") as sys_file:
- sys_ud = conll17_ud_eval.load_conllu(sys_file)
- scores = conll17_ud_eval.evaluate(gold_ud, sys_ud)
- return docs, scores
-
-
-def write_conllu(docs, file_):
- if not Token.has_extension("get_conllu_lines"):
- Token.set_extension("get_conllu_lines", method=get_token_conllu)
- if not Token.has_extension("begins_fused"):
- Token.set_extension("begins_fused", default=False)
- if not Token.has_extension("inside_fused"):
- Token.set_extension("inside_fused", default=False)
-
- merger = Matcher(docs[0].vocab)
- merger.add("SUBTOK", None, [{"DEP": "subtok", "op": "+"}])
- for i, doc in enumerate(docs):
- matches = []
- if doc.is_parsed:
- matches = merger(doc)
- spans = [doc[start : end + 1] for _, start, end in matches]
- seen_tokens = set()
- with doc.retokenize() as retokenizer:
- for span in spans:
- span_tokens = set(range(span.start, span.end))
- if not span_tokens.intersection(seen_tokens):
- retokenizer.merge(span)
- seen_tokens.update(span_tokens)
-
- file_.write("# newdoc id = {i}\n".format(i=i))
- for j, sent in enumerate(doc.sents):
- file_.write("# sent_id = {i}.{j}\n".format(i=i, j=j))
- file_.write("# text = {text}\n".format(text=sent.text))
- for k, token in enumerate(sent):
- if token.head.i > sent[-1].i or token.head.i < sent[0].i:
- for word in doc[sent[0].i - 10 : sent[0].i]:
- print(word.i, word.head.i, word.text, word.dep_)
- for word in sent:
- print(word.i, word.head.i, word.text, word.dep_)
- for word in doc[sent[-1].i : sent[-1].i + 10]:
- print(word.i, word.head.i, word.text, word.dep_)
- raise ValueError(
- "Invalid parse: head outside sentence (%s)" % token.text
- )
- file_.write(token._.get_conllu_lines(k) + "\n")
- file_.write("\n")
-
-
-def print_progress(itn, losses, ud_scores):
- fields = {
- "dep_loss": losses.get("parser", 0.0),
- "morph_loss": losses.get("morphologizer", 0.0),
- "tag_loss": losses.get("tagger", 0.0),
- "words": ud_scores["Words"].f1 * 100,
- "sents": ud_scores["Sentences"].f1 * 100,
- "tags": ud_scores["XPOS"].f1 * 100,
- "uas": ud_scores["UAS"].f1 * 100,
- "las": ud_scores["LAS"].f1 * 100,
- "morph": ud_scores["Feats"].f1 * 100,
- }
- header = ["Epoch", "P.Loss", "M.Loss", "LAS", "UAS", "TAG", "MORPH", "SENT", "WORD"]
- if itn == 0:
- print("\t".join(header))
- tpl = "\t".join((
- "{:d}",
- "{dep_loss:.1f}",
- "{morph_loss:.1f}",
- "{las:.1f}",
- "{uas:.1f}",
- "{tags:.1f}",
- "{morph:.1f}",
- "{sents:.1f}",
- "{words:.1f}",
- ))
- print(tpl.format(itn, **fields))
-
-
-# def get_sent_conllu(sent, sent_id):
-# lines = ["# sent_id = {sent_id}".format(sent_id=sent_id)]
-
-
-def get_token_conllu(token, i):
- if token._.begins_fused:
- n = 1
- while token.nbor(n)._.inside_fused:
- n += 1
- id_ = "%d-%d" % (i, i + n)
- lines = [id_, token.text, "_", "_", "_", "_", "_", "_", "_", "_"]
- else:
- lines = []
- if token.head.i == token.i:
- head = 0
- else:
- head = i + (token.head.i - token.i) + 1
- features = list(token.morph)
- feat_str = []
- replacements = {"one": "1", "two": "2", "three": "3"}
- for feat in features:
- if not feat.startswith("begin") and not feat.startswith("end"):
- key, value = feat.split("_", 1)
- value = replacements.get(value, value)
- feat_str.append("%s=%s" % (key, value.title()))
- if not feat_str:
- feat_str = "_"
- else:
- feat_str = "|".join(feat_str)
- fields = [str(i+1), token.text, token.lemma_, token.pos_, token.tag_, feat_str,
- str(head), token.dep_.lower(), "_", "_"]
- lines.append("\t".join(fields))
- return "\n".join(lines)
-
-
-
-##################
-# Initialization #
-##################
-
-
-def load_nlp(corpus, config, vectors=None):
- lang = corpus.split("_")[0]
- nlp = spacy.blank(lang)
- if config.vectors:
- if not vectors:
- raise ValueError(
- "config asks for vectors, but no vectors "
- "directory set on command line (use -v)"
- )
- if (Path(vectors) / corpus).exists():
- nlp.vocab.from_disk(Path(vectors) / corpus / "vocab")
- nlp.meta["treebank"] = corpus
- return nlp
-
-
-def initialize_pipeline(nlp, docs, golds, config, device):
- nlp.add_pipe(nlp.create_pipe("tagger", config={"set_morphology": False}))
- nlp.add_pipe(nlp.create_pipe("morphologizer"))
- nlp.add_pipe(nlp.create_pipe("parser"))
- if config.multitask_tag:
- nlp.parser.add_multitask_objective("tag")
- if config.multitask_sent:
- nlp.parser.add_multitask_objective("sent_start")
- for gold in golds:
- for tag in gold.tags:
- if tag is not None:
- nlp.tagger.add_label(tag)
- if torch is not None and device != -1:
- torch.set_default_tensor_type("torch.cuda.FloatTensor")
- optimizer = nlp.begin_training(
- lambda: golds_to_gold_tuples(docs, golds),
- device=device,
- subword_features=config.subword_features,
- conv_depth=config.conv_depth,
- bilstm_depth=config.bilstm_depth,
- )
- if config.pretrained_tok2vec:
- _load_pretrained_tok2vec(nlp, config.pretrained_tok2vec)
- return optimizer
-
-
-def _load_pretrained_tok2vec(nlp, loc):
- """Load pretrained weights for the 'token-to-vector' part of the component
- models, which is typically a CNN. See 'spacy pretrain'. Experimental.
- """
- with Path(loc).open("rb", encoding="utf8") as file_:
- weights_data = file_.read()
- loaded = []
- for name, component in nlp.pipeline:
- if hasattr(component, "model") and hasattr(component.model, "tok2vec"):
- component.tok2vec.from_bytes(weights_data)
- loaded.append(name)
- return loaded
-
-
-########################
-# Command line helpers #
-########################
-
-
-class Config(object):
- def __init__(
- self,
- vectors=None,
- max_doc_length=10,
- multitask_tag=False,
- multitask_sent=False,
- multitask_dep=False,
- multitask_vectors=None,
- bilstm_depth=0,
- nr_epoch=30,
- min_batch_size=100,
- max_batch_size=1000,
- batch_by_words=True,
- dropout=0.2,
- conv_depth=4,
- subword_features=True,
- vectors_dir=None,
- pretrained_tok2vec=None,
- ):
- if vectors_dir is not None:
- if vectors is None:
- vectors = True
- if multitask_vectors is None:
- multitask_vectors = True
- for key, value in locals().items():
- setattr(self, key, value)
-
- @classmethod
- def load(cls, loc, vectors_dir=None):
- with Path(loc).open("r", encoding="utf8") as file_:
- cfg = json.load(file_)
- if vectors_dir is not None:
- cfg["vectors_dir"] = vectors_dir
- return cls(**cfg)
-
-
-class Dataset(object):
- def __init__(self, path, section):
- self.path = path
- self.section = section
- self.conllu = None
- self.text = None
- for file_path in self.path.iterdir():
- name = file_path.parts[-1]
- if section in name and name.endswith("conllu"):
- self.conllu = file_path
- elif section in name and name.endswith("txt"):
- self.text = file_path
- if self.conllu is None:
- msg = "Could not find .txt file in {path} for {section}"
- raise IOError(msg.format(section=section, path=path))
- if self.text is None:
- msg = "Could not find .txt file in {path} for {section}"
- self.lang = self.conllu.parts[-1].split("-")[0].split("_")[0]
-
-
-class TreebankPaths(object):
- def __init__(self, ud_path, treebank, **cfg):
- self.train = Dataset(ud_path / treebank, "train")
- self.dev = Dataset(ud_path / treebank, "dev")
- self.lang = self.train.lang
-
-
-@plac.annotations(
- ud_dir=("Path to Universal Dependencies corpus", "positional", None, Path),
- parses_dir=("Directory to write the development parses", "positional", None, Path),
- corpus=(
- "UD corpus to train and evaluate on, e.g. UD_Spanish-AnCora",
- "positional",
- None,
- str,
- ),
- config=("Path to json formatted config file", "option", "C", Path),
- limit=("Size limit", "option", "n", int),
- gpu_device=("Use GPU", "option", "g", int),
- use_oracle_segments=("Use oracle segments", "flag", "G", int),
- vectors_dir=(
- "Path to directory with pretrained vectors, named e.g. en/",
- "option",
- "v",
- Path,
- ),
-)
-def main(
- ud_dir,
- parses_dir,
- corpus,
- config=None,
- limit=0,
- gpu_device=-1,
- vectors_dir=None,
- use_oracle_segments=False,
-):
- Token.set_extension("get_conllu_lines", method=get_token_conllu)
- Token.set_extension("begins_fused", default=False)
- Token.set_extension("inside_fused", default=False)
-
- spacy.util.fix_random_seed()
- lang.zh.Chinese.Defaults.use_jieba = False
- lang.ja.Japanese.Defaults.use_janome = False
-
- if config is not None:
- config = Config.load(config, vectors_dir=vectors_dir)
- else:
- config = Config(vectors_dir=vectors_dir)
- paths = TreebankPaths(ud_dir, corpus)
- if not (parses_dir / corpus).exists():
- (parses_dir / corpus).mkdir()
- print("Train and evaluate", corpus, "using lang", paths.lang)
- nlp = load_nlp(paths.lang, config, vectors=vectors_dir)
-
- docs, golds = read_data(
- nlp,
- paths.train.conllu.open(encoding="utf8"),
- paths.train.text.open(encoding="utf8"),
- max_doc_length=config.max_doc_length,
- limit=limit,
- )
-
- optimizer = initialize_pipeline(nlp, docs, golds, config, gpu_device)
-
- batch_sizes = compounding(config.min_batch_size, config.max_batch_size, 1.001)
- beam_prob = compounding(0.2, 0.8, 1.001)
- for i in range(config.nr_epoch):
- docs, golds = read_data(
- nlp,
- paths.train.conllu.open(encoding="utf8"),
- paths.train.text.open(encoding="utf8"),
- max_doc_length=config.max_doc_length,
- limit=limit,
- oracle_segments=use_oracle_segments,
- raw_text=not use_oracle_segments,
- )
- Xs = list(zip(docs, golds))
- random.shuffle(Xs)
- if config.batch_by_words:
- batches = minibatch_by_words(Xs, size=batch_sizes)
- else:
- batches = minibatch(Xs, size=batch_sizes)
- losses = {}
- n_train_words = sum(len(doc) for doc in docs)
- with tqdm.tqdm(total=n_train_words, leave=False) as pbar:
- for batch in batches:
- batch_docs, batch_gold = zip(*batch)
- pbar.update(sum(len(doc) for doc in batch_docs))
- nlp.parser.cfg["beam_update_prob"] = next(beam_prob)
- nlp.update(
- batch_docs,
- batch_gold,
- sgd=optimizer,
- drop=config.dropout,
- losses=losses,
- )
-
- out_path = parses_dir / corpus / "epoch-{i}.conllu".format(i=i)
- with nlp.use_params(optimizer.averages):
- if use_oracle_segments:
- parsed_docs, scores = evaluate(nlp, paths.dev.conllu,
- paths.dev.conllu, out_path)
- else:
- parsed_docs, scores = evaluate(nlp, paths.dev.text,
- paths.dev.conllu, out_path)
- print_progress(i, losses, scores)
-
-
-def _render_parses(i, to_render):
- to_render[0].user_data["title"] = "Batch %d" % i
- with Path("/tmp/parses.html").open("w", encoding="utf8") as file_:
- html = displacy.render(to_render[:5], style="dep", page=True)
- file_.write(html)
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/README.md b/examples/README.md
deleted file mode 100644
index 869077531..000000000
--- a/examples/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-# spaCy examples
-
-The examples are Python scripts with well-behaved command line interfaces. For
-more detailed usage guides, see the [documentation](https://spacy.io/usage/).
-
-To see the available arguments, you can use the `--help` or `-h` flag:
-
-```bash
-$ python examples/training/train_ner.py --help
-```
-
-While we try to keep the examples up to date, they are not currently exercised
-by the test suite, as some of them require significant data downloads or take
-time to train. If you find that an example is no longer running,
-[please tell us](https://github.com/explosion/spaCy/issues)! We know there's
-nothing worse than trying to figure out what you're doing wrong, and it turns
-out your code was never the problem.
diff --git a/examples/deep_learning_keras.py b/examples/deep_learning_keras.py
deleted file mode 100644
index 049cc0be4..000000000
--- a/examples/deep_learning_keras.py
+++ /dev/null
@@ -1,267 +0,0 @@
-"""
-This example shows how to use an LSTM sentiment classification model trained
-using Keras in spaCy. spaCy splits the document into sentences, and each
-sentence is classified using the LSTM. The scores for the sentences are then
-aggregated to give the document score. This kind of hierarchical model is quite
-difficult in "pure" Keras or Tensorflow, but it's very effective. The Keras
-example on this dataset performs quite poorly, because it cuts off the documents
-so that they're a fixed size. This hurts review accuracy a lot, because people
-often summarise their rating in the final sentence
-
-Prerequisites:
-spacy download en_vectors_web_lg
-pip install keras==2.0.9
-
-Compatible with: spaCy v2.0.0+
-"""
-
-import plac
-import random
-import pathlib
-import cytoolz
-import numpy
-from keras.models import Sequential, model_from_json
-from keras.layers import LSTM, Dense, Embedding, Bidirectional
-from keras.layers import TimeDistributed
-from keras.optimizers import Adam
-import thinc.extra.datasets
-from spacy.compat import pickle
-import spacy
-
-
-class SentimentAnalyser(object):
- @classmethod
- def load(cls, path, nlp, max_length=100):
- with (path / "config.json").open() as file_:
- model = model_from_json(file_.read())
- with (path / "model").open("rb") as file_:
- lstm_weights = pickle.load(file_)
- embeddings = get_embeddings(nlp.vocab)
- model.set_weights([embeddings] + lstm_weights)
- return cls(model, max_length=max_length)
-
- def __init__(self, model, max_length=100):
- self._model = model
- self.max_length = max_length
-
- def __call__(self, doc):
- X = get_features([doc], self.max_length)
- y = self._model.predict(X)
- self.set_sentiment(doc, y)
-
- def pipe(self, docs, batch_size=1000):
- for minibatch in cytoolz.partition_all(batch_size, docs):
- minibatch = list(minibatch)
- sentences = []
- for doc in minibatch:
- sentences.extend(doc.sents)
- Xs = get_features(sentences, self.max_length)
- ys = self._model.predict(Xs)
- for sent, label in zip(sentences, ys):
- sent.doc.sentiment += label - 0.5
- for doc in minibatch:
- yield doc
-
- def set_sentiment(self, doc, y):
- doc.sentiment = float(y[0])
- # Sentiment has a native slot for a single float.
- # For arbitrary data storage, there's:
- # doc.user_data['my_data'] = y
-
-
-def get_labelled_sentences(docs, doc_labels):
- labels = []
- sentences = []
- for doc, y in zip(docs, doc_labels):
- for sent in doc.sents:
- sentences.append(sent)
- labels.append(y)
- return sentences, numpy.asarray(labels, dtype="int32")
-
-
-def get_features(docs, max_length):
- docs = list(docs)
- Xs = numpy.zeros((len(docs), max_length), dtype="int32")
- for i, doc in enumerate(docs):
- j = 0
- for token in doc:
- vector_id = token.vocab.vectors.find(key=token.orth)
- if vector_id >= 0:
- Xs[i, j] = vector_id
- else:
- Xs[i, j] = 0
- j += 1
- if j >= max_length:
- break
- return Xs
-
-
-def train(
- train_texts,
- train_labels,
- dev_texts,
- dev_labels,
- lstm_shape,
- lstm_settings,
- lstm_optimizer,
- batch_size=100,
- nb_epoch=5,
- by_sentence=True,
-):
-
- print("Loading spaCy")
- nlp = spacy.load("en_vectors_web_lg")
- nlp.add_pipe(nlp.create_pipe("sentencizer"))
- embeddings = get_embeddings(nlp.vocab)
- model = compile_lstm(embeddings, lstm_shape, lstm_settings)
-
- print("Parsing texts...")
- train_docs = list(nlp.pipe(train_texts))
- dev_docs = list(nlp.pipe(dev_texts))
- if by_sentence:
- train_docs, train_labels = get_labelled_sentences(train_docs, train_labels)
- dev_docs, dev_labels = get_labelled_sentences(dev_docs, dev_labels)
-
- train_X = get_features(train_docs, lstm_shape["max_length"])
- dev_X = get_features(dev_docs, lstm_shape["max_length"])
- model.fit(
- train_X,
- train_labels,
- validation_data=(dev_X, dev_labels),
- epochs=nb_epoch,
- batch_size=batch_size,
- )
- return model
-
-
-def compile_lstm(embeddings, shape, settings):
- model = Sequential()
- model.add(
- Embedding(
- embeddings.shape[0],
- embeddings.shape[1],
- input_length=shape["max_length"],
- trainable=False,
- weights=[embeddings],
- mask_zero=True,
- )
- )
- model.add(TimeDistributed(Dense(shape["nr_hidden"], use_bias=False)))
- model.add(
- Bidirectional(
- LSTM(
- shape["nr_hidden"],
- recurrent_dropout=settings["dropout"],
- dropout=settings["dropout"],
- )
- )
- )
- model.add(Dense(shape["nr_class"], activation="sigmoid"))
- model.compile(
- optimizer=Adam(lr=settings["lr"]),
- loss="binary_crossentropy",
- metrics=["accuracy"],
- )
- return model
-
-
-def get_embeddings(vocab):
- return vocab.vectors.data
-
-
-def evaluate(model_dir, texts, labels, max_length=100):
- nlp = spacy.load("en_vectors_web_lg")
- nlp.add_pipe(nlp.create_pipe("sentencizer"))
- nlp.add_pipe(SentimentAnalyser.load(model_dir, nlp, max_length=max_length))
-
- correct = 0
- i = 0
- for doc in nlp.pipe(texts, batch_size=1000):
- correct += bool(doc.sentiment >= 0.5) == bool(labels[i])
- i += 1
- return float(correct) / i
-
-
-def read_data(data_dir, limit=0):
- examples = []
- for subdir, label in (("pos", 1), ("neg", 0)):
- for filename in (data_dir / subdir).iterdir():
- with filename.open() as file_:
- text = file_.read()
- examples.append((text, label))
- random.shuffle(examples)
- if limit >= 1:
- examples = examples[:limit]
- return zip(*examples) # Unzips into two lists
-
-
-@plac.annotations(
- train_dir=("Location of training file or directory"),
- dev_dir=("Location of development file or directory"),
- model_dir=("Location of output model directory",),
- is_runtime=("Demonstrate run-time usage", "flag", "r", bool),
- nr_hidden=("Number of hidden units", "option", "H", int),
- max_length=("Maximum sentence length", "option", "L", int),
- dropout=("Dropout", "option", "d", float),
- learn_rate=("Learn rate", "option", "e", float),
- nb_epoch=("Number of training epochs", "option", "i", int),
- batch_size=("Size of minibatches for training LSTM", "option", "b", int),
- nr_examples=("Limit to N examples", "option", "n", int),
-)
-def main(
- model_dir=None,
- train_dir=None,
- dev_dir=None,
- is_runtime=False,
- nr_hidden=64,
- max_length=100, # Shape
- dropout=0.5,
- learn_rate=0.001, # General NN config
- nb_epoch=5,
- batch_size=256,
- nr_examples=-1,
-): # Training params
- if model_dir is not None:
- model_dir = pathlib.Path(model_dir)
- if train_dir is None or dev_dir is None:
- imdb_data = thinc.extra.datasets.imdb()
- if is_runtime:
- if dev_dir is None:
- dev_texts, dev_labels = zip(*imdb_data[1])
- else:
- dev_texts, dev_labels = read_data(dev_dir)
- acc = evaluate(model_dir, dev_texts, dev_labels, max_length=max_length)
- print(acc)
- else:
- if train_dir is None:
- train_texts, train_labels = zip(*imdb_data[0])
- else:
- print("Read data")
- train_texts, train_labels = read_data(train_dir, limit=nr_examples)
- if dev_dir is None:
- dev_texts, dev_labels = zip(*imdb_data[1])
- else:
- dev_texts, dev_labels = read_data(dev_dir, imdb_data, limit=nr_examples)
- train_labels = numpy.asarray(train_labels, dtype="int32")
- dev_labels = numpy.asarray(dev_labels, dtype="int32")
- lstm = train(
- train_texts,
- train_labels,
- dev_texts,
- dev_labels,
- {"nr_hidden": nr_hidden, "max_length": max_length, "nr_class": 1},
- {"dropout": dropout, "lr": learn_rate},
- {},
- nb_epoch=nb_epoch,
- batch_size=batch_size,
- )
- weights = lstm.get_weights()
- if model_dir is not None:
- with (model_dir / "model").open("wb") as file_:
- pickle.dump(weights[1:], file_)
- with (model_dir / "config.json").open("w") as file_:
- file_.write(lstm.to_json())
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/information_extraction/entity_relations.py b/examples/information_extraction/entity_relations.py
deleted file mode 100644
index c40a3c10d..000000000
--- a/examples/information_extraction/entity_relations.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""A simple example of extracting relations between phrases and entities using
-spaCy's named entity recognizer and the dependency parse. Here, we extract
-money and currency values (entities labelled as MONEY) and then check the
-dependency tree to find the noun phrase they are referring to – for example:
-$9.4 million --> Net income.
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.2.1
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-import spacy
-
-
-TEXTS = [
- "Net income was $9.4 million compared to the prior year of $2.7 million.",
- "Revenue exceeded twelve billion dollars, with a loss of $1b.",
-]
-
-
-@plac.annotations(
- model=("Model to load (needs parser and NER)", "positional", None, str)
-)
-def main(model="en_core_web_sm"):
- nlp = spacy.load(model)
- print("Loaded model '%s'" % model)
- print("Processing %d texts" % len(TEXTS))
-
- for text in TEXTS:
- doc = nlp(text)
- relations = extract_currency_relations(doc)
- for r1, r2 in relations:
- print("{:<10}\t{}\t{}".format(r1.text, r2.ent_type_, r2.text))
-
-
-def filter_spans(spans):
- # Filter a sequence of spans so they don't contain overlaps
- # For spaCy 2.1.4+: this function is available as spacy.util.filter_spans()
- get_sort_key = lambda span: (span.end - span.start, -span.start)
- sorted_spans = sorted(spans, key=get_sort_key, reverse=True)
- result = []
- seen_tokens = set()
- for span in sorted_spans:
- # Check for end - 1 here because boundaries are inclusive
- if span.start not in seen_tokens and span.end - 1 not in seen_tokens:
- result.append(span)
- seen_tokens.update(range(span.start, span.end))
- result = sorted(result, key=lambda span: span.start)
- return result
-
-
-def extract_currency_relations(doc):
- # Merge entities and noun chunks into one token
- spans = list(doc.ents) + list(doc.noun_chunks)
- spans = filter_spans(spans)
- with doc.retokenize() as retokenizer:
- for span in spans:
- retokenizer.merge(span)
-
- relations = []
- for money in filter(lambda w: w.ent_type_ == "MONEY", doc):
- if money.dep_ in ("attr", "dobj"):
- subject = [w for w in money.head.lefts if w.dep_ == "nsubj"]
- if subject:
- subject = subject[0]
- relations.append((subject, money))
- elif money.dep_ == "pobj" and money.head.dep_ == "prep":
- relations.append((money.head.head, money))
- return relations
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # Expected output:
- # Net income MONEY $9.4 million
- # the prior year MONEY $2.7 million
- # Revenue MONEY twelve billion dollars
- # a loss MONEY 1b
diff --git a/examples/information_extraction/parse_subtrees.py b/examples/information_extraction/parse_subtrees.py
deleted file mode 100644
index 2ca9da1ea..000000000
--- a/examples/information_extraction/parse_subtrees.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""This example shows how to navigate the parse tree including subtrees
-attached to a word.
-
-Based on issue #252:
-"In the documents and tutorials the main thing I haven't found is
-examples on how to break sentences down into small sub thoughts/chunks. The
-noun_chunks is handy, but having examples on using the token.head to find small
-(near-complete) sentence chunks would be neat. Lets take the example sentence:
-"displaCy uses CSS and JavaScript to show you how computers understand language"
-
-This sentence has two main parts (XCOMP & CCOMP) according to the breakdown:
-[displaCy] uses CSS and Javascript [to + show]
-show you how computers understand [language]
-
-I'm assuming that we can use the token.head to build these groups."
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.1.0
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-import spacy
-
-
-@plac.annotations(model=("Model to load", "positional", None, str))
-def main(model="en_core_web_sm"):
- nlp = spacy.load(model)
- print("Loaded model '%s'" % model)
-
- doc = nlp(
- "displaCy uses CSS and JavaScript to show you how computers "
- "understand language"
- )
-
- # The easiest way is to find the head of the subtree you want, and then use
- # the `.subtree`, `.children`, `.lefts` and `.rights` iterators. `.subtree`
- # is the one that does what you're asking for most directly:
- for word in doc:
- if word.dep_ in ("xcomp", "ccomp"):
- print("".join(w.text_with_ws for w in word.subtree))
-
- # It'd probably be better for `word.subtree` to return a `Span` object
- # instead of a generator over the tokens. If you want the `Span` you can
- # get it via the `.right_edge` and `.left_edge` properties. The `Span`
- # object is nice because you can easily get a vector, merge it, etc.
- for word in doc:
- if word.dep_ in ("xcomp", "ccomp"):
- subtree_span = doc[word.left_edge.i : word.right_edge.i + 1]
- print(subtree_span.text, "|", subtree_span.root.text)
-
- # You might also want to select a head, and then select a start and end
- # position by walking along its children. You could then take the
- # `.left_edge` and `.right_edge` of those tokens, and use it to calculate
- # a span.
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # Expected output:
- # to show you how computers understand language
- # how computers understand language
- # to show you how computers understand language | show
- # how computers understand language | understand
diff --git a/examples/information_extraction/phrase_matcher.py b/examples/information_extraction/phrase_matcher.py
deleted file mode 100644
index f3622bfdd..000000000
--- a/examples/information_extraction/phrase_matcher.py
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Match a large set of multi-word expressions in O(1) time.
-
-The idea is to associate each word in the vocabulary with a tag, noting whether
-they begin, end, or are inside at least one pattern. An additional tag is used
-for single-word patterns. Complete patterns are also stored in a hash set.
-When we process a document, we look up the words in the vocabulary, to
-associate the words with the tags. We then search for tag-sequences that
-correspond to valid candidates. Finally, we look up the candidates in the hash
-set.
-
-For instance, to search for the phrases "Barack Hussein Obama" and "Hilary
-Clinton", we would associate "Barack" and "Hilary" with the B tag, Hussein with
-the I tag, and Obama and Clinton with the L tag.
-
-The document "Barack Clinton and Hilary Clinton" would have the tag sequence
-[{B}, {L}, {}, {B}, {L}], so we'd get two matches. However, only the second
-candidate is in the phrase dictionary, so only one is returned as a match.
-
-The algorithm is O(n) at run-time for document of length n because we're only
-ever matching over the tag patterns. So no matter how many phrases we're
-looking for, our pattern set stays very small (exact size depends on the
-maximum length we're looking for, as the query language currently has no
-quantifiers).
-
-The example expects a .bz2 file from the Reddit corpus, and a patterns file,
-formatted in jsonl as a sequence of entries like this:
-
-{"text":"Anchorage"}
-{"text":"Angola"}
-{"text":"Ann Arbor"}
-{"text":"Annapolis"}
-{"text":"Appalachia"}
-{"text":"Argentina"}
-
-Reddit comments corpus:
-* https://files.pushshift.io/reddit/
-* https://archive.org/details/2015_reddit_comments_corpus
-
-Compatible with: spaCy v2.0.0+
-"""
-from __future__ import print_function, unicode_literals, division
-
-from bz2 import BZ2File
-import time
-import plac
-import json
-
-from spacy.matcher import PhraseMatcher
-import spacy
-
-
-@plac.annotations(
- patterns_loc=("Path to gazetteer", "positional", None, str),
- text_loc=("Path to Reddit corpus file", "positional", None, str),
- n=("Number of texts to read", "option", "n", int),
- lang=("Language class to initialise", "option", "l", str),
-)
-def main(patterns_loc, text_loc, n=10000, lang="en"):
- nlp = spacy.blank(lang)
- nlp.vocab.lex_attr_getters = {}
- phrases = read_gazetteer(nlp.tokenizer, patterns_loc)
- count = 0
- t1 = time.time()
- for ent_id, text in get_matches(nlp.tokenizer, phrases, read_text(text_loc, n=n)):
- count += 1
- t2 = time.time()
- print("%d docs in %.3f s. %d matches" % (n, (t2 - t1), count))
-
-
-def read_gazetteer(tokenizer, loc, n=-1):
- for i, line in enumerate(open(loc)):
- data = json.loads(line.strip())
- phrase = tokenizer(data["text"])
- for w in phrase:
- _ = tokenizer.vocab[w.text]
- if len(phrase) >= 2:
- yield phrase
-
-
-def read_text(bz2_loc, n=10000):
- with BZ2File(bz2_loc) as file_:
- for i, line in enumerate(file_):
- data = json.loads(line)
- yield data["body"]
- if i >= n:
- break
-
-
-def get_matches(tokenizer, phrases, texts):
- matcher = PhraseMatcher(tokenizer.vocab)
- matcher.add("Phrase", None, *phrases)
- for text in texts:
- doc = tokenizer(text)
- for w in doc:
- _ = doc.vocab[w.text]
- matches = matcher(doc)
- for ent_id, start, end in matches:
- yield (ent_id, doc[start:end].text)
-
-
-if __name__ == "__main__":
- if False:
- import cProfile
- import pstats
-
- cProfile.runctx("plac.call(main)", globals(), locals(), "Profile.prof")
- s = pstats.Stats("Profile.prof")
- s.strip_dirs().sort_stats("time").print_stats()
- else:
- plac.call(main)
diff --git a/examples/keras_parikh_entailment/README.md b/examples/keras_parikh_entailment/README.md
deleted file mode 100644
index 86ba50d9b..000000000
--- a/examples/keras_parikh_entailment/README.md
+++ /dev/null
@@ -1,114 +0,0 @@
-
-
-# A decomposable attention model for Natural Language Inference
-**by Matthew Honnibal, [@honnibal](https://github.com/honnibal)**
-**Updated for spaCy 2.0+ and Keras 2.2.2+ by John Stewart, [@free-variation](https://github.com/free-variation)**
-
-This directory contains an implementation of the entailment prediction model described
-by [Parikh et al. (2016)](https://arxiv.org/pdf/1606.01933.pdf). The model is notable
-for its competitive performance with very few parameters.
-
-The model is implemented using [Keras](https://keras.io/) and [spaCy](https://spacy.io).
-Keras is used to build and train the network. spaCy is used to load
-the [GloVe](http://nlp.stanford.edu/projects/glove/) vectors, perform the
-feature extraction, and help you apply the model at run-time. The following
-demo code shows how the entailment model can be used at runtime, once the
-hook is installed to customise the `.similarity()` method of spaCy's `Doc`
-and `Span` objects:
-
-```python
-def demo(shape):
- nlp = spacy.load('en_vectors_web_lg')
- nlp.add_pipe(KerasSimilarityShim.load(nlp.path / 'similarity', nlp, shape[0]))
-
- doc1 = nlp(u'The king of France is bald.')
- doc2 = nlp(u'France has no king.')
-
- print("Sentence 1:", doc1)
- print("Sentence 2:", doc2)
-
- entailment_type, confidence = doc1.similarity(doc2)
- print("Entailment type:", entailment_type, "(Confidence:", confidence, ")")
-```
-
-Which gives the output `Entailment type: contradiction (Confidence: 0.60604566)`, showing that
-the system has definite opinions about Betrand Russell's [famous conundrum](https://users.drew.edu/jlenz/br-on-denoting.html)!
-
-I'm working on a blog post to explain Parikh et al.'s model in more detail.
-A [notebook](https://github.com/free-variation/spaCy/blob/master/examples/notebooks/Decompositional%20Attention.ipynb) is available that briefly explains this implementation.
-I think it is a very interesting example of the attention mechanism, which
-I didn't understand very well before working through this paper. There are
-lots of ways to extend the model.
-
-## What's where
-
-| File | Description |
-| --- | --- |
-| `__main__.py` | The script that will be executed. Defines the CLI, the data reading, etc — all the boring stuff. |
-| `spacy_hook.py` | Provides a class `KerasSimilarityShim` that lets you use an arbitrary function to customize spaCy's `doc.similarity()` method. Instead of the default average-of-vectors algorithm, when you call `doc1.similarity(doc2)`, you'll get the result of `your_model(doc1, doc2)`. |
-| `keras_decomposable_attention.py` | Defines the neural network model. |
-
-## Setting up
-
-First, install [Keras](https://keras.io/), [spaCy](https://spacy.io) and the spaCy
-English models (about 1GB of data):
-
-```bash
-pip install keras
-pip install spacy
-python -m spacy download en_vectors_web_lg
-```
-
-You'll also want to get Keras working on your GPU, and you will need a backend, such as TensorFlow or Theano.
-This will depend on your set up, so you're mostly on your own for this step. If you're using AWS, try the
-[NVidia AMI](https://aws.amazon.com/marketplace/pp/B00FYCDDTE). It made things pretty easy.
-
-Once you've installed the dependencies, you can run a small preliminary test of
-the Keras model:
-
-```bash
-py.test keras_parikh_entailment/keras_decomposable_attention.py
-```
-
-This compiles the model and fits it with some dummy data. You should see that
-both tests passed.
-
-Finally, download the [Stanford Natural Language Inference corpus](http://nlp.stanford.edu/projects/snli/).
-
-## Running the example
-
-You can run the `keras_parikh_entailment/` directory as a script, which executes the file
-[`keras_parikh_entailment/__main__.py`](__main__.py). If you run the script without arguments
-the usage is shown. Running it with `-h` explains the command line arguments.
-
-The first thing you'll want to do is train the model:
-
-```bash
-python keras_parikh_entailment/ train -t -s
-```
-
-Training takes about 300 epochs for full accuracy, and I haven't rerun the full
-experiment since refactoring things to publish this example — please let me
-know if I've broken something. You should get to at least 85% on the development data even after 10-15 epochs.
-
-The other two modes demonstrate run-time usage. I never like relying on the accuracy printed
-by `.fit()` methods. I never really feel confident until I've run a new process that loads
-the model and starts making predictions, without access to the gold labels. I've therefore
-included an `evaluate` mode.
-
-```bash
-python keras_parikh_entailment/ evaluate -s
-```
-
-Finally, there's also a little demo, which mostly exists to show
-you how run-time usage will eventually look.
-
-```bash
-python keras_parikh_entailment/ demo
-```
-
-## Getting updates
-
-We should have the blog post explaining the model ready before the end of the week. To get
-notified when it's published, you can either follow me on [Twitter](https://twitter.com/honnibal)
-or subscribe to our [mailing list](http://eepurl.com/ckUpQ5).
diff --git a/examples/keras_parikh_entailment/__main__.py b/examples/keras_parikh_entailment/__main__.py
deleted file mode 100644
index ad398dae3..000000000
--- a/examples/keras_parikh_entailment/__main__.py
+++ /dev/null
@@ -1,207 +0,0 @@
-import numpy as np
-import json
-from keras.utils import to_categorical
-import plac
-import sys
-
-from keras_decomposable_attention import build_model
-from spacy_hook import get_embeddings, KerasSimilarityShim
-
-try:
- import cPickle as pickle
-except ImportError:
- import pickle
-
-import spacy
-
-# workaround for keras/tensorflow bug
-# see https://github.com/tensorflow/tensorflow/issues/3388
-import os
-import importlib
-from keras import backend as K
-
-
-def set_keras_backend(backend):
- if K.backend() != backend:
- os.environ["KERAS_BACKEND"] = backend
- importlib.reload(K)
- assert K.backend() == backend
- if backend == "tensorflow":
- K.get_session().close()
- cfg = K.tf.ConfigProto()
- cfg.gpu_options.allow_growth = True
- K.set_session(K.tf.Session(config=cfg))
- K.clear_session()
-
-
-set_keras_backend("tensorflow")
-
-
-def train(train_loc, dev_loc, shape, settings):
- train_texts1, train_texts2, train_labels = read_snli(train_loc)
- dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)
-
- print("Loading spaCy")
- nlp = spacy.load("en_vectors_web_lg")
- assert nlp.path is not None
- print("Processing texts...")
- train_X = create_dataset(nlp, train_texts1, train_texts2, 100, shape[0])
- dev_X = create_dataset(nlp, dev_texts1, dev_texts2, 100, shape[0])
-
- print("Compiling network")
- model = build_model(get_embeddings(nlp.vocab), shape, settings)
-
- print(settings)
- model.fit(
- train_X,
- train_labels,
- validation_data=(dev_X, dev_labels),
- epochs=settings["nr_epoch"],
- batch_size=settings["batch_size"],
- )
- if not (nlp.path / "similarity").exists():
- (nlp.path / "similarity").mkdir()
- print("Saving to", nlp.path / "similarity")
- weights = model.get_weights()
- # remove the embedding matrix. We can reconstruct it.
- del weights[1]
- with (nlp.path / "similarity" / "model").open("wb") as file_:
- pickle.dump(weights, file_)
- with (nlp.path / "similarity" / "config.json").open("w") as file_:
- file_.write(model.to_json())
-
-
-def evaluate(dev_loc, shape):
- dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)
- nlp = spacy.load("en_vectors_web_lg")
- nlp.add_pipe(KerasSimilarityShim.load(nlp.path / "similarity", nlp, shape[0]))
- total = 0.0
- correct = 0.0
- for text1, text2, label in zip(dev_texts1, dev_texts2, dev_labels):
- doc1 = nlp(text1)
- doc2 = nlp(text2)
- sim, _ = doc1.similarity(doc2)
- if sim == KerasSimilarityShim.entailment_types[label.argmax()]:
- correct += 1
- total += 1
- return correct, total
-
-
-def demo(shape):
- nlp = spacy.load("en_vectors_web_lg")
- nlp.add_pipe(KerasSimilarityShim.load(nlp.path / "similarity", nlp, shape[0]))
-
- doc1 = nlp("The king of France is bald.")
- doc2 = nlp("France has no king.")
-
- print("Sentence 1:", doc1)
- print("Sentence 2:", doc2)
-
- entailment_type, confidence = doc1.similarity(doc2)
- print("Entailment type:", entailment_type, "(Confidence:", confidence, ")")
-
-
-LABELS = {"entailment": 0, "contradiction": 1, "neutral": 2}
-
-
-def read_snli(path):
- texts1 = []
- texts2 = []
- labels = []
- with open(path, "r") as file_:
- for line in file_:
- eg = json.loads(line)
- label = eg["gold_label"]
- if label == "-": # per Parikh, ignore - SNLI entries
- continue
- texts1.append(eg["sentence1"])
- texts2.append(eg["sentence2"])
- labels.append(LABELS[label])
- return texts1, texts2, to_categorical(np.asarray(labels, dtype="int32"))
-
-
-def create_dataset(nlp, texts, hypotheses, num_unk, max_length):
- sents = texts + hypotheses
- sents_as_ids = []
- for sent in sents:
- doc = nlp(sent)
- word_ids = []
- for i, token in enumerate(doc):
- # skip odd spaces from tokenizer
- if token.has_vector and token.vector_norm == 0:
- continue
-
- if i > max_length:
- break
-
- if token.has_vector:
- word_ids.append(token.rank + num_unk + 1)
- else:
- # if we don't have a vector, pick an OOV entry
- word_ids.append(token.rank % num_unk + 1)
-
- # there must be a simpler way of generating padded arrays from lists...
- word_id_vec = np.zeros((max_length), dtype="int")
- clipped_len = min(max_length, len(word_ids))
- word_id_vec[:clipped_len] = word_ids[:clipped_len]
- sents_as_ids.append(word_id_vec)
-
- return [np.array(sents_as_ids[: len(texts)]), np.array(sents_as_ids[len(texts) :])]
-
-
-@plac.annotations(
- mode=("Mode to execute", "positional", None, str, ["train", "evaluate", "demo"]),
- train_loc=("Path to training data", "option", "t", str),
- dev_loc=("Path to development or test data", "option", "s", str),
- max_length=("Length to truncate sentences", "option", "L", int),
- nr_hidden=("Number of hidden units", "option", "H", int),
- dropout=("Dropout level", "option", "d", float),
- learn_rate=("Learning rate", "option", "r", float),
- batch_size=("Batch size for neural network training", "option", "b", int),
- nr_epoch=("Number of training epochs", "option", "e", int),
- entail_dir=(
- "Direction of entailment",
- "option",
- "D",
- str,
- ["both", "left", "right"],
- ),
-)
-def main(
- mode,
- train_loc,
- dev_loc,
- max_length=50,
- nr_hidden=200,
- dropout=0.2,
- learn_rate=0.001,
- batch_size=1024,
- nr_epoch=10,
- entail_dir="both",
-):
- shape = (max_length, nr_hidden, 3)
- settings = {
- "lr": learn_rate,
- "dropout": dropout,
- "batch_size": batch_size,
- "nr_epoch": nr_epoch,
- "entail_dir": entail_dir,
- }
-
- if mode == "train":
- if train_loc == None or dev_loc == None:
- print("Train mode requires paths to training and development data sets.")
- sys.exit(1)
- train(train_loc, dev_loc, shape, settings)
- elif mode == "evaluate":
- if dev_loc == None:
- print("Evaluate mode requires paths to test data set.")
- sys.exit(1)
- correct, total = evaluate(dev_loc, shape)
- print(correct, "/", total, correct / total)
- else:
- demo(shape)
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/keras_parikh_entailment/keras_decomposable_attention.py b/examples/keras_parikh_entailment/keras_decomposable_attention.py
deleted file mode 100644
index 2e17a11ee..000000000
--- a/examples/keras_parikh_entailment/keras_decomposable_attention.py
+++ /dev/null
@@ -1,152 +0,0 @@
-# Semantic entailment/similarity with decomposable attention (using spaCy and Keras)
-# Practical state-of-the-art textual entailment with spaCy and Keras
-
-import numpy as np
-from keras import layers, Model, models, optimizers
-from keras import backend as K
-
-
-def build_model(vectors, shape, settings):
- max_length, nr_hidden, nr_class = shape
-
- input1 = layers.Input(shape=(max_length,), dtype="int32", name="words1")
- input2 = layers.Input(shape=(max_length,), dtype="int32", name="words2")
-
- # embeddings (projected)
- embed = create_embedding(vectors, max_length, nr_hidden)
-
- a = embed(input1)
- b = embed(input2)
-
- # step 1: attend
- F = create_feedforward(nr_hidden)
- att_weights = layers.dot([F(a), F(b)], axes=-1)
-
- G = create_feedforward(nr_hidden)
-
- if settings["entail_dir"] == "both":
- norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
- norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
- alpha = layers.dot([norm_weights_a, a], axes=1)
- beta = layers.dot([norm_weights_b, b], axes=1)
-
- # step 2: compare
- comp1 = layers.concatenate([a, beta])
- comp2 = layers.concatenate([b, alpha])
- v1 = layers.TimeDistributed(G)(comp1)
- v2 = layers.TimeDistributed(G)(comp2)
-
- # step 3: aggregate
- v1_sum = layers.Lambda(sum_word)(v1)
- v2_sum = layers.Lambda(sum_word)(v2)
- concat = layers.concatenate([v1_sum, v2_sum])
-
- elif settings["entail_dir"] == "left":
- norm_weights_a = layers.Lambda(normalizer(1))(att_weights)
- alpha = layers.dot([norm_weights_a, a], axes=1)
- comp2 = layers.concatenate([b, alpha])
- v2 = layers.TimeDistributed(G)(comp2)
- v2_sum = layers.Lambda(sum_word)(v2)
- concat = v2_sum
-
- else:
- norm_weights_b = layers.Lambda(normalizer(2))(att_weights)
- beta = layers.dot([norm_weights_b, b], axes=1)
- comp1 = layers.concatenate([a, beta])
- v1 = layers.TimeDistributed(G)(comp1)
- v1_sum = layers.Lambda(sum_word)(v1)
- concat = v1_sum
-
- H = create_feedforward(nr_hidden)
- out = H(concat)
- out = layers.Dense(nr_class, activation="softmax")(out)
-
- model = Model([input1, input2], out)
-
- model.compile(
- optimizer=optimizers.Adam(lr=settings["lr"]),
- loss="categorical_crossentropy",
- metrics=["accuracy"],
- )
-
- return model
-
-
-def create_embedding(vectors, max_length, projected_dim):
- return models.Sequential(
- [
- layers.Embedding(
- vectors.shape[0],
- vectors.shape[1],
- input_length=max_length,
- weights=[vectors],
- trainable=False,
- ),
- layers.TimeDistributed(
- layers.Dense(projected_dim, activation=None, use_bias=False)
- ),
- ]
- )
-
-
-def create_feedforward(num_units=200, activation="relu", dropout_rate=0.2):
- return models.Sequential(
- [
- layers.Dense(num_units, activation=activation),
- layers.Dropout(dropout_rate),
- layers.Dense(num_units, activation=activation),
- layers.Dropout(dropout_rate),
- ]
- )
-
-
-def normalizer(axis):
- def _normalize(att_weights):
- exp_weights = K.exp(att_weights)
- sum_weights = K.sum(exp_weights, axis=axis, keepdims=True)
- return exp_weights / sum_weights
-
- return _normalize
-
-
-def sum_word(x):
- return K.sum(x, axis=1)
-
-
-def test_build_model():
- vectors = np.ndarray((100, 8), dtype="float32")
- shape = (10, 16, 3)
- settings = {"lr": 0.001, "dropout": 0.2, "gru_encode": True, "entail_dir": "both"}
- model = build_model(vectors, shape, settings)
-
-
-def test_fit_model():
- def _generate_X(nr_example, length, nr_vector):
- X1 = np.ndarray((nr_example, length), dtype="int32")
- X1 *= X1 < nr_vector
- X1 *= 0 <= X1
- X2 = np.ndarray((nr_example, length), dtype="int32")
- X2 *= X2 < nr_vector
- X2 *= 0 <= X2
- return [X1, X2]
-
- def _generate_Y(nr_example, nr_class):
- ys = np.zeros((nr_example, nr_class), dtype="int32")
- for i in range(nr_example):
- ys[i, i % nr_class] = 1
- return ys
-
- vectors = np.ndarray((100, 8), dtype="float32")
- shape = (10, 16, 3)
- settings = {"lr": 0.001, "dropout": 0.2, "gru_encode": True, "entail_dir": "both"}
- model = build_model(vectors, shape, settings)
-
- train_X = _generate_X(20, shape[0], vectors.shape[0])
- train_Y = _generate_Y(20, shape[2])
- dev_X = _generate_X(15, shape[0], vectors.shape[0])
- dev_Y = _generate_Y(15, shape[2])
-
- model.fit(train_X, train_Y, validation_data=(dev_X, dev_Y), epochs=5, batch_size=4)
-
-
-__all__ = [build_model]
diff --git a/examples/keras_parikh_entailment/spacy_hook.py b/examples/keras_parikh_entailment/spacy_hook.py
deleted file mode 100644
index 307669a70..000000000
--- a/examples/keras_parikh_entailment/spacy_hook.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import numpy as np
-from keras.models import model_from_json
-
-try:
- import cPickle as pickle
-except ImportError:
- import pickle
-
-
-class KerasSimilarityShim(object):
- entailment_types = ["entailment", "contradiction", "neutral"]
-
- @classmethod
- def load(cls, path, nlp, max_length=100, get_features=None):
-
- if get_features is None:
- get_features = get_word_ids
-
- with (path / "config.json").open() as file_:
- model = model_from_json(file_.read())
- with (path / "model").open("rb") as file_:
- weights = pickle.load(file_)
-
- embeddings = get_embeddings(nlp.vocab)
- weights.insert(1, embeddings)
- model.set_weights(weights)
-
- return cls(model, get_features=get_features, max_length=max_length)
-
- def __init__(self, model, get_features=None, max_length=100):
- self.model = model
- self.get_features = get_features
- self.max_length = max_length
-
- def __call__(self, doc):
- doc.user_hooks["similarity"] = self.predict
- doc.user_span_hooks["similarity"] = self.predict
-
- return doc
-
- def predict(self, doc1, doc2):
- x1 = self.get_features([doc1], max_length=self.max_length)
- x2 = self.get_features([doc2], max_length=self.max_length)
- scores = self.model.predict([x1, x2])
-
- return self.entailment_types[scores.argmax()], scores.max()
-
-
-def get_embeddings(vocab, nr_unk=100):
- # the extra +1 is for a zero vector representing sentence-final padding
- num_vectors = max(lex.rank for lex in vocab) + 2
-
- # create random vectors for OOV tokens
- oov = np.random.normal(size=(nr_unk, vocab.vectors_length))
- oov = oov / oov.sum(axis=1, keepdims=True)
-
- vectors = np.zeros((num_vectors + nr_unk, vocab.vectors_length), dtype="float32")
- vectors[1 : (nr_unk + 1),] = oov
- for lex in vocab:
- if lex.has_vector and lex.vector_norm > 0:
- vectors[nr_unk + lex.rank + 1] = lex.vector / lex.vector_norm
-
- return vectors
-
-
-def get_word_ids(docs, max_length=100, nr_unk=100):
- Xs = np.zeros((len(docs), max_length), dtype="int32")
-
- for i, doc in enumerate(docs):
- for j, token in enumerate(doc):
- if j == max_length:
- break
- if token.has_vector:
- Xs[i, j] = token.rank + nr_unk + 1
- else:
- Xs[i, j] = token.rank % nr_unk + 1
- return Xs
diff --git a/examples/load_from_docbin.py b/examples/load_from_docbin.py
deleted file mode 100644
index f26e7fc49..000000000
--- a/examples/load_from_docbin.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# coding: utf-8
-"""
-Example of loading previously parsed text using spaCy's DocBin class. The example
-performs an entity count to show that the annotations are available.
-For more details, see https://spacy.io/usage/saving-loading#docs
-Installation:
-python -m spacy download en_core_web_lg
-Usage:
-python examples/load_from_docbin.py en_core_web_lg RC_2015-03-9.spacy
-"""
-from __future__ import unicode_literals
-
-import spacy
-from spacy.tokens import DocBin
-from timeit import default_timer as timer
-from collections import Counter
-
-EXAMPLE_PARSES_PATH = "RC_2015-03-9.spacy"
-
-
-def main(model="en_core_web_lg", docbin_path=EXAMPLE_PARSES_PATH):
- nlp = spacy.load(model)
- print("Reading data from {}".format(docbin_path))
- with open(docbin_path, "rb") as file_:
- bytes_data = file_.read()
- nr_word = 0
- start_time = timer()
- entities = Counter()
- docbin = DocBin().from_bytes(bytes_data)
- for doc in docbin.get_docs(nlp.vocab):
- nr_word += len(doc)
- entities.update((e.label_, e.text) for e in doc.ents)
- end_time = timer()
- msg = "Loaded {nr_word} words in {seconds} seconds ({wps} words per second)"
- wps = nr_word / (end_time - start_time)
- print(msg.format(nr_word=nr_word, seconds=end_time - start_time, wps=wps))
- print("Most common entities:")
- for (label, entity), freq in entities.most_common(30):
- print(freq, entity, label)
-
-
-if __name__ == "__main__":
- import plac
-
- plac.call(main)
diff --git a/examples/notebooks/Decompositional Attention.ipynb b/examples/notebooks/Decompositional Attention.ipynb
deleted file mode 100644
index 8baaf7d33..000000000
--- a/examples/notebooks/Decompositional Attention.ipynb
+++ /dev/null
@@ -1,955 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Natural language inference using spaCy and Keras"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Introduction"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This notebook details an implementation of the natural language inference model presented in [(Parikh et al, 2016)](https://arxiv.org/abs/1606.01933). The model is notable for the small number of paramaters *and hyperparameters* it specifices, while still yielding good performance."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Constructing the dataset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import spacy\n",
- "import numpy as np"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We only need the GloVe vectors from spaCy, not a full NLP pipeline."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "nlp = spacy.load('en_vectors_web_lg')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Function to load the SNLI dataset. The categories are converted to one-shot representation. The function comes from an example in spaCy."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/jds/tensorflow-gpu/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
- " from ._conv import register_converters as _register_converters\n",
- "Using TensorFlow backend.\n"
- ]
- }
- ],
- "source": [
- "import json\n",
- "from keras.utils import to_categorical\n",
- "\n",
- "LABELS = {'entailment': 0, 'contradiction': 1, 'neutral': 2}\n",
- "def read_snli(path):\n",
- " texts1 = []\n",
- " texts2 = []\n",
- " labels = []\n",
- " with open(path, 'r') as file_:\n",
- " for line in file_:\n",
- " eg = json.loads(line)\n",
- " label = eg['gold_label']\n",
- " if label == '-': # per Parikh, ignore - SNLI entries\n",
- " continue\n",
- " texts1.append(eg['sentence1'])\n",
- " texts2.append(eg['sentence2'])\n",
- " labels.append(LABELS[label])\n",
- " return texts1, texts2, to_categorical(np.asarray(labels, dtype='int32'))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Because Keras can do the train/test split for us, we'll load *all* SNLI triples from one file."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "texts,hypotheses,labels = read_snli('snli/snli_1.0_train.jsonl')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [],
- "source": [
- "def create_dataset(nlp, texts, hypotheses, num_oov, max_length, norm_vectors = True):\n",
- " sents = texts + hypotheses\n",
- " \n",
- " # the extra +1 is for a zero vector represting NULL for padding\n",
- " num_vectors = max(lex.rank for lex in nlp.vocab) + 2 \n",
- " \n",
- " # create random vectors for OOV tokens\n",
- " oov = np.random.normal(size=(num_oov, nlp.vocab.vectors_length))\n",
- " oov = oov / oov.sum(axis=1, keepdims=True)\n",
- " \n",
- " vectors = np.zeros((num_vectors + num_oov, nlp.vocab.vectors_length), dtype='float32')\n",
- " vectors[num_vectors:, ] = oov\n",
- " for lex in nlp.vocab:\n",
- " if lex.has_vector and lex.vector_norm > 0:\n",
- " vectors[lex.rank + 1] = lex.vector / lex.vector_norm if norm_vectors == True else lex.vector\n",
- " \n",
- " sents_as_ids = []\n",
- " for sent in sents:\n",
- " doc = nlp(sent)\n",
- " word_ids = []\n",
- " \n",
- " for i, token in enumerate(doc):\n",
- " # skip odd spaces from tokenizer\n",
- " if token.has_vector and token.vector_norm == 0:\n",
- " continue\n",
- " \n",
- " if i > max_length:\n",
- " break\n",
- " \n",
- " if token.has_vector:\n",
- " word_ids.append(token.rank + 1)\n",
- " else:\n",
- " # if we don't have a vector, pick an OOV entry\n",
- " word_ids.append(token.rank % num_oov + num_vectors) \n",
- " \n",
- " # there must be a simpler way of generating padded arrays from lists...\n",
- " word_id_vec = np.zeros((max_length), dtype='int')\n",
- " clipped_len = min(max_length, len(word_ids))\n",
- " word_id_vec[:clipped_len] = word_ids[:clipped_len]\n",
- " sents_as_ids.append(word_id_vec)\n",
- " \n",
- " \n",
- " return vectors, np.array(sents_as_ids[:len(texts)]), np.array(sents_as_ids[len(texts):])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [],
- "source": [
- "sem_vectors, text_vectors, hypothesis_vectors = create_dataset(nlp, texts, hypotheses, 100, 50, True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [],
- "source": [
- "texts_test,hypotheses_test,labels_test = read_snli('snli/snli_1.0_test.jsonl')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [],
- "source": [
- "_, text_vectors_test, hypothesis_vectors_test = create_dataset(nlp, texts_test, hypotheses_test, 100, 50, True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We use spaCy to tokenize the sentences and return, when available, a semantic vector for each token. \n",
- "\n",
- "OOV terms (tokens for which no semantic vector is available) are assigned to one of a set of randomly-generated OOV vectors, per (Parikh et al, 2016).\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Note that we will clip sentences to 50 words maximum."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "from keras import layers, Model, models\n",
- "from keras import backend as K"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Building the model"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The embedding layer copies the 300-dimensional GloVe vectors into GPU memory. Per (Parikh et al, 2016), the vectors, which are not adapted during training, are projected down to lower-dimensional vectors using a trained projection matrix."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [],
- "source": [
- "def create_embedding(vectors, max_length, projected_dim):\n",
- " return models.Sequential([\n",
- " layers.Embedding(\n",
- " vectors.shape[0],\n",
- " vectors.shape[1],\n",
- " input_length=max_length,\n",
- " weights=[vectors],\n",
- " trainable=False),\n",
- " \n",
- " layers.TimeDistributed(\n",
- " layers.Dense(projected_dim,\n",
- " activation=None,\n",
- " use_bias=False))\n",
- " ])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The Parikh model makes use of three feedforward blocks that construct nonlinear combinations of their input. Each block contains two ReLU layers and two dropout layers."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [],
- "source": [
- "def create_feedforward(num_units=200, activation='relu', dropout_rate=0.2):\n",
- " return models.Sequential([\n",
- " layers.Dense(num_units, activation=activation),\n",
- " layers.Dropout(dropout_rate),\n",
- " layers.Dense(num_units, activation=activation),\n",
- " layers.Dropout(dropout_rate)\n",
- " ])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The basic idea of the (Parikh et al, 2016) model is to:\n",
- "\n",
- "1. *Align*: Construct an alignment of subphrases in the text and hypothesis using an attention-like mechanism, called \"decompositional\" because the layer is applied to each of the two sentences individually rather than to their product. The dot product of the nonlinear transformations of the inputs is then normalized vertically and horizontally to yield a pair of \"soft\" alignment structures, from text->hypothesis and hypothesis->text. Concretely, for each word in one sentence, a multinomial distribution is computed over the words of the other sentence, by learning a multinomial logistic with softmax target.\n",
- "2. *Compare*: Each word is now compared to its aligned phrase using a function modeled as a two-layer feedforward ReLU network. The output is a high-dimensional representation of the strength of association between word and aligned phrase.\n",
- "3. *Aggregate*: The comparison vectors are summed, separately, for the text and the hypothesis. The result is two vectors: one that describes the degree of association of the text to the hypothesis, and the second, of the hypothesis to the text.\n",
- "4. Finally, these two vectors are processed by a dense layer followed by a softmax classifier, as usual.\n",
- "\n",
- "Note that because in entailment the truth conditions of the consequent must be a subset of those of the antecedent, it is not obvious that we need both vectors in step (3). Entailment is not symmetric. It may be enough to just use the hypothesis->text vector. We will explore this possibility later."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We need a couple of little functions for Lambda layers to normalize and aggregate weights:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [],
- "source": [
- "def normalizer(axis):\n",
- " def _normalize(att_weights):\n",
- " exp_weights = K.exp(att_weights)\n",
- " sum_weights = K.sum(exp_weights, axis=axis, keepdims=True)\n",
- " return exp_weights/sum_weights\n",
- " return _normalize\n",
- "\n",
- "def sum_word(x):\n",
- " return K.sum(x, axis=1)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [],
- "source": [
- "def build_model(vectors, max_length, num_hidden, num_classes, projected_dim, entail_dir='both'):\n",
- " input1 = layers.Input(shape=(max_length,), dtype='int32', name='words1')\n",
- " input2 = layers.Input(shape=(max_length,), dtype='int32', name='words2')\n",
- " \n",
- " # embeddings (projected)\n",
- " embed = create_embedding(vectors, max_length, projected_dim)\n",
- " \n",
- " a = embed(input1)\n",
- " b = embed(input2)\n",
- " \n",
- " # step 1: attend\n",
- " F = create_feedforward(num_hidden)\n",
- " att_weights = layers.dot([F(a), F(b)], axes=-1)\n",
- " \n",
- " G = create_feedforward(num_hidden)\n",
- " \n",
- " if entail_dir == 'both':\n",
- " norm_weights_a = layers.Lambda(normalizer(1))(att_weights)\n",
- " norm_weights_b = layers.Lambda(normalizer(2))(att_weights)\n",
- " alpha = layers.dot([norm_weights_a, a], axes=1)\n",
- " beta = layers.dot([norm_weights_b, b], axes=1)\n",
- "\n",
- " # step 2: compare\n",
- " comp1 = layers.concatenate([a, beta])\n",
- " comp2 = layers.concatenate([b, alpha])\n",
- " v1 = layers.TimeDistributed(G)(comp1)\n",
- " v2 = layers.TimeDistributed(G)(comp2)\n",
- "\n",
- " # step 3: aggregate\n",
- " v1_sum = layers.Lambda(sum_word)(v1)\n",
- " v2_sum = layers.Lambda(sum_word)(v2)\n",
- " concat = layers.concatenate([v1_sum, v2_sum])\n",
- " elif entail_dir == 'left':\n",
- " norm_weights_a = layers.Lambda(normalizer(1))(att_weights)\n",
- " alpha = layers.dot([norm_weights_a, a], axes=1)\n",
- " comp2 = layers.concatenate([b, alpha])\n",
- " v2 = layers.TimeDistributed(G)(comp2)\n",
- " v2_sum = layers.Lambda(sum_word)(v2)\n",
- " concat = v2_sum\n",
- " else:\n",
- " norm_weights_b = layers.Lambda(normalizer(2))(att_weights)\n",
- " beta = layers.dot([norm_weights_b, b], axes=1)\n",
- " comp1 = layers.concatenate([a, beta])\n",
- " v1 = layers.TimeDistributed(G)(comp1)\n",
- " v1_sum = layers.Lambda(sum_word)(v1)\n",
- " concat = v1_sum\n",
- " \n",
- " H = create_feedforward(num_hidden)\n",
- " out = H(concat)\n",
- " out = layers.Dense(num_classes, activation='softmax')(out)\n",
- " \n",
- " model = Model([input1, input2], out)\n",
- " \n",
- " model.compile(optimizer='adam',\n",
- " loss='categorical_crossentropy',\n",
- " metrics=['accuracy'])\n",
- " return model\n",
- " \n",
- " \n",
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "__________________________________________________________________________________________________\n",
- "Layer (type) Output Shape Param # Connected to \n",
- "==================================================================================================\n",
- "words1 (InputLayer) (None, 50) 0 \n",
- "__________________________________________________________________________________________________\n",
- "words2 (InputLayer) (None, 50) 0 \n",
- "__________________________________________________________________________________________________\n",
- "sequential_1 (Sequential) (None, 50, 200) 321381600 words1[0][0] \n",
- " words2[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "sequential_2 (Sequential) (None, 50, 200) 80400 sequential_1[1][0] \n",
- " sequential_1[2][0] \n",
- "__________________________________________________________________________________________________\n",
- "dot_1 (Dot) (None, 50, 50) 0 sequential_2[1][0] \n",
- " sequential_2[2][0] \n",
- "__________________________________________________________________________________________________\n",
- "lambda_2 (Lambda) (None, 50, 50) 0 dot_1[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "lambda_1 (Lambda) (None, 50, 50) 0 dot_1[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "dot_3 (Dot) (None, 50, 200) 0 lambda_2[0][0] \n",
- " sequential_1[2][0] \n",
- "__________________________________________________________________________________________________\n",
- "dot_2 (Dot) (None, 50, 200) 0 lambda_1[0][0] \n",
- " sequential_1[1][0] \n",
- "__________________________________________________________________________________________________\n",
- "concatenate_1 (Concatenate) (None, 50, 400) 0 sequential_1[1][0] \n",
- " dot_3[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "concatenate_2 (Concatenate) (None, 50, 400) 0 sequential_1[2][0] \n",
- " dot_2[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "time_distributed_2 (TimeDistrib (None, 50, 200) 120400 concatenate_1[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "time_distributed_3 (TimeDistrib (None, 50, 200) 120400 concatenate_2[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "lambda_3 (Lambda) (None, 200) 0 time_distributed_2[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "lambda_4 (Lambda) (None, 200) 0 time_distributed_3[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "concatenate_3 (Concatenate) (None, 400) 0 lambda_3[0][0] \n",
- " lambda_4[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "sequential_4 (Sequential) (None, 200) 120400 concatenate_3[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "dense_8 (Dense) (None, 3) 603 sequential_4[1][0] \n",
- "==================================================================================================\n",
- "Total params: 321,703,403\n",
- "Trainable params: 381,803\n",
- "Non-trainable params: 321,321,600\n",
- "__________________________________________________________________________________________________\n"
- ]
- }
- ],
- "source": [
- "K.clear_session()\n",
- "m = build_model(sem_vectors, 50, 200, 3, 200)\n",
- "m.summary()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The number of trainable parameters, ~381k, is the number given by Parikh et al, so we're on the right track."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Training the model"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Parikh et al use tiny batches of 4, training for 50MM batches, which amounts to around 500 epochs. Here we'll use large batches to better use the GPU, and train for fewer epochs -- for purposes of this experiment."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Train on 549367 samples, validate on 9824 samples\n",
- "Epoch 1/50\n",
- "549367/549367 [==============================] - 34s 62us/step - loss: 0.7599 - acc: 0.6617 - val_loss: 0.5396 - val_acc: 0.7861\n",
- "Epoch 2/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.5611 - acc: 0.7763 - val_loss: 0.4892 - val_acc: 0.8085\n",
- "Epoch 3/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.5212 - acc: 0.7948 - val_loss: 0.4574 - val_acc: 0.8261\n",
- "Epoch 4/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4986 - acc: 0.8045 - val_loss: 0.4410 - val_acc: 0.8274\n",
- "Epoch 5/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4819 - acc: 0.8114 - val_loss: 0.4224 - val_acc: 0.8383\n",
- "Epoch 6/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4714 - acc: 0.8166 - val_loss: 0.4200 - val_acc: 0.8379\n",
- "Epoch 7/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4633 - acc: 0.8203 - val_loss: 0.4098 - val_acc: 0.8457\n",
- "Epoch 8/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4558 - acc: 0.8232 - val_loss: 0.4114 - val_acc: 0.8415\n",
- "Epoch 9/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4508 - acc: 0.8250 - val_loss: 0.4062 - val_acc: 0.8477\n",
- "Epoch 10/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4433 - acc: 0.8286 - val_loss: 0.3982 - val_acc: 0.8486\n",
- "Epoch 11/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4388 - acc: 0.8307 - val_loss: 0.3953 - val_acc: 0.8497\n",
- "Epoch 12/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4351 - acc: 0.8321 - val_loss: 0.3973 - val_acc: 0.8522\n",
- "Epoch 13/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4309 - acc: 0.8342 - val_loss: 0.3939 - val_acc: 0.8539\n",
- "Epoch 14/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4269 - acc: 0.8355 - val_loss: 0.3932 - val_acc: 0.8517\n",
- "Epoch 15/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4247 - acc: 0.8369 - val_loss: 0.3938 - val_acc: 0.8515\n",
- "Epoch 16/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4208 - acc: 0.8379 - val_loss: 0.3936 - val_acc: 0.8504\n",
- "Epoch 17/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4194 - acc: 0.8390 - val_loss: 0.3885 - val_acc: 0.8560\n",
- "Epoch 18/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4162 - acc: 0.8402 - val_loss: 0.3874 - val_acc: 0.8561\n",
- "Epoch 19/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4140 - acc: 0.8409 - val_loss: 0.3889 - val_acc: 0.8545\n",
- "Epoch 20/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4114 - acc: 0.8426 - val_loss: 0.3864 - val_acc: 0.8583\n",
- "Epoch 21/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4092 - acc: 0.8430 - val_loss: 0.3870 - val_acc: 0.8561\n",
- "Epoch 22/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4062 - acc: 0.8442 - val_loss: 0.3852 - val_acc: 0.8577\n",
- "Epoch 23/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4050 - acc: 0.8450 - val_loss: 0.3850 - val_acc: 0.8578\n",
- "Epoch 24/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4035 - acc: 0.8455 - val_loss: 0.3825 - val_acc: 0.8555\n",
- "Epoch 25/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.4018 - acc: 0.8460 - val_loss: 0.3837 - val_acc: 0.8573\n",
- "Epoch 26/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3989 - acc: 0.8476 - val_loss: 0.3843 - val_acc: 0.8599\n",
- "Epoch 27/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3979 - acc: 0.8481 - val_loss: 0.3841 - val_acc: 0.8589\n",
- "Epoch 28/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3967 - acc: 0.8484 - val_loss: 0.3811 - val_acc: 0.8575\n",
- "Epoch 29/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3956 - acc: 0.8492 - val_loss: 0.3829 - val_acc: 0.8589\n",
- "Epoch 30/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3938 - acc: 0.8499 - val_loss: 0.3859 - val_acc: 0.8562\n",
- "Epoch 31/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3925 - acc: 0.8500 - val_loss: 0.3798 - val_acc: 0.8587\n",
- "Epoch 32/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3906 - acc: 0.8509 - val_loss: 0.3834 - val_acc: 0.8569\n",
- "Epoch 33/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3893 - acc: 0.8511 - val_loss: 0.3806 - val_acc: 0.8588\n",
- "Epoch 34/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3885 - acc: 0.8515 - val_loss: 0.3828 - val_acc: 0.8603\n",
- "Epoch 35/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3879 - acc: 0.8520 - val_loss: 0.3800 - val_acc: 0.8594\n",
- "Epoch 36/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3860 - acc: 0.8530 - val_loss: 0.3796 - val_acc: 0.8577\n",
- "Epoch 37/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3856 - acc: 0.8532 - val_loss: 0.3857 - val_acc: 0.8591\n",
- "Epoch 38/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3838 - acc: 0.8535 - val_loss: 0.3835 - val_acc: 0.8603\n",
- "Epoch 39/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3830 - acc: 0.8543 - val_loss: 0.3830 - val_acc: 0.8599\n",
- "Epoch 40/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3818 - acc: 0.8548 - val_loss: 0.3832 - val_acc: 0.8559\n",
- "Epoch 41/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3806 - acc: 0.8551 - val_loss: 0.3845 - val_acc: 0.8553\n",
- "Epoch 42/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3803 - acc: 0.8550 - val_loss: 0.3789 - val_acc: 0.8617\n",
- "Epoch 43/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3791 - acc: 0.8556 - val_loss: 0.3835 - val_acc: 0.8580\n",
- "Epoch 44/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3778 - acc: 0.8565 - val_loss: 0.3799 - val_acc: 0.8580\n",
- "Epoch 45/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3766 - acc: 0.8571 - val_loss: 0.3790 - val_acc: 0.8625\n",
- "Epoch 46/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3770 - acc: 0.8569 - val_loss: 0.3820 - val_acc: 0.8590\n",
- "Epoch 47/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3761 - acc: 0.8573 - val_loss: 0.3831 - val_acc: 0.8581\n",
- "Epoch 48/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3739 - acc: 0.8579 - val_loss: 0.3828 - val_acc: 0.8599\n",
- "Epoch 49/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3738 - acc: 0.8577 - val_loss: 0.3785 - val_acc: 0.8590\n",
- "Epoch 50/50\n",
- "549367/549367 [==============================] - 33s 60us/step - loss: 0.3726 - acc: 0.8580 - val_loss: 0.3820 - val_acc: 0.8585\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "m.fit([text_vectors, hypothesis_vectors], labels, batch_size=1024, epochs=50,validation_data=([text_vectors_test, hypothesis_vectors_test], labels_test))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The result is broadly in the region reported by Parikh et al: ~86 vs 86.3%. The small difference might be accounted for by differences in `max_length` (here set at 50) and in the training regime. Note that the run above validates on the `*_test` vectors passed via `validation_data`, not on a Keras validation split."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Experiment: the asymmetric model"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "It was suggested earlier that, based on the semantics of entailment, the vector representing the strength of association from the hypothesis to the text is all that is needed for classifying the entailment.\n",
- "\n",
- "The following model removes consideration of the complementary vector (text to hypothesis) from the computation. This will decrease the parameter count slightly, because the final dense layers will be smaller, and speed up the forward pass when predicting, because fewer calculations will be needed."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "__________________________________________________________________________________________________\n",
- "Layer (type) Output Shape Param # Connected to \n",
- "==================================================================================================\n",
- "words2 (InputLayer) (None, 50) 0 \n",
- "__________________________________________________________________________________________________\n",
- "words1 (InputLayer) (None, 50) 0 \n",
- "__________________________________________________________________________________________________\n",
- "sequential_5 (Sequential) (None, 50, 200) 321381600 words1[0][0] \n",
- " words2[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "sequential_6 (Sequential) (None, 50, 200) 80400 sequential_5[1][0] \n",
- " sequential_5[2][0] \n",
- "__________________________________________________________________________________________________\n",
- "dot_4 (Dot) (None, 50, 50) 0 sequential_6[1][0] \n",
- " sequential_6[2][0] \n",
- "__________________________________________________________________________________________________\n",
- "lambda_5 (Lambda) (None, 50, 50) 0 dot_4[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "dot_5 (Dot) (None, 50, 200) 0 lambda_5[0][0] \n",
- " sequential_5[1][0] \n",
- "__________________________________________________________________________________________________\n",
- "concatenate_4 (Concatenate) (None, 50, 400) 0 sequential_5[2][0] \n",
- " dot_5[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "time_distributed_5 (TimeDistrib (None, 50, 200) 120400 concatenate_4[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "lambda_6 (Lambda) (None, 200) 0 time_distributed_5[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "sequential_8 (Sequential) (None, 200) 80400 lambda_6[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "dense_16 (Dense) (None, 3) 603 sequential_8[1][0] \n",
- "==================================================================================================\n",
- "Total params: 321,663,403\n",
- "Trainable params: 341,803\n",
- "Non-trainable params: 321,321,600\n",
- "__________________________________________________________________________________________________\n"
- ]
- }
- ],
- "source": [
- "m1 = build_model(sem_vectors, 50, 200, 3, 200, 'left')\n",
- "m1.summary()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The parameter count has indeed decreased by 40,000, corresponding to the 200x200 smaller H function."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Train on 549367 samples, validate on 9824 samples\n",
- "Epoch 1/50\n",
- "549367/549367 [==============================] - 25s 46us/step - loss: 0.7331 - acc: 0.6770 - val_loss: 0.5257 - val_acc: 0.7936\n",
- "Epoch 2/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.5518 - acc: 0.7799 - val_loss: 0.4717 - val_acc: 0.8159\n",
- "Epoch 3/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.5147 - acc: 0.7967 - val_loss: 0.4449 - val_acc: 0.8278\n",
- "Epoch 4/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4948 - acc: 0.8060 - val_loss: 0.4326 - val_acc: 0.8344\n",
- "Epoch 5/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4814 - acc: 0.8122 - val_loss: 0.4247 - val_acc: 0.8359\n",
- "Epoch 6/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4712 - acc: 0.8162 - val_loss: 0.4143 - val_acc: 0.8430\n",
- "Epoch 7/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4635 - acc: 0.8205 - val_loss: 0.4172 - val_acc: 0.8401\n",
- "Epoch 8/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4570 - acc: 0.8223 - val_loss: 0.4106 - val_acc: 0.8422\n",
- "Epoch 9/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4505 - acc: 0.8259 - val_loss: 0.4043 - val_acc: 0.8451\n",
- "Epoch 10/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4459 - acc: 0.8280 - val_loss: 0.4050 - val_acc: 0.8467\n",
- "Epoch 11/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4405 - acc: 0.8300 - val_loss: 0.3975 - val_acc: 0.8481\n",
- "Epoch 12/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4360 - acc: 0.8324 - val_loss: 0.4026 - val_acc: 0.8496\n",
- "Epoch 13/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4327 - acc: 0.8334 - val_loss: 0.4024 - val_acc: 0.8471\n",
- "Epoch 14/50\n",
- "549367/549367 [==============================] - 24s 45us/step - loss: 0.4293 - acc: 0.8350 - val_loss: 0.3955 - val_acc: 0.8496\n",
- "Epoch 15/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4263 - acc: 0.8369 - val_loss: 0.3980 - val_acc: 0.8490\n",
- "Epoch 16/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4236 - acc: 0.8377 - val_loss: 0.3958 - val_acc: 0.8496\n",
- "Epoch 17/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4213 - acc: 0.8384 - val_loss: 0.3954 - val_acc: 0.8496\n",
- "Epoch 18/50\n",
- "549367/549367 [==============================] - 24s 45us/step - loss: 0.4187 - acc: 0.8394 - val_loss: 0.3929 - val_acc: 0.8514\n",
- "Epoch 19/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4157 - acc: 0.8409 - val_loss: 0.3939 - val_acc: 0.8507\n",
- "Epoch 20/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4135 - acc: 0.8417 - val_loss: 0.3953 - val_acc: 0.8522\n",
- "Epoch 21/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4122 - acc: 0.8424 - val_loss: 0.3974 - val_acc: 0.8506\n",
- "Epoch 22/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4099 - acc: 0.8435 - val_loss: 0.3918 - val_acc: 0.8522\n",
- "Epoch 23/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4075 - acc: 0.8443 - val_loss: 0.3901 - val_acc: 0.8513\n",
- "Epoch 24/50\n",
- "549367/549367 [==============================] - 24s 44us/step - loss: 0.4067 - acc: 0.8447 - val_loss: 0.3885 - val_acc: 0.8543\n",
- "Epoch 25/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4047 - acc: 0.8454 - val_loss: 0.3846 - val_acc: 0.8531\n",
- "Epoch 26/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.4031 - acc: 0.8461 - val_loss: 0.3864 - val_acc: 0.8562\n",
- "Epoch 27/50\n",
- "549367/549367 [==============================] - 24s 45us/step - loss: 0.4020 - acc: 0.8467 - val_loss: 0.3874 - val_acc: 0.8546\n",
- "Epoch 28/50\n",
- "549367/549367 [==============================] - 24s 45us/step - loss: 0.4001 - acc: 0.8473 - val_loss: 0.3848 - val_acc: 0.8534\n",
- "Epoch 29/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3991 - acc: 0.8479 - val_loss: 0.3865 - val_acc: 0.8562\n",
- "Epoch 30/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3976 - acc: 0.8484 - val_loss: 0.3833 - val_acc: 0.8574\n",
- "Epoch 31/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3961 - acc: 0.8487 - val_loss: 0.3846 - val_acc: 0.8585\n",
- "Epoch 32/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3942 - acc: 0.8498 - val_loss: 0.3805 - val_acc: 0.8573\n",
- "Epoch 33/50\n",
- "549367/549367 [==============================] - 24s 44us/step - loss: 0.3935 - acc: 0.8503 - val_loss: 0.3856 - val_acc: 0.8579\n",
- "Epoch 34/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3923 - acc: 0.8507 - val_loss: 0.3829 - val_acc: 0.8560\n",
- "Epoch 35/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3920 - acc: 0.8508 - val_loss: 0.3864 - val_acc: 0.8575\n",
- "Epoch 36/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3907 - acc: 0.8516 - val_loss: 0.3873 - val_acc: 0.8563\n",
- "Epoch 37/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3891 - acc: 0.8519 - val_loss: 0.3850 - val_acc: 0.8570\n",
- "Epoch 38/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3872 - acc: 0.8522 - val_loss: 0.3815 - val_acc: 0.8591\n",
- "Epoch 39/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3887 - acc: 0.8520 - val_loss: 0.3829 - val_acc: 0.8590\n",
- "Epoch 40/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3868 - acc: 0.8531 - val_loss: 0.3807 - val_acc: 0.8600\n",
- "Epoch 41/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3859 - acc: 0.8537 - val_loss: 0.3832 - val_acc: 0.8574\n",
- "Epoch 42/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3849 - acc: 0.8537 - val_loss: 0.3850 - val_acc: 0.8576\n",
- "Epoch 43/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3834 - acc: 0.8541 - val_loss: 0.3825 - val_acc: 0.8563\n",
- "Epoch 44/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3829 - acc: 0.8548 - val_loss: 0.3844 - val_acc: 0.8540\n",
- "Epoch 45/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3816 - acc: 0.8552 - val_loss: 0.3841 - val_acc: 0.8559\n",
- "Epoch 46/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3816 - acc: 0.8549 - val_loss: 0.3880 - val_acc: 0.8567\n",
- "Epoch 47/50\n",
- "549367/549367 [==============================] - 24s 45us/step - loss: 0.3799 - acc: 0.8559 - val_loss: 0.3767 - val_acc: 0.8635\n",
- "Epoch 48/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3800 - acc: 0.8560 - val_loss: 0.3786 - val_acc: 0.8563\n",
- "Epoch 49/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3781 - acc: 0.8563 - val_loss: 0.3812 - val_acc: 0.8596\n",
- "Epoch 50/50\n",
- "549367/549367 [==============================] - 25s 45us/step - loss: 0.3788 - acc: 0.8560 - val_loss: 0.3782 - val_acc: 0.8601\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "m1.fit([text_vectors, hypothesis_vectors], labels, batch_size=1024, epochs=50,validation_data=([text_vectors_test, hypothesis_vectors_test], labels_test))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This model performs the same as the slightly more complex model that evaluates alignments in both directions. Note also that processing time is improved, from about 60 down to about 45 microseconds per step.\n",
- "\n",
- "Let's now look at an asymmetric model that evaluates text to hypothesis comparisons. The prediction is that such a model will correctly classify a decent proportion of the exemplars, but not as accurately as the previous two.\n",
- "\n",
- "We'll just use 10 epochs for expediency."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 96,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "__________________________________________________________________________________________________\n",
- "Layer (type) Output Shape Param # Connected to \n",
- "==================================================================================================\n",
- "words1 (InputLayer) (None, 50) 0 \n",
- "__________________________________________________________________________________________________\n",
- "words2 (InputLayer) (None, 50) 0 \n",
- "__________________________________________________________________________________________________\n",
- "sequential_13 (Sequential) (None, 50, 200) 321381600 words1[0][0] \n",
- " words2[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "sequential_14 (Sequential) (None, 50, 200) 80400 sequential_13[1][0] \n",
- " sequential_13[2][0] \n",
- "__________________________________________________________________________________________________\n",
- "dot_8 (Dot) (None, 50, 50) 0 sequential_14[1][0] \n",
- " sequential_14[2][0] \n",
- "__________________________________________________________________________________________________\n",
- "lambda_9 (Lambda) (None, 50, 50) 0 dot_8[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "dot_9 (Dot) (None, 50, 200) 0 lambda_9[0][0] \n",
- " sequential_13[2][0] \n",
- "__________________________________________________________________________________________________\n",
- "concatenate_6 (Concatenate) (None, 50, 400) 0 sequential_13[1][0] \n",
- " dot_9[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "time_distributed_9 (TimeDistrib (None, 50, 200) 120400 concatenate_6[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "lambda_10 (Lambda) (None, 200) 0 time_distributed_9[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "sequential_16 (Sequential) (None, 200) 80400 lambda_10[0][0] \n",
- "__________________________________________________________________________________________________\n",
- "dense_32 (Dense) (None, 3) 603 sequential_16[1][0] \n",
- "==================================================================================================\n",
- "Total params: 321,663,403\n",
- "Trainable params: 341,803\n",
- "Non-trainable params: 321,321,600\n",
- "__________________________________________________________________________________________________\n"
- ]
- }
- ],
- "source": [
- "m2 = build_model(sem_vectors, 50, 200, 3, 200, 'right')\n",
- "m2.summary()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 97,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Train on 455226 samples, validate on 113807 samples\n",
- "Epoch 1/10\n",
- "455226/455226 [==============================] - 22s 49us/step - loss: 0.8920 - acc: 0.5771 - val_loss: 0.8001 - val_acc: 0.6435\n",
- "Epoch 2/10\n",
- "455226/455226 [==============================] - 22s 47us/step - loss: 0.7808 - acc: 0.6553 - val_loss: 0.7267 - val_acc: 0.6855\n",
- "Epoch 3/10\n",
- "455226/455226 [==============================] - 22s 47us/step - loss: 0.7329 - acc: 0.6825 - val_loss: 0.6966 - val_acc: 0.7006\n",
- "Epoch 4/10\n",
- "455226/455226 [==============================] - 22s 47us/step - loss: 0.7055 - acc: 0.6978 - val_loss: 0.6713 - val_acc: 0.7150\n",
- "Epoch 5/10\n",
- "455226/455226 [==============================] - 22s 47us/step - loss: 0.6862 - acc: 0.7081 - val_loss: 0.6533 - val_acc: 0.7253\n",
- "Epoch 6/10\n",
- "455226/455226 [==============================] - 21s 47us/step - loss: 0.6694 - acc: 0.7179 - val_loss: 0.6472 - val_acc: 0.7277\n",
- "Epoch 7/10\n",
- "455226/455226 [==============================] - 22s 47us/step - loss: 0.6555 - acc: 0.7252 - val_loss: 0.6338 - val_acc: 0.7347\n",
- "Epoch 8/10\n",
- "455226/455226 [==============================] - 22s 48us/step - loss: 0.6434 - acc: 0.7310 - val_loss: 0.6246 - val_acc: 0.7385\n",
- "Epoch 9/10\n",
- "455226/455226 [==============================] - 22s 47us/step - loss: 0.6325 - acc: 0.7367 - val_loss: 0.6164 - val_acc: 0.7424\n",
- "Epoch 10/10\n",
- "455226/455226 [==============================] - 22s 47us/step - loss: 0.6216 - acc: 0.7426 - val_loss: 0.6082 - val_acc: 0.7478\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 97,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "m2.fit([text_vectors, hypothesis_vectors], labels, batch_size=1024, epochs=10,validation_split=.2)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Comparing this fit to the validation accuracy of the previous two models after 10 epochs, we observe that its accuracy is roughly 10 percentage points lower.\n",
- "\n",
- "It is reassuring that the neural modeling here reproduces what we know from the semantics of natural language!"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/pipeline/custom_attr_methods.py b/examples/pipeline/custom_attr_methods.py
deleted file mode 100644
index 7f97bc1c3..000000000
--- a/examples/pipeline/custom_attr_methods.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-"""This example contains several snippets of methods that can be set via custom
-Doc, Token or Span attributes in spaCy v2.0. Attribute methods act like
-they're "bound" to the object and are partially applied – i.e. the object
-they're called on is passed in as the first argument.
-
-* Custom pipeline components: https://spacy.io/usage/processing-pipelines#custom-components
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.1.0
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-from spacy.lang.en import English
-from spacy.tokens import Doc, Span
-from spacy import displacy
-from pathlib import Path
-
-
-@plac.annotations(
-    output_dir=("Output directory for saved HTML", "positional", None, Path)
-)
-def main(output_dir=None):
-    """Demonstrate two method extensions registered on Doc.
-
-    First registers "overlap" and prints the tokens two example texts have in
-    common, then registers "to_html" and calls it on a third text whose single
-    entity is set by hand.
-
-    output_dir (Path): Optional directory, forwarded to to_html as `output`.
-    """
-    nlp = English()  # blank English class, no model needed
-
-    # --- demo 1: Doc._.overlap ---
-    Doc.set_extension("overlap", method=overlap_tokens)
-    first_doc = nlp("Peach emoji is where it has always been.")
-    second_doc = nlp("Peach is the superior emoji.")
-    for heading, example in (("Text 1:", first_doc), ("Text 2:", second_doc)):
-        print(heading, example.text)
-    shared_tokens = first_doc._.overlap(second_doc)
-    print("Overlapping tokens:", shared_tokens)
-
-    # --- demo 2: Doc._.to_html ---
-    Doc.set_extension("to_html", method=to_html)
-    apple_doc = nlp("This is a sentence about Apple.")
-    # the entity is set manually so the demo works without a trained model
-    org_label = nlp.vocab.strings["ORG"]
-    org_span = Span(apple_doc, 5, 6, label=org_label)
-    apple_doc.ents = [org_span]
-    print("Text:", apple_doc.text)
-    apple_doc._.to_html(output=output_dir, style="ent")
-
-
-def to_html(doc, output="/tmp", style="dep"):
-    """Doc method extension for saving the current state as a displaCy
-    visualization.
-
-    doc (Doc): The Doc the method is called on (passed in as first argument).
-    output: Directory to save the HTML file in. It is created, including any
-        missing parent directories, if it does not exist. If None, the markup
-        is printed instead of being saved.
-    style (str): Visualization style, forwarded to displacy.render.
-    """
-    # generate filename from the non-punct tokens among the first six tokens
-    file_name = "-".join([w.text for w in doc[:6] if not w.is_punct]) + ".html"
-    html = displacy.render(doc, style=style, page=True)  # render markup
-    if output is None:
-        print(html)
-        return
-    output_path = Path(output)
-    if not output_path.exists():
-        # parents=True so a nested, not-yet-existing directory also works
-        output_path.mkdir(parents=True)
-    output_file = output_path / file_name
-    # the context manager closes (and flushes) the handle even if write()
-    # raises; the chained open(...).write(...) form never closed it explicitly
-    with output_file.open("w", encoding="utf-8") as file_:
-        file_.write(html)  # save to file
-    print("Saved HTML to {}".format(output_file))
-
-
-def overlap_tokens(doc, other_doc):
-    """Get the tokens from the original Doc that are also in the comparison Doc.
-
-    Tokens are compared by their exact `text`; a text that occurs several
-    times in `doc` is returned once per occurrence.
-
-    doc: Iterable of tokens to filter (the Doc the method is called on).
-    other_doc: Iterable of tokens to compare against.
-    RETURNS (list): The tokens of `doc`, in their original order, whose text
-        also occurs in `other_doc`.
-    """
-    # collect the comparison texts once in a set: membership checks are then
-    # constant-time instead of rescanning a list for every token of `doc`
-    other_texts = {token.text for token in other_doc}
-    return [token for token in doc if token.text in other_texts]
-
-
-if __name__ == "__main__":
- # run main through plac so its annotated argument comes from the command line
- plac.call(main)
-
- # Expected output of the overlap demo (the to_html demo prints more after it):
- # Text 1: Peach emoji is where it has always been.
- # Text 2: Peach is the superior emoji.
- # Overlapping tokens: [Peach, emoji, is, .]
diff --git a/examples/pipeline/custom_component_countries_api.py b/examples/pipeline/custom_component_countries_api.py
deleted file mode 100644
index 241c0af37..000000000
--- a/examples/pipeline/custom_component_countries_api.py
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Example of a spaCy v2.0 pipeline component that requests all countries via
-the REST Countries API, merges country names into one token, assigns entity
-labels and sets attributes on country tokens, e.g. the capital and lat/lng
-coordinates. Can be extended with more details from the API.
-
-* REST Countries API: https://restcountries.eu (Mozilla Public License MPL 2.0)
-* Custom pipeline components: https://spacy.io//usage/processing-pipelines#custom-components
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.1.0
-Prerequisites: pip install requests
-"""
-from __future__ import unicode_literals, print_function
-
-import requests
-import plac
-from spacy.lang.en import English
-from spacy.matcher import PhraseMatcher
-from spacy.tokens import Doc, Span, Token
-
-
-def main():
- # For simplicity, we start off with only the blank English Language class
- # and no model or pre-defined pipeline loaded.
- nlp = English()
- rest_countries = RESTCountriesComponent(nlp) # initialise component
- nlp.add_pipe(rest_countries) # add it to the pipeline
- doc = nlp("Some text about Colombia and the Czech Republic")
- print("Pipeline", nlp.pipe_names) # pipeline contains component name
- print("Doc has countries", doc._.has_country) # Doc contains countries
- for token in doc:
- if token._.is_country:
- print(
- token.text,
- token._.country_capital,
- token._.country_latlng,
- token._.country_flag,
- ) # country data
- print("Entities", [(e.text, e.label_) for e in doc.ents]) # entities
-
-
-class RESTCountriesComponent(object):
- """spaCy v2.0 pipeline component that requests all countries via
- the REST Countries API, merges country names into one token, assigns entity
- labels and sets attributes on country tokens.
- """
-
- name = "rest_countries" # component name, will show up in the pipeline
-
- def __init__(self, nlp, label="GPE"):
- """Initialise the pipeline component. The shared nlp instance is used
- to initialise the matcher with the shared vocab, get the label ID and
- generate Doc objects as phrase match patterns.
- """
- # Make request once on initialisation and store the data
- r = requests.get("https://restcountries.eu/rest/v2/all")
- r.raise_for_status() # make sure requests raises an error if it fails
- countries = r.json()
-
- # Convert API response to dict keyed by country name for easy lookup
- # This could also be extended using the alternative and foreign language
- # names provided by the API
- self.countries = {c["name"]: c for c in countries}
- self.label = nlp.vocab.strings[label] # get entity label ID
-
- # Set up the PhraseMatcher with Doc patterns for each country name
- patterns = [nlp(c) for c in self.countries.keys()]
- self.matcher = PhraseMatcher(nlp.vocab)
- self.matcher.add("COUNTRIES", None, *patterns)
-
- # Register attribute on the Token. We'll be overwriting this based on
- # the matches, so we're only setting a default value, not a getter.
- # If no default value is set, it defaults to None.
- Token.set_extension("is_country", default=False)
- Token.set_extension("country_capital", default=False)
- Token.set_extension("country_latlng", default=False)
- Token.set_extension("country_flag", default=False)
-
- # Register attributes on Doc and Span via a getter that checks if one of
- # the contained tokens is set to is_country == True.
- Doc.set_extension("has_country", getter=self.has_country)
- Span.set_extension("has_country", getter=self.has_country)
-
- def __call__(self, doc):
- """Apply the pipeline component on a Doc object and modify it if matches
- are found. Return the Doc, so it can be processed by the next component
- in the pipeline, if available.
- """
- matches = self.matcher(doc)
- spans = [] # keep the spans for later so we can merge them afterwards
- for _, start, end in matches:
- # Generate Span representing the entity & set label
- entity = Span(doc, start, end, label=self.label)
- spans.append(entity)
- # Set custom attribute on each token of the entity
- # Can be extended with other data returned by the API, like
- # currencies, country code, flag, calling code etc.
- for token in entity:
- token._.set("is_country", True)
- token._.set("country_capital", self.countries[entity.text]["capital"])
- token._.set("country_latlng", self.countries[entity.text]["latlng"])
- token._.set("country_flag", self.countries[entity.text]["flag"])
- # Overwrite doc.ents and add entity – be careful not to replace!
- doc.ents = list(doc.ents) + [entity]
- for span in spans:
- # Iterate over all spans and merge them into one token. This is done
- # after setting the entities – otherwise, it would cause mismatched
- # indices!
- span.merge()
- return doc # don't forget to return the Doc!
-
- def has_country(self, tokens):
- """Getter for Doc and Span attributes. Returns True if one of the tokens
- is a country. Since the getter is only called when we access the
- attribute, we can refer to the Token's 'is_country' attribute here,
- which is already set in the processing step."""
- return any([t._.get("is_country") for t in tokens])
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # Expected output:
- # Pipeline ['rest_countries']
- # Doc has countries True
- # Colombia Bogotá [4.0, -72.0] https://restcountries.eu/data/col.svg
- # Czech Republic Prague [49.75, 15.5] https://restcountries.eu/data/cze.svg
- # Entities [('Colombia', 'GPE'), ('Czech Republic', 'GPE')]
diff --git a/examples/pipeline/custom_component_entities.py b/examples/pipeline/custom_component_entities.py
deleted file mode 100644
index a53b688b0..000000000
--- a/examples/pipeline/custom_component_entities.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Example of a spaCy v2.0 pipeline component that sets entity annotations
-based on list of single or multiple-word company names. Companies are
-labelled as ORG and their spans are merged into one token. Additionally,
-._.has_tech_org and ._.is_tech_org is set on the Doc/Span and Token
-respectively.
-
-* Custom pipeline components: https://spacy.io//usage/processing-pipelines#custom-components
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.1.0
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-from spacy.lang.en import English
-from spacy.matcher import PhraseMatcher
-from spacy.tokens import Doc, Span, Token
-
-
-@plac.annotations(
- text=("Text to process", "positional", None, str),
- companies=("Names of technology companies", "positional", None, str),
-)
-def main(text="Alphabet Inc. is the company behind Google.", *companies):
- # For simplicity, we start off with only the blank English Language class
- # and no model or pre-defined pipeline loaded.
- nlp = English()
- if not companies: # set default companies if none are set via args
- companies = ["Alphabet Inc.", "Google", "Netflix", "Apple"] # etc.
- component = TechCompanyRecognizer(nlp, companies) # initialise component
- nlp.add_pipe(component, last=True) # add last to the pipeline
-
- doc = nlp(text)
- print("Pipeline", nlp.pipe_names) # pipeline contains component name
- print("Tokens", [t.text for t in doc]) # company names from the list are merged
- print("Doc has_tech_org", doc._.has_tech_org) # Doc contains tech orgs
- print("Token 0 is_tech_org", doc[0]._.is_tech_org) # "Alphabet Inc." is a tech org
- print("Token 1 is_tech_org", doc[1]._.is_tech_org) # "is" is not
- print("Entities", [(e.text, e.label_) for e in doc.ents]) # all orgs are entities
-
-
-class TechCompanyRecognizer(object):
- """Example of a spaCy v2.0 pipeline component that sets entity annotations
- based on list of single or multiple-word company names. Companies are
- labelled as ORG and their spans are merged into one token. Additionally,
- ._.has_tech_org and ._.is_tech_org is set on the Doc/Span and Token
- respectively."""
-
- name = "tech_companies" # component name, will show up in the pipeline
-
- def __init__(self, nlp, companies=tuple(), label="ORG"):
- """Initialise the pipeline component. The shared nlp instance is used
- to initialise the matcher with the shared vocab, get the label ID and
- generate Doc objects as phrase match patterns.
- """
- self.label = nlp.vocab.strings[label] # get entity label ID
-
- # Set up the PhraseMatcher – it can now take Doc objects as patterns,
- # so even if the list of companies is long, it's very efficient
- patterns = [nlp(org) for org in companies]
- self.matcher = PhraseMatcher(nlp.vocab)
- self.matcher.add("TECH_ORGS", None, *patterns)
-
- # Register attribute on the Token. We'll be overwriting this based on
- # the matches, so we're only setting a default value, not a getter.
- Token.set_extension("is_tech_org", default=False)
-
- # Register attributes on Doc and Span via a getter that checks if one of
- # the contained tokens is set to is_tech_org == True.
- Doc.set_extension("has_tech_org", getter=self.has_tech_org)
- Span.set_extension("has_tech_org", getter=self.has_tech_org)
-
- def __call__(self, doc):
- """Apply the pipeline component on a Doc object and modify it if matches
- are found. Return the Doc, so it can be processed by the next component
- in the pipeline, if available.
- """
- matches = self.matcher(doc)
- spans = [] # keep the spans for later so we can merge them afterwards
- for _, start, end in matches:
- # Generate Span representing the entity & set label
- entity = Span(doc, start, end, label=self.label)
- spans.append(entity)
- # Set custom attribute on each token of the entity
- for token in entity:
- token._.set("is_tech_org", True)
- # Overwrite doc.ents and add entity – be careful not to replace!
- doc.ents = list(doc.ents) + [entity]
- for span in spans:
- # Iterate over all spans and merge them into one token. This is done
- # after setting the entities – otherwise, it would cause mismatched
- # indices!
- span.merge()
- return doc # don't forget to return the Doc!
-
- def has_tech_org(self, tokens):
- """Getter for Doc and Span attributes. Returns True if one of the tokens
- is a tech org. Since the getter is only called when we access the
- attribute, we can refer to the Token's 'is_tech_org' attribute here,
- which is already set in the processing step."""
- return any([t._.get("is_tech_org") for t in tokens])
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # Expected output:
- # Pipeline ['tech_companies']
- # Tokens ['Alphabet Inc.', 'is', 'the', 'company', 'behind', 'Google', '.']
- # Doc has_tech_org True
- # Token 0 is_tech_org True
- # Token 1 is_tech_org False
- # Entities [('Alphabet Inc.', 'ORG'), ('Google', 'ORG')]
diff --git a/examples/pipeline/custom_sentence_segmentation.py b/examples/pipeline/custom_sentence_segmentation.py
deleted file mode 100644
index ff59ab187..000000000
--- a/examples/pipeline/custom_sentence_segmentation.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""Example of adding a pipeline component to prohibit sentence boundaries
-before certain tokens.
-
-What we do is write to the token.is_sent_start attribute, which
-takes values in {True, False, None}. The default value None allows the parser
-to predict sentence segments. The value False prohibits the parser from inserting
-a sentence boundary before that token. Note that fixing the sentence segmentation
-should also improve the parse quality.
-
-The specific example here is drawn from https://github.com/explosion/spaCy/issues/2627
-Other versions of the model may not make the original mistake, so the specific
-example might not be apt for future versions.
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.1.0
-"""
-import plac
-import spacy
-
-
-def prevent_sentence_boundaries(doc):
- for token in doc:
- if not can_be_sentence_start(token):
- token.is_sent_start = False
- return doc
-
-
-def can_be_sentence_start(token):
- if token.i == 0:
- return True
- # We're not checking for is_title here to ignore arbitrary titlecased
- # tokens within sentences
- # elif token.is_title:
- # return True
- elif token.nbor(-1).is_punct:
- return True
- elif token.nbor(-1).is_space:
- return True
- else:
- return False
-
-
-@plac.annotations(
- text=("The raw text to process", "positional", None, str),
- spacy_model=("spaCy model to use (with a parser)", "option", "m", str),
-)
-def main(text="Been here And I'm loving it.", spacy_model="en_core_web_lg"):
- print("Using spaCy model '{}'".format(spacy_model))
- print("Processing text '{}'".format(text))
- nlp = spacy.load(spacy_model)
- doc = nlp(text)
- sentences = [sent.text.strip() for sent in doc.sents]
- print("Before:", sentences)
- nlp.add_pipe(prevent_sentence_boundaries, before="parser")
- doc = nlp(text)
- sentences = [sent.text.strip() for sent in doc.sents]
- print("After:", sentences)
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/pipeline/fix_space_entities.py b/examples/pipeline/fix_space_entities.py
deleted file mode 100644
index 686253eca..000000000
--- a/examples/pipeline/fix_space_entities.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Demonstrate adding a rule-based component that forces some tokens to not
-be entities, before the NER tagger is applied. This is used to hotfix the issue
-in https://github.com/explosion/spaCy/issues/2870, present as of spaCy v2.0.16.
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.1.0
-"""
-from __future__ import unicode_literals
-
-import spacy
-from spacy.attrs import ENT_IOB
-
-
-def fix_space_tags(doc):
- ent_iobs = doc.to_array([ENT_IOB])
- for i, token in enumerate(doc):
- if token.is_space:
- # Sets 'O' tag (0 is None, so I is 1, O is 2)
- ent_iobs[i] = 2
- doc.from_array([ENT_IOB], ent_iobs.reshape((len(doc), 1)))
- return doc
-
-
-def main():
- nlp = spacy.load("en_core_web_sm")
- text = "This is some crazy test where I dont need an Apple Watch to make things bug"
- doc = nlp(text)
- print("Before", doc.ents)
- nlp.add_pipe(fix_space_tags, name="fix-ner", before="ner")
- doc = nlp(text)
- print("After", doc.ents)
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/pipeline/multi_processing.py b/examples/pipeline/multi_processing.py
deleted file mode 100644
index f0e437acf..000000000
--- a/examples/pipeline/multi_processing.py
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Example of multi-processing with Joblib. Here, we're exporting
-part-of-speech-tagged, true-cased, (very roughly) sentence-separated text, with
-each "sentence" on a newline, and spaces between tokens. Data is loaded from
-the IMDB movie reviews dataset and will be loaded automatically via Thinc's
-built-in dataset loader.
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.1.0
-Prerequisites: pip install joblib
-"""
-from __future__ import print_function, unicode_literals
-
-from pathlib import Path
-from joblib import Parallel, delayed
-from functools import partial
-import thinc.extra.datasets
-import plac
-import spacy
-from spacy.util import minibatch
-
-
-@plac.annotations(
- output_dir=("Output directory", "positional", None, Path),
- model=("Model name (needs tagger)", "positional", None, str),
- n_jobs=("Number of workers", "option", "n", int),
- batch_size=("Batch-size for each process", "option", "b", int),
- limit=("Limit of entries from the dataset", "option", "l", int),
-)
-def main(output_dir, model="en_core_web_sm", n_jobs=4, batch_size=1000, limit=10000):
- nlp = spacy.load(model) # load spaCy model
- print("Loaded model '%s'" % model)
- if not output_dir.exists():
- output_dir.mkdir()
- # load and pre-process the IMBD dataset
- print("Loading IMDB data...")
- data, _ = thinc.extra.datasets.imdb()
- texts, _ = zip(*data[-limit:])
- print("Processing texts...")
- partitions = minibatch(texts, size=batch_size)
- executor = Parallel(n_jobs=n_jobs, backend="multiprocessing", prefer="processes")
- do = delayed(partial(transform_texts, nlp))
- tasks = (do(i, batch, output_dir) for i, batch in enumerate(partitions))
- executor(tasks)
-
-
-def transform_texts(nlp, batch_id, texts, output_dir):
- print(nlp.pipe_names)
- out_path = Path(output_dir) / ("%d.txt" % batch_id)
- if out_path.exists(): # return None in case same batch is called again
- return None
- print("Processing batch", batch_id)
- with out_path.open("w", encoding="utf8") as f:
- for doc in nlp.pipe(texts):
- f.write(" ".join(represent_word(w) for w in doc if not w.is_space))
- f.write("\n")
- print("Saved {} texts to {}.txt".format(len(texts), batch_id))
-
-
-def represent_word(word):
- text = word.text
- # True-case, i.e. try to normalize sentence-initial capitals.
- # Only do this if the lower-cased form is more probable.
- if (
- text.istitle()
- and is_sent_begin(word)
- and word.prob < word.doc.vocab[text.lower()].prob
- ):
- text = text.lower()
- return text + "|" + word.tag_
-
-
-def is_sent_begin(word):
- if word.i == 0:
- return True
- elif word.i >= 2 and word.nbor(-1).text in (".", "!", "?", "..."):
- return True
- else:
- return False
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/streamlit_spacy.py b/examples/streamlit_spacy.py
deleted file mode 100644
index a2da123c2..000000000
--- a/examples/streamlit_spacy.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# coding: utf-8
-"""
-Example of a Streamlit app for an interactive spaCy model visualizer. You can
-either download the script, or point streamlit run to the raw URL of this
-file. For more details, see https://streamlit.io.
-
-Installation:
-pip install streamlit
-python -m spacy download en_core_web_sm
-python -m spacy download en_core_web_md
-python -m spacy download de_core_news_sm
-
-Usage:
-streamlit run streamlit_spacy.py
-"""
-from __future__ import unicode_literals
-
-import streamlit as st
-import spacy
-from spacy import displacy
-import pandas as pd
-
-
-SPACY_MODEL_NAMES = ["en_core_web_sm", "en_core_web_md", "de_core_news_sm"]
-DEFAULT_TEXT = "Mark Zuckerberg is the CEO of Facebook."
-HTML_WRAPPER = """{}
"""
-
-
-@st.cache(allow_output_mutation=True)
-def load_model(name):
- return spacy.load(name)
-
-
-@st.cache(allow_output_mutation=True)
-def process_text(model_name, text):
- nlp = load_model(model_name)
- return nlp(text)
-
-
-st.sidebar.title("Interactive spaCy visualizer")
-st.sidebar.markdown(
- """
-Process text with [spaCy](https://spacy.io) models and visualize named entities,
-dependencies and more. Uses spaCy's built-in
-[displaCy](http://spacy.io/usage/visualizers) visualizer under the hood.
-"""
-)
-
-spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)
-model_load_state = st.info(f"Loading model '{spacy_model}'...")
-nlp = load_model(spacy_model)
-model_load_state.empty()
-
-text = st.text_area("Text to analyze", DEFAULT_TEXT)
-doc = process_text(spacy_model, text)
-
-if "parser" in nlp.pipe_names:
- st.header("Dependency Parse & Part-of-speech tags")
- st.sidebar.header("Dependency Parse")
- split_sents = st.sidebar.checkbox("Split sentences", value=True)
- collapse_punct = st.sidebar.checkbox("Collapse punctuation", value=True)
- collapse_phrases = st.sidebar.checkbox("Collapse phrases")
- compact = st.sidebar.checkbox("Compact mode")
- options = {
- "collapse_punct": collapse_punct,
- "collapse_phrases": collapse_phrases,
- "compact": compact,
- }
- docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
- for sent in docs:
- html = displacy.render(sent, options=options)
- # Double newlines seem to mess with the rendering
- html = html.replace("\n\n", "\n")
- if split_sents and len(docs) > 1:
- st.markdown(f"> {sent.text}")
- st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
-
-if "ner" in nlp.pipe_names:
- st.header("Named Entities")
- st.sidebar.header("Named Entities")
- label_set = nlp.get_pipe("ner").labels
- labels = st.sidebar.multiselect(
- "Entity labels", options=label_set, default=list(label_set)
- )
- html = displacy.render(doc, style="ent", options={"ents": labels})
- # Newlines seem to mess with the rendering
- html = html.replace("\n", " ")
- st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
- attrs = ["text", "label_", "start", "end", "start_char", "end_char"]
- if "entity_linker" in nlp.pipe_names:
- attrs.append("kb_id_")
- data = [
- [str(getattr(ent, attr)) for attr in attrs]
- for ent in doc.ents
- if ent.label_ in labels
- ]
- df = pd.DataFrame(data, columns=attrs)
- st.dataframe(df)
-
-
-if "textcat" in nlp.pipe_names:
- st.header("Text Classification")
- st.markdown(f"> {text}")
- df = pd.DataFrame(doc.cats.items(), columns=("Label", "Score"))
- st.dataframe(df)
-
-
-vector_size = nlp.meta.get("vectors", {}).get("width", 0)
-if vector_size:
- st.header("Vectors & Similarity")
- st.code(nlp.meta["vectors"])
- text1 = st.text_input("Text or word 1", "apple")
- text2 = st.text_input("Text or word 2", "orange")
- doc1 = process_text(spacy_model, text1)
- doc2 = process_text(spacy_model, text2)
- similarity = doc1.similarity(doc2)
- if similarity > 0.5:
- st.success(similarity)
- else:
- st.error(similarity)
-
-st.header("Token attributes")
-
-if st.button("Show token attributes"):
- attrs = [
- "idx",
- "text",
- "lemma_",
- "pos_",
- "tag_",
- "dep_",
- "head",
- "ent_type_",
- "ent_iob_",
- "shape_",
- "is_alpha",
- "is_ascii",
- "is_digit",
- "is_punct",
- "like_num",
- ]
- data = [[str(getattr(token, attr)) for attr in attrs] for token in doc]
- df = pd.DataFrame(data, columns=attrs)
- st.dataframe(df)
-
-
-st.header("JSON Doc")
-if st.button("Show JSON Doc"):
- st.json(doc.to_json())
-
-st.header("JSON model meta")
-if st.button("Show JSON model meta"):
- st.json(nlp.meta)
diff --git a/examples/training/conllu-config.json b/examples/training/conllu-config.json
deleted file mode 100644
index 9a11dd96b..000000000
--- a/examples/training/conllu-config.json
+++ /dev/null
@@ -1 +0,0 @@
-{"nr_epoch": 3, "batch_size": 24, "dropout": 0.001, "vectors": 0, "multitask_tag": 0, "multitask_sent": 0}
diff --git a/examples/training/conllu.py b/examples/training/conllu.py
deleted file mode 100644
index 1c65f4a72..000000000
--- a/examples/training/conllu.py
+++ /dev/null
@@ -1,434 +0,0 @@
-"""Train for CONLL 2017 UD treebank evaluation. Takes .conllu files, writes
-.conllu format for development data, allowing the official scorer to be used.
-"""
-from __future__ import unicode_literals
-import plac
-import attr
-from pathlib import Path
-import re
-import json
-import tqdm
-
-import spacy
-import spacy.util
-from spacy.tokens import Token, Doc
-from spacy.gold import GoldParse
-from spacy.syntax.nonproj import projectivize
-from collections import defaultdict
-from spacy.matcher import Matcher
-
-import itertools
-import random
-import numpy.random
-
-from bin.ud import conll17_ud_eval
-
-import spacy.lang.zh
-import spacy.lang.ja
-
-spacy.lang.zh.Chinese.Defaults.use_jieba = False
-spacy.lang.ja.Japanese.Defaults.use_janome = False
-
-random.seed(0)
-numpy.random.seed(0)
-
-
-def minibatch_by_words(items, size=5000):
- random.shuffle(items)
- if isinstance(size, int):
- size_ = itertools.repeat(size)
- else:
- size_ = size
- items = iter(items)
- while True:
- batch_size = next(size_)
- batch = []
- while batch_size >= 0:
- try:
- doc, gold = next(items)
- except StopIteration:
- if batch:
- yield batch
- return
- batch_size -= len(doc)
- batch.append((doc, gold))
- if batch:
- yield batch
- else:
- break
-
-
-################
-# Data reading #
-################
-
-space_re = re.compile("\s+")
-
-
-def split_text(text):
- return [space_re.sub(" ", par.strip()) for par in text.split("\n\n")]
-
-
-def read_data(
- nlp,
- conllu_file,
- text_file,
- raw_text=True,
- oracle_segments=False,
- max_doc_length=None,
- limit=None,
-):
- """Read the CONLLU format into (Doc, GoldParse) tuples. If raw_text=True,
- include Doc objects created using nlp.make_doc and then aligned against
- the gold-standard sequences. If oracle_segments=True, include Doc objects
- created from the gold-standard segments. At least one must be True."""
- if not raw_text and not oracle_segments:
- raise ValueError("At least one of raw_text or oracle_segments must be True")
- paragraphs = split_text(text_file.read())
- conllu = read_conllu(conllu_file)
- # sd is spacy doc; cd is conllu doc
- # cs is conllu sent, ct is conllu token
- docs = []
- golds = []
- for doc_id, (text, cd) in enumerate(zip(paragraphs, conllu)):
- sent_annots = []
- for cs in cd:
- sent = defaultdict(list)
- for id_, word, lemma, pos, tag, morph, head, dep, _, space_after in cs:
- if "." in id_:
- continue
- if "-" in id_:
- continue
- id_ = int(id_) - 1
- head = int(head) - 1 if head != "0" else id_
- sent["words"].append(word)
- sent["tags"].append(tag)
- sent["heads"].append(head)
- sent["deps"].append("ROOT" if dep == "root" else dep)
- sent["spaces"].append(space_after == "_")
- sent["entities"] = ["-"] * len(sent["words"])
- sent["heads"], sent["deps"] = projectivize(sent["heads"], sent["deps"])
- if oracle_segments:
- docs.append(Doc(nlp.vocab, words=sent["words"], spaces=sent["spaces"]))
- golds.append(GoldParse(docs[-1], **sent))
-
- sent_annots.append(sent)
- if raw_text and max_doc_length and len(sent_annots) >= max_doc_length:
- doc, gold = _make_gold(nlp, None, sent_annots)
- sent_annots = []
- docs.append(doc)
- golds.append(gold)
- if limit and len(docs) >= limit:
- return docs, golds
-
- if raw_text and sent_annots:
- doc, gold = _make_gold(nlp, None, sent_annots)
- docs.append(doc)
- golds.append(gold)
- if limit and len(docs) >= limit:
- return docs, golds
- return docs, golds
-
-
-def read_conllu(file_):
- docs = []
- sent = []
- doc = []
- for line in file_:
- if line.startswith("# newdoc"):
- if doc:
- docs.append(doc)
- doc = []
- elif line.startswith("#"):
- continue
- elif not line.strip():
- if sent:
- doc.append(sent)
- sent = []
- else:
- sent.append(list(line.strip().split("\t")))
- if len(sent[-1]) != 10:
- print(repr(line))
- raise ValueError
- if sent:
- doc.append(sent)
- if doc:
- docs.append(doc)
- return docs
-
-
-def _make_gold(nlp, text, sent_annots):
- # Flatten the conll annotations, and adjust the head indices
- flat = defaultdict(list)
- for sent in sent_annots:
- flat["heads"].extend(len(flat["words"]) + head for head in sent["heads"])
- for field in ["words", "tags", "deps", "entities", "spaces"]:
- flat[field].extend(sent[field])
- # Construct text if necessary
- assert len(flat["words"]) == len(flat["spaces"])
- if text is None:
- text = "".join(
- word + " " * space for word, space in zip(flat["words"], flat["spaces"])
- )
- doc = nlp.make_doc(text)
- flat.pop("spaces")
- gold = GoldParse(doc, **flat)
- return doc, gold
-
-
-#############################
-# Data transforms for spaCy #
-#############################
-
-
-def golds_to_gold_tuples(docs, golds):
- """Get out the annoying 'tuples' format used by begin_training, given the
- GoldParse objects."""
- tuples = []
- for doc, gold in zip(docs, golds):
- text = doc.text
- ids, words, tags, heads, labels, iob = zip(*gold.orig_annot)
- sents = [((ids, words, tags, heads, labels, iob), [])]
- tuples.append((text, sents))
- return tuples
-
-
-##############
-# Evaluation #
-##############
-
-
-def evaluate(nlp, text_loc, gold_loc, sys_loc, limit=None):
- with text_loc.open("r", encoding="utf8") as text_file:
- texts = split_text(text_file.read())
- docs = list(nlp.pipe(texts))
- with sys_loc.open("w", encoding="utf8") as out_file:
- write_conllu(docs, out_file)
- with gold_loc.open("r", encoding="utf8") as gold_file:
- gold_ud = conll17_ud_eval.load_conllu(gold_file)
- with sys_loc.open("r", encoding="utf8") as sys_file:
- sys_ud = conll17_ud_eval.load_conllu(sys_file)
- scores = conll17_ud_eval.evaluate(gold_ud, sys_ud)
- return scores
-
-
-def write_conllu(docs, file_):
- merger = Matcher(docs[0].vocab)
- merger.add("SUBTOK", None, [{"DEP": "subtok", "op": "+"}])
- for i, doc in enumerate(docs):
- matches = merger(doc)
- spans = [doc[start : end + 1] for _, start, end in matches]
- offsets = [(span.start_char, span.end_char) for span in spans]
- for start_char, end_char in offsets:
- doc.merge(start_char, end_char)
- file_.write("# newdoc id = {i}\n".format(i=i))
- for j, sent in enumerate(doc.sents):
- file_.write("# sent_id = {i}.{j}\n".format(i=i, j=j))
- file_.write("# text = {text}\n".format(text=sent.text))
- for k, token in enumerate(sent):
- file_.write(token._.get_conllu_lines(k) + "\n")
- file_.write("\n")
-
-
-def print_progress(itn, losses, ud_scores):
- fields = {
- "dep_loss": losses.get("parser", 0.0),
- "tag_loss": losses.get("tagger", 0.0),
- "words": ud_scores["Words"].f1 * 100,
- "sents": ud_scores["Sentences"].f1 * 100,
- "tags": ud_scores["XPOS"].f1 * 100,
- "uas": ud_scores["UAS"].f1 * 100,
- "las": ud_scores["LAS"].f1 * 100,
- }
- header = ["Epoch", "Loss", "LAS", "UAS", "TAG", "SENT", "WORD"]
- if itn == 0:
- print("\t".join(header))
- tpl = "\t".join(
- (
- "{:d}",
- "{dep_loss:.1f}",
- "{las:.1f}",
- "{uas:.1f}",
- "{tags:.1f}",
- "{sents:.1f}",
- "{words:.1f}",
- )
- )
- print(tpl.format(itn, **fields))
-
-
-# def get_sent_conllu(sent, sent_id):
-# lines = ["# sent_id = {sent_id}".format(sent_id=sent_id)]
-
-
-def get_token_conllu(token, i):
- if token._.begins_fused:
- n = 1
- while token.nbor(n)._.inside_fused:
- n += 1
- id_ = "%d-%d" % (i, i + n)
- lines = [id_, token.text, "_", "_", "_", "_", "_", "_", "_", "_"]
- else:
- lines = []
- if token.head.i == token.i:
- head = 0
- else:
- head = i + (token.head.i - token.i) + 1
- fields = [
- str(i + 1),
- token.text,
- token.lemma_,
- token.pos_,
- token.tag_,
- "_",
- str(head),
- token.dep_.lower(),
- "_",
- "_",
- ]
- lines.append("\t".join(fields))
- return "\n".join(lines)
-
-
-##################
-# Initialization #
-##################
-
-
-def load_nlp(corpus, config):
- lang = corpus.split("_")[0]
- nlp = spacy.blank(lang)
- if config.vectors:
- nlp.vocab.from_disk(config.vectors / "vocab")
- return nlp
-
-
-def initialize_pipeline(nlp, docs, golds, config):
- nlp.add_pipe(nlp.create_pipe("parser"))
- if config.multitask_tag:
- nlp.parser.add_multitask_objective("tag")
- if config.multitask_sent:
- nlp.parser.add_multitask_objective("sent_start")
- nlp.parser.moves.add_action(2, "subtok")
- nlp.add_pipe(nlp.create_pipe("tagger"))
- for gold in golds:
- for tag in gold.tags:
- if tag is not None:
- nlp.tagger.add_label(tag)
- # Replace labels that didn't make the frequency cutoff
- actions = set(nlp.parser.labels)
- label_set = set([act.split("-")[1] for act in actions if "-" in act])
- for gold in golds:
- for i, label in enumerate(gold.labels):
- if label is not None and label not in label_set:
- gold.labels[i] = label.split("||")[0]
- return nlp.begin_training(lambda: golds_to_gold_tuples(docs, golds))
-
-
-########################
-# Command line helpers #
-########################
-
-
-@attr.s
-class Config(object):
- vectors = attr.ib(default=None)
- max_doc_length = attr.ib(default=10)
- multitask_tag = attr.ib(default=True)
- multitask_sent = attr.ib(default=True)
- nr_epoch = attr.ib(default=30)
- batch_size = attr.ib(default=1000)
- dropout = attr.ib(default=0.2)
-
- @classmethod
- def load(cls, loc):
- with Path(loc).open("r", encoding="utf8") as file_:
- cfg = json.load(file_)
- return cls(**cfg)
-
-
-class Dataset(object):
- def __init__(self, path, section):
- self.path = path
- self.section = section
- self.conllu = None
- self.text = None
- for file_path in self.path.iterdir():
- name = file_path.parts[-1]
- if section in name and name.endswith("conllu"):
- self.conllu = file_path
- elif section in name and name.endswith("txt"):
- self.text = file_path
- if self.conllu is None:
- msg = "Could not find .txt file in {path} for {section}"
- raise IOError(msg.format(section=section, path=path))
- if self.text is None:
- msg = "Could not find .txt file in {path} for {section}"
- self.lang = self.conllu.parts[-1].split("-")[0].split("_")[0]
-
-
-class TreebankPaths(object):
- def __init__(self, ud_path, treebank, **cfg):
- self.train = Dataset(ud_path / treebank, "train")
- self.dev = Dataset(ud_path / treebank, "dev")
- self.lang = self.train.lang
-
-
-@plac.annotations(
- ud_dir=("Path to Universal Dependencies corpus", "positional", None, Path),
- parses_dir=("Directory to write the development parses", "positional", None, Path),
- config=("Path to json formatted config file", "positional", None, Config.load),
- corpus=(
- "UD corpus to train and evaluate on, e.g. UD_Spanish-AnCora",
- "positional",
- None,
- str,
- ),
- limit=("Size limit", "option", "n", int),
-)
-def main(ud_dir, parses_dir, config, corpus, limit=0):
- Token.set_extension("get_conllu_lines", method=get_token_conllu)
- Token.set_extension("begins_fused", default=False)
- Token.set_extension("inside_fused", default=False)
-
- paths = TreebankPaths(ud_dir, corpus)
- if not (parses_dir / corpus).exists():
- (parses_dir / corpus).mkdir()
- print("Train and evaluate", corpus, "using lang", paths.lang)
- nlp = load_nlp(paths.lang, config)
-
- docs, golds = read_data(
- nlp,
- paths.train.conllu.open(encoding="utf8"),
- paths.train.text.open(encoding="utf8"),
- max_doc_length=config.max_doc_length,
- limit=limit,
- )
-
- optimizer = initialize_pipeline(nlp, docs, golds, config)
-
- for i in range(config.nr_epoch):
- docs = [nlp.make_doc(doc.text) for doc in docs]
- batches = minibatch_by_words(list(zip(docs, golds)), size=config.batch_size)
- losses = {}
- n_train_words = sum(len(doc) for doc in docs)
- with tqdm.tqdm(total=n_train_words, leave=False) as pbar:
- for batch in batches:
- batch_docs, batch_gold = zip(*batch)
- pbar.update(sum(len(doc) for doc in batch_docs))
- nlp.update(
- batch_docs,
- batch_gold,
- sgd=optimizer,
- drop=config.dropout,
- losses=losses,
- )
-
- out_path = parses_dir / corpus / "epoch-{i}.conllu".format(i=i)
- with nlp.use_params(optimizer.averages):
- scores = evaluate(nlp, paths.dev.text, paths.dev.conllu, out_path)
- print_progress(i, losses, scores)
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/training/create_kb.py b/examples/training/create_kb.py
deleted file mode 100644
index cbdb5c05b..000000000
--- a/examples/training/create_kb.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-
-"""Example of defining a knowledge base in spaCy,
-which is needed to implement entity linking functionality.
-
-For more details, see the documentation:
-* Knowledge base: https://spacy.io/api/kb
-* Entity Linking: https://spacy.io/usage/linguistic-features#entity-linking
-
-Compatible with: spaCy v2.2.4
-Last tested with: v2.2.4
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-from pathlib import Path
-
-from spacy.vocab import Vocab
-import spacy
-from spacy.kb import KnowledgeBase
-
-
-# Q2146908 (Russ Cochran): American golfer
-# Q7381115 (Russ Cochran): publisher
-ENTITIES = {"Q2146908": ("American golfer", 342), "Q7381115": ("publisher", 17)}
-
-
-@plac.annotations(
- model=("Model name, should have pretrained word embeddings", "positional", None, str),
- output_dir=("Optional output directory", "option", "o", Path),
-)
-def main(model=None, output_dir=None):
- """Load the model and create the KB with pre-defined entity encodings.
- If an output_dir is provided, the KB will be stored there in a file 'kb'.
- The updated vocab will also be written to a directory in the output_dir."""
-
- nlp = spacy.load(model) # load existing spaCy model
- print("Loaded model '%s'" % model)
-
- # check the length of the nlp vectors
- if "vectors" not in nlp.meta or not nlp.vocab.vectors.size:
- raise ValueError(
- "The `nlp` object should have access to pretrained word vectors, "
- " cf. https://spacy.io/usage/models#languages."
- )
-
- # You can change the dimension of vectors in your KB by using an encoder that changes the dimensionality.
- # For simplicity, we'll just use the original vector dimension here instead.
- vectors_dim = nlp.vocab.vectors.shape[1]
- kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=vectors_dim)
-
- # set up the data
- entity_ids = []
- descr_embeddings = []
- freqs = []
- for key, value in ENTITIES.items():
- desc, freq = value
- entity_ids.append(key)
- descr_embeddings.append(nlp(desc).vector)
- freqs.append(freq)
-
- # set the entities, can also be done by calling `kb.add_entity` for each entity
- kb.set_entities(entity_list=entity_ids, freq_list=freqs, vector_list=descr_embeddings)
-
- # adding aliases, the entities need to be defined in the KB beforehand
- kb.add_alias(
- alias="Russ Cochran",
- entities=["Q2146908", "Q7381115"],
- probabilities=[0.24, 0.7], # the sum of these probabilities should not exceed 1
- )
-
- # test the trained model
- print()
- _print_kb(kb)
-
- # save model to output directory
- if output_dir is not None:
- output_dir = Path(output_dir)
- if not output_dir.exists():
- output_dir.mkdir()
- kb_path = str(output_dir / "kb")
- kb.dump(kb_path)
- print()
- print("Saved KB to", kb_path)
-
- vocab_path = output_dir / "vocab"
- kb.vocab.to_disk(vocab_path)
- print("Saved vocab to", vocab_path)
-
- print()
-
- # test the saved model
- # always reload a knowledge base with the same vocab instance!
- print("Loading vocab from", vocab_path)
- print("Loading KB from", kb_path)
- vocab2 = Vocab().from_disk(vocab_path)
- kb2 = KnowledgeBase(vocab=vocab2)
- kb2.load_bulk(kb_path)
- print()
- _print_kb(kb2)
-
-
-def _print_kb(kb):
- print(kb.get_size_entities(), "kb entities:", kb.get_entity_strings())
- print(kb.get_size_aliases(), "kb aliases:", kb.get_alias_strings())
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # Expected output:
- # 2 kb entities: ['Q2146908', 'Q7381115']
- # 1 kb aliases: ['Russ Cochran']
diff --git a/examples/training/ner_multitask_objective.py b/examples/training/ner_multitask_objective.py
deleted file mode 100644
index 4bf7a008f..000000000
--- a/examples/training/ner_multitask_objective.py
+++ /dev/null
@@ -1,89 +0,0 @@
-"""This example shows how to add a multi-task objective that is trained
-alongside the entity recognizer. This is an alternative to adding features
-to the model.
-
-The multi-task idea is to train an auxiliary model to predict some attribute,
-with weights shared between the auxiliary model and the main model. In this
-example, we're predicting the position of the word in the document.
-
-The model that predicts the position of the word encourages the convolutional
-layers to include the position information in their representation. The
-information is then available to the main model, as a feature.
-
-The overall idea is that we might know something about what sort of features
-we'd like the CNN to extract. The multi-task objectives can encourage the
-extraction of this type of feature. The multi-task objective is only used
-during training. We discard the auxiliary model before run-time.
-
-The specific example here is not necessarily a good idea --- but it shows
-how an arbitrary objective function for some word can be used.
-
-Developed and tested for spaCy 2.0.6. Updated for v2.2.2
-"""
-import random
-import plac
-import spacy
-import os.path
-from spacy.tokens import Doc
-from spacy.gold import read_json_file, GoldParse
-
-random.seed(0)
-
-PWD = os.path.dirname(__file__)
-
-TRAIN_DATA = list(read_json_file(
- os.path.join(PWD, "ner_example_data", "ner-sent-per-line.json")))
-
-
-def get_position_label(i, words, tags, heads, labels, ents):
- """Return labels indicating the position of the word in the document.
- """
- if len(words) < 20:
- return "short-doc"
- elif i == 0:
- return "first-word"
- elif i < 10:
- return "early-word"
- elif i < 20:
- return "mid-word"
- elif i == len(words) - 1:
- return "last-word"
- else:
- return "late-word"
-
-
-def main(n_iter=10):
- nlp = spacy.blank("en")
- ner = nlp.create_pipe("ner")
- ner.add_multitask_objective(get_position_label)
- nlp.add_pipe(ner)
- print(nlp.pipeline)
-
- print("Create data", len(TRAIN_DATA))
- optimizer = nlp.begin_training(get_gold_tuples=lambda: TRAIN_DATA)
- for itn in range(n_iter):
- random.shuffle(TRAIN_DATA)
- losses = {}
- for text, annot_brackets in TRAIN_DATA:
- for annotations, _ in annot_brackets:
- doc = Doc(nlp.vocab, words=annotations[1])
- gold = GoldParse.from_annot_tuples(doc, annotations)
- nlp.update(
- [doc], # batch of texts
- [gold], # batch of annotations
- drop=0.2, # dropout - make it harder to memorise data
- sgd=optimizer, # callable to update weights
- losses=losses,
- )
- print(losses.get("nn_labeller", 0.0), losses["ner"])
-
- # test the trained model
- for text, _ in TRAIN_DATA:
- if text is not None:
- doc = nlp(text)
- print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
- print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/training/pretrain_textcat.py b/examples/training/pretrain_textcat.py
deleted file mode 100644
index d29e20ad1..000000000
--- a/examples/training/pretrain_textcat.py
+++ /dev/null
@@ -1,217 +0,0 @@
-"""This script is experimental.
-
-Try pre-training the CNN component of the text categorizer using a cheap
-language modelling-like objective. Specifically, we load pretrained vectors
-(from something like word2vec, GloVe, FastText etc), and use the CNN to
-predict the tokens' pretrained vectors. This isn't as easy as it sounds:
-we're not merely doing compression here, because heavy dropout is applied,
-including over the input words. This means the model must often (50% of the time)
-use the context in order to predict the word.
-
-To evaluate the technique, we're pre-training with the 50k texts from the IMDB
-corpus, and then training with only 100 labels. Note that it's a bit dirty to
-pre-train with the development data, but also not *so* terrible: we're not using
-the development labels, after all --- only the unlabelled text.
-"""
-import plac
-import tqdm
-import random
-import spacy
-import thinc.extra.datasets
-from spacy.util import minibatch, use_gpu, compounding
-from spacy._ml import Tok2Vec
-from spacy.pipeline import TextCategorizer
-import numpy
-
-
-def load_texts(limit=0):
- train, dev = thinc.extra.datasets.imdb()
- train_texts, train_labels = zip(*train)
- dev_texts, dev_labels = zip(*train)
- train_texts = list(train_texts)
- dev_texts = list(dev_texts)
- random.shuffle(train_texts)
- random.shuffle(dev_texts)
- if limit >= 1:
- return train_texts[:limit]
- else:
- return list(train_texts) + list(dev_texts)
-
-
-def load_textcat_data(limit=0):
- """Load data from the IMDB dataset."""
- # Partition off part of the train data for evaluation
- train_data, eval_data = thinc.extra.datasets.imdb()
- random.shuffle(train_data)
- train_data = train_data[-limit:]
- texts, labels = zip(*train_data)
- eval_texts, eval_labels = zip(*eval_data)
- cats = [{"POSITIVE": bool(y), "NEGATIVE": not bool(y)} for y in labels]
- eval_cats = [{"POSITIVE": bool(y), "NEGATIVE": not bool(y)} for y in eval_labels]
- return (texts, cats), (eval_texts, eval_cats)
-
-
-def prefer_gpu():
- used = spacy.util.use_gpu(0)
- if used is None:
- return False
- else:
- import cupy.random
-
- cupy.random.seed(0)
- return True
-
-
-def build_textcat_model(tok2vec, nr_class, width):
- from thinc.v2v import Model, Softmax, Maxout
- from thinc.api import flatten_add_lengths, chain
- from thinc.t2v import Pooling, sum_pool, mean_pool, max_pool
- from thinc.misc import Residual, LayerNorm
- from spacy._ml import logistic, zero_init
-
- with Model.define_operators({">>": chain}):
- model = (
- tok2vec
- >> flatten_add_lengths
- >> Pooling(mean_pool)
- >> Softmax(nr_class, width)
- )
- model.tok2vec = tok2vec
- return model
-
-
-def block_gradients(model):
- from thinc.api import wrap
-
- def forward(X, drop=0.0):
- Y, _ = model.begin_update(X, drop=drop)
- return Y, None
-
- return wrap(forward, model)
-
-
-def create_pipeline(width, embed_size, vectors_model):
- print("Load vectors")
- nlp = spacy.load(vectors_model)
- print("Start training")
- textcat = TextCategorizer(
- nlp.vocab,
- labels=["POSITIVE", "NEGATIVE"],
- model=build_textcat_model(
- Tok2Vec(width=width, embed_size=embed_size), 2, width
- ),
- )
-
- nlp.add_pipe(textcat)
- return nlp
-
-
-def train_tensorizer(nlp, texts, dropout, n_iter):
- tensorizer = nlp.create_pipe("tensorizer")
- nlp.add_pipe(tensorizer)
- optimizer = nlp.begin_training()
- for i in range(n_iter):
- losses = {}
- for i, batch in enumerate(minibatch(tqdm.tqdm(texts))):
- docs = [nlp.make_doc(text) for text in batch]
- tensorizer.update(docs, None, losses=losses, sgd=optimizer, drop=dropout)
- print(losses)
- return optimizer
-
-
-def train_textcat(nlp, n_texts, n_iter=10):
- textcat = nlp.get_pipe("textcat")
- tok2vec_weights = textcat.model.tok2vec.to_bytes()
- (train_texts, train_cats), (dev_texts, dev_cats) = load_textcat_data(limit=n_texts)
- print(
- "Using {} examples ({} training, {} evaluation)".format(
- n_texts, len(train_texts), len(dev_texts)
- )
- )
- train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
-
- # get names of other pipes to disable them during training
- pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
- other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
- with nlp.disable_pipes(*other_pipes): # only train textcat
- optimizer = nlp.begin_training()
- textcat.model.tok2vec.from_bytes(tok2vec_weights)
- print("Training the model...")
- print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
- for i in range(n_iter):
- losses = {"textcat": 0.0}
- # batch up the examples using spaCy's minibatch
- batches = minibatch(tqdm.tqdm(train_data), size=2)
- for batch in batches:
- texts, annotations = zip(*batch)
- nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
- with textcat.model.use_params(optimizer.averages):
- # evaluate on the dev data split off in load_data()
- scores = evaluate_textcat(nlp.tokenizer, textcat, dev_texts, dev_cats)
- print(
- "{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}".format( # print a simple table
- losses["textcat"],
- scores["textcat_p"],
- scores["textcat_r"],
- scores["textcat_f"],
- )
- )
-
-
-def evaluate_textcat(tokenizer, textcat, texts, cats):
- docs = (tokenizer(text) for text in texts)
- tp = 1e-8
- fp = 1e-8
- tn = 1e-8
- fn = 1e-8
- for i, doc in enumerate(textcat.pipe(docs)):
- gold = cats[i]
- for label, score in doc.cats.items():
- if label not in gold:
- continue
- if score >= 0.5 and gold[label] >= 0.5:
- tp += 1.0
- elif score >= 0.5 and gold[label] < 0.5:
- fp += 1.0
- elif score < 0.5 and gold[label] < 0.5:
- tn += 1
- elif score < 0.5 and gold[label] >= 0.5:
- fn += 1
- precision = tp / (tp + fp)
- recall = tp / (tp + fn)
- f_score = 2 * (precision * recall) / (precision + recall)
- return {"textcat_p": precision, "textcat_r": recall, "textcat_f": f_score}
-
-
-@plac.annotations(
- width=("Width of CNN layers", "positional", None, int),
- embed_size=("Embedding rows", "positional", None, int),
- pretrain_iters=("Number of iterations to pretrain", "option", "pn", int),
- train_iters=("Number of iterations to train", "option", "tn", int),
- train_examples=("Number of labelled examples", "option", "eg", int),
- vectors_model=("Name or path to vectors model to learn from"),
-)
-def main(
- width,
- embed_size,
- vectors_model,
- pretrain_iters=30,
- train_iters=30,
- train_examples=1000,
-):
- random.seed(0)
- numpy.random.seed(0)
- use_gpu = prefer_gpu()
- print("Using GPU?", use_gpu)
-
- nlp = create_pipeline(width, embed_size, vectors_model)
- print("Load data")
- texts = load_texts(limit=0)
- print("Train tensorizer")
- optimizer = train_tensorizer(nlp, texts, dropout=0.2, n_iter=pretrain_iters)
- print("Train textcat")
- train_textcat(nlp, train_examples, n_iter=train_iters)
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/training/rehearsal.py b/examples/training/rehearsal.py
deleted file mode 100644
index 1cdac02aa..000000000
--- a/examples/training/rehearsal.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""Prevent catastrophic forgetting with rehearsal updates."""
-import plac
-import random
-import warnings
-import srsly
-import spacy
-from spacy.gold import GoldParse
-from spacy.util import minibatch, compounding
-
-
-LABEL = "ANIMAL"
-TRAIN_DATA = [
- (
- "Horses are too tall and they pretend to care about your feelings",
- {"entities": [(0, 6, "ANIMAL")]},
- ),
- ("Do they bite?", {"entities": []}),
- (
- "horses are too tall and they pretend to care about your feelings",
- {"entities": [(0, 6, "ANIMAL")]},
- ),
- ("horses pretend to care about your feelings", {"entities": [(0, 6, "ANIMAL")]}),
- (
- "they pretend to care about your feelings, those horses",
- {"entities": [(48, 54, "ANIMAL")]},
- ),
- ("horses?", {"entities": [(0, 6, "ANIMAL")]}),
-]
-
-
-def read_raw_data(nlp, jsonl_loc):
- for json_obj in srsly.read_jsonl(jsonl_loc):
- if json_obj["text"].strip():
- doc = nlp.make_doc(json_obj["text"])
- yield doc
-
-
-def read_gold_data(nlp, gold_loc):
- docs = []
- golds = []
- for json_obj in srsly.read_jsonl(gold_loc):
- doc = nlp.make_doc(json_obj["text"])
- ents = [(ent["start"], ent["end"], ent["label"]) for ent in json_obj["spans"]]
- gold = GoldParse(doc, entities=ents)
- docs.append(doc)
- golds.append(gold)
- return list(zip(docs, golds))
-
-
-def main(model_name, unlabelled_loc):
- n_iter = 10
- dropout = 0.2
- batch_size = 4
- nlp = spacy.load(model_name)
- nlp.get_pipe("ner").add_label(LABEL)
- raw_docs = list(read_raw_data(nlp, unlabelled_loc))
- optimizer = nlp.resume_training()
- # Avoid use of Adam when resuming training. I don't understand this well
- # yet, but I'm getting weird results from Adam. Try commenting out the
- # nlp.update(), and using Adam -- you'll find the models drift apart.
- # I guess Adam is losing precision, introducing gradient noise?
- optimizer.alpha = 0.1
- optimizer.b1 = 0.0
- optimizer.b2 = 0.0
-
- # get names of other pipes to disable them during training
- pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
- other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
- sizes = compounding(1.0, 4.0, 1.001)
- with nlp.disable_pipes(*other_pipes), warnings.catch_warnings():
- # show warnings for misaligned entity spans once
- warnings.filterwarnings("once", category=UserWarning, module='spacy')
-
- for itn in range(n_iter):
- random.shuffle(TRAIN_DATA)
- random.shuffle(raw_docs)
- losses = {}
- r_losses = {}
- # batch up the examples using spaCy's minibatch
- raw_batches = minibatch(raw_docs, size=4)
- for batch in minibatch(TRAIN_DATA, size=sizes):
- docs, golds = zip(*batch)
- nlp.update(docs, golds, sgd=optimizer, drop=dropout, losses=losses)
- raw_batch = list(next(raw_batches))
- nlp.rehearse(raw_batch, sgd=optimizer, losses=r_losses)
- print("Losses", losses)
- print("R. Losses", r_losses)
- print(nlp.get_pipe("ner").model.unseen_classes)
- test_text = "Do you like horses?"
- doc = nlp(test_text)
- print("Entities in '%s'" % test_text)
- for ent in doc.ents:
- print(ent.label_, ent.text)
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/training/train_entity_linker.py b/examples/training/train_entity_linker.py
deleted file mode 100644
index a68007504..000000000
--- a/examples/training/train_entity_linker.py
+++ /dev/null
@@ -1,177 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-
-"""Example of training spaCy's entity linker, starting off with a predefined
-knowledge base and corresponding vocab, and a blank English model.
-
-For more details, see the documentation:
-* Training: https://spacy.io/usage/training
-* Entity Linking: https://spacy.io/usage/linguistic-features#entity-linking
-
-Compatible with: spaCy v2.2.4
-Last tested with: v2.2.4
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-import random
-from pathlib import Path
-
-from spacy.vocab import Vocab
-
-import spacy
-from spacy.kb import KnowledgeBase
-from spacy.pipeline import EntityRuler
-from spacy.util import minibatch, compounding
-
-
-def sample_train_data():
- train_data = []
-
- # Q2146908 (Russ Cochran): American golfer
- # Q7381115 (Russ Cochran): publisher
-
- text_1 = "Russ Cochran his reprints include EC Comics."
- dict_1 = {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}}
- train_data.append((text_1, {"links": dict_1}))
-
- text_2 = "Russ Cochran has been publishing comic art."
- dict_2 = {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}}
- train_data.append((text_2, {"links": dict_2}))
-
- text_3 = "Russ Cochran captured his first major title with his son as caddie."
- dict_3 = {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}}
- train_data.append((text_3, {"links": dict_3}))
-
- text_4 = "Russ Cochran was a member of University of Kentucky's golf team."
- dict_4 = {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}}
- train_data.append((text_4, {"links": dict_4}))
-
- return train_data
-
-
-# training data
-TRAIN_DATA = sample_train_data()
-
-
-@plac.annotations(
- kb_path=("Path to the knowledge base", "positional", None, Path),
- vocab_path=("Path to the vocab for the kb", "positional", None, Path),
- output_dir=("Optional output directory", "option", "o", Path),
- n_iter=("Number of training iterations", "option", "n", int),
-)
-def main(kb_path, vocab_path, output_dir=None, n_iter=50):
- """Create a blank model with the specified vocab, set up the pipeline and train the entity linker.
- The `vocab` should be the one used during creation of the KB."""
- # create blank English model with correct vocab
- nlp = spacy.blank("en")
- nlp.vocab.from_disk(vocab_path)
- nlp.vocab.vectors.name = "spacy_pretrained_vectors"
- print("Created blank 'en' model with vocab from '%s'" % vocab_path)
-
- # Add a sentencizer component. Alternatively, add a dependency parser for higher accuracy.
- nlp.add_pipe(nlp.create_pipe('sentencizer'))
-
- # Add a custom component to recognize "Russ Cochran" as an entity for the example training data.
- # Note that in a realistic application, an actual NER algorithm should be used instead.
- ruler = EntityRuler(nlp)
- patterns = [{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}]
- ruler.add_patterns(patterns)
- nlp.add_pipe(ruler)
-
- # Create the Entity Linker component and add it to the pipeline.
- if "entity_linker" not in nlp.pipe_names:
- # use only the predicted EL score and not the prior probability (for demo purposes)
- cfg = {"incl_prior": False}
- entity_linker = nlp.create_pipe("entity_linker", cfg)
- kb = KnowledgeBase(vocab=nlp.vocab)
- kb.load_bulk(kb_path)
- print("Loaded Knowledge Base from '%s'" % kb_path)
- entity_linker.set_kb(kb)
- nlp.add_pipe(entity_linker, last=True)
-
- # Convert the texts to docs to make sure we have doc.ents set for the training examples.
- # Also ensure that the annotated examples correspond to known identifiers in the knowlege base.
- kb_ids = nlp.get_pipe("entity_linker").kb.get_entity_strings()
- TRAIN_DOCS = []
- for text, annotation in TRAIN_DATA:
- with nlp.disable_pipes("entity_linker"):
- doc = nlp(text)
- annotation_clean = annotation
- for offset, kb_id_dict in annotation["links"].items():
- new_dict = {}
- for kb_id, value in kb_id_dict.items():
- if kb_id in kb_ids:
- new_dict[kb_id] = value
- else:
- print(
- "Removed", kb_id, "from training because it is not in the KB."
- )
- annotation_clean["links"][offset] = new_dict
- TRAIN_DOCS.append((doc, annotation_clean))
-
- # get names of other pipes to disable them during training
- pipe_exceptions = ["entity_linker", "trf_wordpiecer", "trf_tok2vec"]
- other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
- with nlp.disable_pipes(*other_pipes): # only train entity linker
- # reset and initialize the weights randomly
- optimizer = nlp.begin_training()
- for itn in range(n_iter):
- random.shuffle(TRAIN_DOCS)
- losses = {}
- # batch up the examples using spaCy's minibatch
- batches = minibatch(TRAIN_DOCS, size=compounding(4.0, 32.0, 1.001))
- for batch in batches:
- texts, annotations = zip(*batch)
- nlp.update(
- texts, # batch of texts
- annotations, # batch of annotations
- drop=0.2, # dropout - make it harder to memorise data
- losses=losses,
- sgd=optimizer,
- )
- print(itn, "Losses", losses)
-
- # test the trained model
- _apply_model(nlp)
-
- # save model to output directory
- if output_dir is not None:
- output_dir = Path(output_dir)
- if not output_dir.exists():
- output_dir.mkdir()
- nlp.to_disk(output_dir)
- print()
- print("Saved model to", output_dir)
-
- # test the saved model
- print("Loading from", output_dir)
- nlp2 = spacy.load(output_dir)
- _apply_model(nlp2)
-
-
-def _apply_model(nlp):
- for text, annotation in TRAIN_DATA:
- # apply the entity linker which will now make predictions for the 'Russ Cochran' entities
- doc = nlp(text)
- print()
- print("Entities", [(ent.text, ent.label_, ent.kb_id_) for ent in doc.ents])
- print("Tokens", [(t.text, t.ent_type_, t.ent_kb_id_) for t in doc])
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # Expected output (can be shuffled):
-
- # Entities[('Russ Cochran', 'PERSON', 'Q7381115')]
- # Tokens[('Russ', 'PERSON', 'Q7381115'), ('Cochran', 'PERSON', 'Q7381115'), ("his", '', ''), ('reprints', '', ''), ('include', '', ''), ('The', '', ''), ('Complete', '', ''), ('EC', '', ''), ('Library', '', ''), ('.', '', '')]
-
- # Entities[('Russ Cochran', 'PERSON', 'Q7381115')]
- # Tokens[('Russ', 'PERSON', 'Q7381115'), ('Cochran', 'PERSON', 'Q7381115'), ('has', '', ''), ('been', '', ''), ('publishing', '', ''), ('comic', '', ''), ('art', '', ''), ('.', '', '')]
-
- # Entities[('Russ Cochran', 'PERSON', 'Q2146908')]
- # Tokens[('Russ', 'PERSON', 'Q2146908'), ('Cochran', 'PERSON', 'Q2146908'), ('captured', '', ''), ('his', '', ''), ('first', '', ''), ('major', '', ''), ('title', '', ''), ('with', '', ''), ('his', '', ''), ('son', '', ''), ('as', '', ''), ('caddie', '', ''), ('.', '', '')]
-
- # Entities[('Russ Cochran', 'PERSON', 'Q2146908')]
- # Tokens[('Russ', 'PERSON', 'Q2146908'), ('Cochran', 'PERSON', 'Q2146908'), ('was', '', ''), ('a', '', ''), ('member', '', ''), ('of', '', ''), ('University', '', ''), ('of', '', ''), ('Kentucky', '', ''), ("'s", '', ''), ('golf', '', ''), ('team', '', ''), ('.', '', '')]
diff --git a/examples/training/train_intent_parser.py b/examples/training/train_intent_parser.py
deleted file mode 100644
index a91102093..000000000
--- a/examples/training/train_intent_parser.py
+++ /dev/null
@@ -1,195 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-"""Using the parser to recognise your own semantics
-
-spaCy's parser component can be trained to predict any type of tree
-structure over your input text. You can also predict trees over whole documents
-or chat logs, with connections between the sentence-roots used to annotate
-discourse structure. In this example, we'll build a message parser for a common
-"chat intent": finding local businesses. Our message semantics will have the
-following types of relations: ROOT, PLACE, QUALITY, ATTRIBUTE, TIME, LOCATION.
-
-"show me the best hotel in berlin"
-('show', 'ROOT', 'show')
-('best', 'QUALITY', 'hotel') --> hotel with QUALITY best
-('hotel', 'PLACE', 'show') --> show PLACE hotel
-('berlin', 'LOCATION', 'hotel') --> hotel with LOCATION berlin
-
-Compatible with: spaCy v2.0.0+
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-import random
-from pathlib import Path
-import spacy
-from spacy.util import minibatch, compounding
-
-
-# training data: texts, heads and dependency labels
-# for no relation, we simply chose an arbitrary dependency label, e.g. '-'
-TRAIN_DATA = [
- (
- "find a cafe with great wifi",
- {
- "heads": [0, 2, 0, 5, 5, 2], # index of token head
- "deps": ["ROOT", "-", "PLACE", "-", "QUALITY", "ATTRIBUTE"],
- },
- ),
- (
- "find a hotel near the beach",
- {
- "heads": [0, 2, 0, 5, 5, 2],
- "deps": ["ROOT", "-", "PLACE", "QUALITY", "-", "ATTRIBUTE"],
- },
- ),
- (
- "find me the closest gym that's open late",
- {
- "heads": [0, 0, 4, 4, 0, 6, 4, 6, 6],
- "deps": [
- "ROOT",
- "-",
- "-",
- "QUALITY",
- "PLACE",
- "-",
- "-",
- "ATTRIBUTE",
- "TIME",
- ],
- },
- ),
- (
- "show me the cheapest store that sells flowers",
- {
- "heads": [0, 0, 4, 4, 0, 4, 4, 4], # attach "flowers" to store!
- "deps": ["ROOT", "-", "-", "QUALITY", "PLACE", "-", "-", "PRODUCT"],
- },
- ),
- (
- "find a nice restaurant in london",
- {
- "heads": [0, 3, 3, 0, 3, 3],
- "deps": ["ROOT", "-", "QUALITY", "PLACE", "-", "LOCATION"],
- },
- ),
- (
- "show me the coolest hostel in berlin",
- {
- "heads": [0, 0, 4, 4, 0, 4, 4],
- "deps": ["ROOT", "-", "-", "QUALITY", "PLACE", "-", "LOCATION"],
- },
- ),
- (
- "find a good italian restaurant near work",
- {
- "heads": [0, 4, 4, 4, 0, 4, 5],
- "deps": [
- "ROOT",
- "-",
- "QUALITY",
- "ATTRIBUTE",
- "PLACE",
- "ATTRIBUTE",
- "LOCATION",
- ],
- },
- ),
-]
-
-
-@plac.annotations(
- model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
- output_dir=("Optional output directory", "option", "o", Path),
- n_iter=("Number of training iterations", "option", "n", int),
-)
-def main(model=None, output_dir=None, n_iter=15):
- """Load the model, set up the pipeline and train the parser."""
- if model is not None:
- nlp = spacy.load(model) # load existing spaCy model
- print("Loaded model '%s'" % model)
- else:
- nlp = spacy.blank("en") # create blank Language class
- print("Created blank 'en' model")
-
- # We'll use the built-in dependency parser class, but we want to create a
- # fresh instance – just in case.
- if "parser" in nlp.pipe_names:
- nlp.remove_pipe("parser")
- parser = nlp.create_pipe("parser")
- nlp.add_pipe(parser, first=True)
-
- for text, annotations in TRAIN_DATA:
- for dep in annotations.get("deps", []):
- parser.add_label(dep)
-
- pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"]
- other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
- with nlp.disable_pipes(*other_pipes): # only train parser
- optimizer = nlp.begin_training()
- for itn in range(n_iter):
- random.shuffle(TRAIN_DATA)
- losses = {}
- # batch up the examples using spaCy's minibatch
- batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
- for batch in batches:
- texts, annotations = zip(*batch)
- nlp.update(texts, annotations, sgd=optimizer, losses=losses)
- print("Losses", losses)
-
- # test the trained model
- test_model(nlp)
-
- # save model to output directory
- if output_dir is not None:
- output_dir = Path(output_dir)
- if not output_dir.exists():
- output_dir.mkdir()
- nlp.to_disk(output_dir)
- print("Saved model to", output_dir)
-
- # test the saved model
- print("Loading from", output_dir)
- nlp2 = spacy.load(output_dir)
- test_model(nlp2)
-
-
-def test_model(nlp):
- texts = [
- "find a hotel with good wifi",
- "find me the cheapest gym near work",
- "show me the best hotel in berlin",
- ]
- docs = nlp.pipe(texts)
- for doc in docs:
- print(doc.text)
- print([(t.text, t.dep_, t.head.text) for t in doc if t.dep_ != "-"])
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # Expected output:
- # find a hotel with good wifi
- # [
- # ('find', 'ROOT', 'find'),
- # ('hotel', 'PLACE', 'find'),
- # ('good', 'QUALITY', 'wifi'),
- # ('wifi', 'ATTRIBUTE', 'hotel')
- # ]
- # find me the cheapest gym near work
- # [
- # ('find', 'ROOT', 'find'),
- # ('cheapest', 'QUALITY', 'gym'),
- # ('gym', 'PLACE', 'find'),
- # ('near', 'ATTRIBUTE', 'gym'),
- # ('work', 'LOCATION', 'near')
- # ]
- # show me the best hotel in berlin
- # [
- # ('show', 'ROOT', 'show'),
- # ('best', 'QUALITY', 'hotel'),
- # ('hotel', 'PLACE', 'show'),
- # ('berlin', 'LOCATION', 'hotel')
- # ]
diff --git a/examples/training/train_ner.py b/examples/training/train_ner.py
deleted file mode 100644
index f64ba801a..000000000
--- a/examples/training/train_ner.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Example of training spaCy's named entity recognizer, starting off with an
-existing model or a blank model.
-
-For more details, see the documentation:
-* Training: https://spacy.io/usage/training
-* NER: https://spacy.io/usage/linguistic-features#named-entities
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.2.4
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-import random
-import warnings
-from pathlib import Path
-import spacy
-from spacy.util import minibatch, compounding
-
-
-# training data
-TRAIN_DATA = [
- ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
- ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
-]
-
-
-@plac.annotations(
- model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
- output_dir=("Optional output directory", "option", "o", Path),
- n_iter=("Number of training iterations", "option", "n", int),
-)
-def main(model=None, output_dir=None, n_iter=100):
- """Load the model, set up the pipeline and train the entity recognizer."""
- if model is not None:
- nlp = spacy.load(model) # load existing spaCy model
- print("Loaded model '%s'" % model)
- else:
- nlp = spacy.blank("en") # create blank Language class
- print("Created blank 'en' model")
-
- # create the built-in pipeline components and add them to the pipeline
- # nlp.create_pipe works for built-ins that are registered with spaCy
- if "ner" not in nlp.pipe_names:
- ner = nlp.create_pipe("ner")
- nlp.add_pipe(ner, last=True)
- # otherwise, get it so we can add labels
- else:
- ner = nlp.get_pipe("ner")
-
- # add labels
- for _, annotations in TRAIN_DATA:
- for ent in annotations.get("entities"):
- ner.add_label(ent[2])
-
- # get names of other pipes to disable them during training
- pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
- other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
- # only train NER
- with nlp.disable_pipes(*other_pipes), warnings.catch_warnings():
- # show warnings for misaligned entity spans once
- warnings.filterwarnings("once", category=UserWarning, module='spacy')
-
- # reset and initialize the weights randomly – but only if we're
- # training a new model
- if model is None:
- nlp.begin_training()
- for itn in range(n_iter):
- random.shuffle(TRAIN_DATA)
- losses = {}
- # batch up the examples using spaCy's minibatch
- batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
- for batch in batches:
- texts, annotations = zip(*batch)
- nlp.update(
- texts, # batch of texts
- annotations, # batch of annotations
- drop=0.5, # dropout - make it harder to memorise data
- losses=losses,
- )
- print("Losses", losses)
-
- # test the trained model
- for text, _ in TRAIN_DATA:
- doc = nlp(text)
- print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
- print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
-
- # save model to output directory
- if output_dir is not None:
- output_dir = Path(output_dir)
- if not output_dir.exists():
- output_dir.mkdir()
- nlp.to_disk(output_dir)
- print("Saved model to", output_dir)
-
- # test the saved model
- print("Loading from", output_dir)
- nlp2 = spacy.load(output_dir)
- for text, _ in TRAIN_DATA:
- doc = nlp2(text)
- print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
- print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # Expected output:
- # Entities [('Shaka Khan', 'PERSON')]
- # Tokens [('Who', '', 2), ('is', '', 2), ('Shaka', 'PERSON', 3),
- # ('Khan', 'PERSON', 1), ('?', '', 2)]
- # Entities [('London', 'LOC'), ('Berlin', 'LOC')]
- # Tokens [('I', '', 2), ('like', '', 2), ('London', 'LOC', 3),
- # ('and', '', 2), ('Berlin', 'LOC', 3), ('.', '', 2)]
diff --git a/examples/training/train_new_entity_type.py b/examples/training/train_new_entity_type.py
deleted file mode 100644
index a14688012..000000000
--- a/examples/training/train_new_entity_type.py
+++ /dev/null
@@ -1,144 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Example of training an additional entity type
-
-This script shows how to add a new entity type to an existing pretrained NER
-model. To keep the example short and simple, only four sentences are provided
-as examples. In practice, you'll need many more — a few hundred would be a
-good start. You will also likely need to mix in examples of other entity
-types, which might be obtained by running the entity recognizer over unlabelled
-sentences, and adding their annotations to the training set.
-
-The actual training is performed by looping over the examples, and calling
-`nlp.entity.update()`. The `update()` method steps through the words of the
-input. At each word, it makes a prediction. It then consults the annotations
-provided on the GoldParse instance, to see whether it was right. If it was
-wrong, it adjusts its weights so that the correct action will score higher
-next time.
-
-After training your model, you can save it to a directory. We recommend
-wrapping models as Python packages, for ease of deployment.
-
-For more details, see the documentation:
-* Training: https://spacy.io/usage/training
-* NER: https://spacy.io/usage/linguistic-features#named-entities
-
-Compatible with: spaCy v2.1.0+
-Last tested with: v2.2.4
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-import random
-import warnings
-from pathlib import Path
-import spacy
-from spacy.util import minibatch, compounding
-
-
-# new entity label
-LABEL = "ANIMAL"
-
-# training data
-# Note: If you're using an existing model, make sure to mix in examples of
-# other entity types that spaCy correctly recognized before. Otherwise, your
-# model might learn the new type, but "forget" what it previously knew.
-# https://explosion.ai/blog/pseudo-rehearsal-catastrophic-forgetting
-TRAIN_DATA = [
- (
- "Horses are too tall and they pretend to care about your feelings",
- {"entities": [(0, 6, LABEL)]},
- ),
- ("Do they bite?", {"entities": []}),
- (
- "horses are too tall and they pretend to care about your feelings",
- {"entities": [(0, 6, LABEL)]},
- ),
- ("horses pretend to care about your feelings", {"entities": [(0, 6, LABEL)]}),
- (
- "they pretend to care about your feelings, those horses",
- {"entities": [(48, 54, LABEL)]},
- ),
- ("horses?", {"entities": [(0, 6, LABEL)]}),
-]
-
-
-@plac.annotations(
- model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
- new_model_name=("New model name for model meta.", "option", "nm", str),
- output_dir=("Optional output directory", "option", "o", Path),
- n_iter=("Number of training iterations", "option", "n", int),
-)
-def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
- """Set up the pipeline and entity recognizer, and train the new entity."""
- random.seed(0)
- if model is not None:
- nlp = spacy.load(model) # load existing spaCy model
- print("Loaded model '%s'" % model)
- else:
- nlp = spacy.blank("en") # create blank Language class
- print("Created blank 'en' model")
- # Add entity recognizer to model if it's not in the pipeline
- # nlp.create_pipe works for built-ins that are registered with spaCy
- if "ner" not in nlp.pipe_names:
- ner = nlp.create_pipe("ner")
- nlp.add_pipe(ner)
- # otherwise, get it, so we can add labels to it
- else:
- ner = nlp.get_pipe("ner")
-
- ner.add_label(LABEL) # add new entity label to entity recognizer
- # Adding extraneous labels shouldn't mess anything up
- ner.add_label("VEGETABLE")
- if model is None:
- optimizer = nlp.begin_training()
- else:
- optimizer = nlp.resume_training()
- move_names = list(ner.move_names)
- # get names of other pipes to disable them during training
- pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
- other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
- # only train NER
- with nlp.disable_pipes(*other_pipes), warnings.catch_warnings():
- # show warnings for misaligned entity spans once
- warnings.filterwarnings("once", category=UserWarning, module='spacy')
-
- sizes = compounding(1.0, 4.0, 1.001)
- # batch up the examples using spaCy's minibatch
- for itn in range(n_iter):
- random.shuffle(TRAIN_DATA)
- batches = minibatch(TRAIN_DATA, size=sizes)
- losses = {}
- for batch in batches:
- texts, annotations = zip(*batch)
- nlp.update(texts, annotations, sgd=optimizer, drop=0.35, losses=losses)
- print("Losses", losses)
-
- # test the trained model
- test_text = "Do you like horses?"
- doc = nlp(test_text)
- print("Entities in '%s'" % test_text)
- for ent in doc.ents:
- print(ent.label_, ent.text)
-
- # save model to output directory
- if output_dir is not None:
- output_dir = Path(output_dir)
- if not output_dir.exists():
- output_dir.mkdir()
- nlp.meta["name"] = new_model_name # rename model
- nlp.to_disk(output_dir)
- print("Saved model to", output_dir)
-
- # test the saved model
- print("Loading from", output_dir)
- nlp2 = spacy.load(output_dir)
- # Check the classes have loaded back consistently
- assert nlp2.get_pipe("ner").move_names == move_names
- doc2 = nlp2(test_text)
- for ent in doc2.ents:
- print(ent.label_, ent.text)
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/training/train_parser.py b/examples/training/train_parser.py
deleted file mode 100644
index c5adb0dec..000000000
--- a/examples/training/train_parser.py
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Example of training spaCy dependency parser, starting off with an existing
-model or a blank model. For more details, see the documentation:
-* Training: https://spacy.io/usage/training
-* Dependency Parse: https://spacy.io/usage/linguistic-features#dependency-parse
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.1.0
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-import random
-from pathlib import Path
-import spacy
-from spacy.util import minibatch, compounding
-
-
-# training data
-TRAIN_DATA = [
- (
- "They trade mortgage-backed securities.",
- {
- "heads": [1, 1, 4, 4, 5, 1, 1],
- "deps": ["nsubj", "ROOT", "compound", "punct", "nmod", "dobj", "punct"],
- },
- ),
- (
- "I like London and Berlin.",
- {
- "heads": [1, 1, 1, 2, 2, 1],
- "deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"],
- },
- ),
-]
-
-
-@plac.annotations(
- model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
- output_dir=("Optional output directory", "option", "o", Path),
- n_iter=("Number of training iterations", "option", "n", int),
-)
-def main(model=None, output_dir=None, n_iter=15):
- """Load the model, set up the pipeline and train the parser."""
- if model is not None:
- nlp = spacy.load(model) # load existing spaCy model
- print("Loaded model '%s'" % model)
- else:
- nlp = spacy.blank("en") # create blank Language class
- print("Created blank 'en' model")
-
- # add the parser to the pipeline if it doesn't exist
- # nlp.create_pipe works for built-ins that are registered with spaCy
- if "parser" not in nlp.pipe_names:
- parser = nlp.create_pipe("parser")
- nlp.add_pipe(parser, first=True)
- # otherwise, get it, so we can add labels to it
- else:
- parser = nlp.get_pipe("parser")
-
- # add labels to the parser
- for _, annotations in TRAIN_DATA:
- for dep in annotations.get("deps", []):
- parser.add_label(dep)
-
- # get names of other pipes to disable them during training
- pipe_exceptions = ["parser", "trf_wordpiecer", "trf_tok2vec"]
- other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
- with nlp.disable_pipes(*other_pipes): # only train parser
- optimizer = nlp.begin_training()
- for itn in range(n_iter):
- random.shuffle(TRAIN_DATA)
- losses = {}
- # batch up the examples using spaCy's minibatch
- batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
- for batch in batches:
- texts, annotations = zip(*batch)
- nlp.update(texts, annotations, sgd=optimizer, losses=losses)
- print("Losses", losses)
-
- # test the trained model
- test_text = "I like securities."
- doc = nlp(test_text)
- print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])
-
- # save model to output directory
- if output_dir is not None:
- output_dir = Path(output_dir)
- if not output_dir.exists():
- output_dir.mkdir()
- nlp.to_disk(output_dir)
- print("Saved model to", output_dir)
-
- # test the saved model
- print("Loading from", output_dir)
- nlp2 = spacy.load(output_dir)
- doc = nlp2(test_text)
- print("Dependencies", [(t.text, t.dep_, t.head.text) for t in doc])
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # expected result:
- # [
- # ('I', 'nsubj', 'like'),
- # ('like', 'ROOT', 'like'),
- # ('securities', 'dobj', 'like'),
- # ('.', 'punct', 'like')
- # ]
diff --git a/examples/training/train_tagger.py b/examples/training/train_tagger.py
deleted file mode 100644
index 7136273b3..000000000
--- a/examples/training/train_tagger.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""
-A simple example for training a part-of-speech tagger with a custom tag map.
-To allow us to update the tag map with our custom one, this example starts off
-with a blank Language class and modifies its defaults. For more details, see
-the documentation:
-* Training: https://spacy.io/usage/training
-* POS Tagging: https://spacy.io/usage/linguistic-features#pos-tagging
-
-Compatible with: spaCy v2.0.0+
-Last tested with: v2.1.0
-"""
-from __future__ import unicode_literals, print_function
-
-import plac
-import random
-from pathlib import Path
-import spacy
-from spacy.util import minibatch, compounding
-
-
-# You need to define a mapping from your data's part-of-speech tag names to the
-# Universal Part-of-Speech tag set, as spaCy includes an enum of these tags.
-# See here for the Universal Tag Set:
-# http://universaldependencies.github.io/docs/u/pos/index.html
-# You may also specify morphological features for your tags, from the universal
-# scheme.
-TAG_MAP = {"N": {"pos": "NOUN"}, "V": {"pos": "VERB"}, "J": {"pos": "ADJ"}}
-
-# Usually you'll read this in, of course. Data formats vary. Ensure your
-# strings are unicode and that the number of tags assigned matches spaCy's
-# tokenization. If not, you can always add a 'words' key to the annotations
-# that specifies the gold-standard tokenization, e.g.:
-# ("Eatblueham", {'words': ['Eat', 'blue', 'ham'], 'tags': ['V', 'J', 'N']})
-TRAIN_DATA = [
- ("I like green eggs", {"tags": ["N", "V", "J", "N"]}),
- ("Eat blue ham", {"tags": ["V", "J", "N"]}),
-]
-
-
-@plac.annotations(
- lang=("ISO Code of language to use", "option", "l", str),
- output_dir=("Optional output directory", "option", "o", Path),
- n_iter=("Number of training iterations", "option", "n", int),
-)
-def main(lang="en", output_dir=None, n_iter=25):
- """Create a new model, set up the pipeline and train the tagger. In order to
- train the tagger with a custom tag map, we're creating a new Language
- instance with a custom vocab.
- """
- nlp = spacy.blank(lang)
- # add the tagger to the pipeline
- # nlp.create_pipe works for built-ins that are registered with spaCy
- tagger = nlp.create_pipe("tagger")
- # Add the tags. This needs to be done before you start training.
- for tag, values in TAG_MAP.items():
- tagger.add_label(tag, values)
- nlp.add_pipe(tagger)
-
- optimizer = nlp.begin_training()
- for i in range(n_iter):
- random.shuffle(TRAIN_DATA)
- losses = {}
- # batch up the examples using spaCy's minibatch
- batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
- for batch in batches:
- texts, annotations = zip(*batch)
- nlp.update(texts, annotations, sgd=optimizer, losses=losses)
- print("Losses", losses)
-
- # test the trained model
- test_text = "I like blue eggs"
- doc = nlp(test_text)
- print("Tags", [(t.text, t.tag_, t.pos_) for t in doc])
-
- # save model to output directory
- if output_dir is not None:
- output_dir = Path(output_dir)
- if not output_dir.exists():
- output_dir.mkdir()
- nlp.to_disk(output_dir)
- print("Saved model to", output_dir)
-
- # test the save model
- print("Loading from", output_dir)
- nlp2 = spacy.load(output_dir)
- doc = nlp2(test_text)
- print("Tags", [(t.text, t.tag_, t.pos_) for t in doc])
-
-
-if __name__ == "__main__":
- plac.call(main)
-
- # Expected output:
- # [
- # ('I', 'N', 'NOUN'),
- # ('like', 'V', 'VERB'),
- # ('blue', 'J', 'ADJ'),
- # ('eggs', 'N', 'NOUN')
- # ]
diff --git a/examples/training/train_textcat.py b/examples/training/train_textcat.py
deleted file mode 100644
index 456ef098c..000000000
--- a/examples/training/train_textcat.py
+++ /dev/null
@@ -1,160 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Train a convolutional neural network text classifier on the
-IMDB dataset, using the TextCategorizer component. The dataset will be loaded
-automatically via Thinc's built-in dataset loader. The model is added to
-spacy.pipeline, and predictions are available via `doc.cats`. For more details,
-see the documentation:
-* Training: https://spacy.io/usage/training
-
-Compatible with: spaCy v2.0.0+
-"""
-from __future__ import unicode_literals, print_function
-import plac
-import random
-from pathlib import Path
-import thinc.extra.datasets
-
-import spacy
-from spacy.util import minibatch, compounding
-
-
-@plac.annotations(
- model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
- output_dir=("Optional output directory", "option", "o", Path),
- n_texts=("Number of texts to train from", "option", "t", int),
- n_iter=("Number of training iterations", "option", "n", int),
- init_tok2vec=("Pretrained tok2vec weights", "option", "t2v", Path),
-)
-def main(model=None, output_dir=None, n_iter=20, n_texts=2000, init_tok2vec=None):
- if output_dir is not None:
- output_dir = Path(output_dir)
- if not output_dir.exists():
- output_dir.mkdir()
-
- if model is not None:
- nlp = spacy.load(model) # load existing spaCy model
- print("Loaded model '%s'" % model)
- else:
- nlp = spacy.blank("en") # create blank Language class
- print("Created blank 'en' model")
-
- # add the text classifier to the pipeline if it doesn't exist
- # nlp.create_pipe works for built-ins that are registered with spaCy
- if "textcat" not in nlp.pipe_names:
- textcat = nlp.create_pipe(
- "textcat", config={"exclusive_classes": True, "architecture": "simple_cnn"}
- )
- nlp.add_pipe(textcat, last=True)
- # otherwise, get it, so we can add labels to it
- else:
- textcat = nlp.get_pipe("textcat")
-
- # add label to text classifier
- textcat.add_label("POSITIVE")
- textcat.add_label("NEGATIVE")
-
- # load the IMDB dataset
- print("Loading IMDB data...")
- (train_texts, train_cats), (dev_texts, dev_cats) = load_data()
- train_texts = train_texts[:n_texts]
- train_cats = train_cats[:n_texts]
- print(
- "Using {} examples ({} training, {} evaluation)".format(
- n_texts, len(train_texts), len(dev_texts)
- )
- )
- train_data = list(zip(train_texts, [{"cats": cats} for cats in train_cats]))
-
- # get names of other pipes to disable them during training
- pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
- other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
- with nlp.disable_pipes(*other_pipes): # only train textcat
- optimizer = nlp.begin_training()
- if init_tok2vec is not None:
- with init_tok2vec.open("rb") as file_:
- textcat.model.tok2vec.from_bytes(file_.read())
- print("Training the model...")
- print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
- batch_sizes = compounding(4.0, 32.0, 1.001)
- for i in range(n_iter):
- losses = {}
- # batch up the examples using spaCy's minibatch
- random.shuffle(train_data)
- batches = minibatch(train_data, size=batch_sizes)
- for batch in batches:
- texts, annotations = zip(*batch)
- nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
- with textcat.model.use_params(optimizer.averages):
- # evaluate on the dev data split off in load_data()
- scores = evaluate(nlp.tokenizer, textcat, dev_texts, dev_cats)
- print(
- "{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}".format( # print a simple table
- losses["textcat"],
- scores["textcat_p"],
- scores["textcat_r"],
- scores["textcat_f"],
- )
- )
-
- # test the trained model
- test_text = "This movie sucked"
- doc = nlp(test_text)
- print(test_text, doc.cats)
-
- if output_dir is not None:
- with nlp.use_params(optimizer.averages):
- nlp.to_disk(output_dir)
- print("Saved model to", output_dir)
-
- # test the saved model
- print("Loading from", output_dir)
- nlp2 = spacy.load(output_dir)
- doc2 = nlp2(test_text)
- print(test_text, doc2.cats)
-
-
-def load_data(limit=0, split=0.8):
- """Load data from the IMDB dataset."""
- # Partition off part of the train data for evaluation
- train_data, _ = thinc.extra.datasets.imdb()
- random.shuffle(train_data)
- train_data = train_data[-limit:]
- texts, labels = zip(*train_data)
- cats = [{"POSITIVE": bool(y), "NEGATIVE": not bool(y)} for y in labels]
- split = int(len(train_data) * split)
- return (texts[:split], cats[:split]), (texts[split:], cats[split:])
-
-
-def evaluate(tokenizer, textcat, texts, cats):
- docs = (tokenizer(text) for text in texts)
- tp = 0.0 # True positives
- fp = 1e-8 # False positives
- fn = 1e-8 # False negatives
- tn = 0.0 # True negatives
- for i, doc in enumerate(textcat.pipe(docs)):
- gold = cats[i]
- for label, score in doc.cats.items():
- if label not in gold:
- continue
- if label == "NEGATIVE":
- continue
- if score >= 0.5 and gold[label] >= 0.5:
- tp += 1.0
- elif score >= 0.5 and gold[label] < 0.5:
- fp += 1.0
- elif score < 0.5 and gold[label] < 0.5:
- tn += 1
- elif score < 0.5 and gold[label] >= 0.5:
- fn += 1
- precision = tp / (tp + fp)
- recall = tp / (tp + fn)
- if (precision + recall) == 0:
- f_score = 0.0
- else:
- f_score = 2 * (precision * recall) / (precision + recall)
- return {"textcat_p": precision, "textcat_r": recall, "textcat_f": f_score}
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/vectors_fast_text.py b/examples/vectors_fast_text.py
deleted file mode 100644
index 9b34811f7..000000000
--- a/examples/vectors_fast_text.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Load vectors for a language trained using fastText
-https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
-Compatible with: spaCy v2.0.0+
-"""
-from __future__ import unicode_literals
-import plac
-import numpy
-
-import spacy
-from spacy.language import Language
-
-
-@plac.annotations(
- vectors_loc=("Path to .vec file", "positional", None, str),
- lang=(
- "Optional language ID. If not set, blank Language() will be used.",
- "positional",
- None,
- str,
- ),
-)
-def main(vectors_loc, lang=None):
- if lang is None:
- nlp = Language()
- else:
- # create empty language class – this is required if you're planning to
- # save the model to disk and load it back later (models always need a
- # "lang" setting). Use 'xx' for blank multi-language class.
- nlp = spacy.blank(lang)
- with open(vectors_loc, "rb") as file_:
- header = file_.readline()
- nr_row, nr_dim = header.split()
- nlp.vocab.reset_vectors(width=int(nr_dim))
- for line in file_:
- line = line.rstrip().decode("utf8")
- pieces = line.rsplit(" ", int(nr_dim))
- word = pieces[0]
- vector = numpy.asarray([float(v) for v in pieces[1:]], dtype="f")
- nlp.vocab.set_vector(word, vector) # add the vectors to the vocab
- # test the vectors and similarity
- text = "class colspan"
- doc = nlp(text)
- print(text, doc[0].similarity(doc[1]))
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/vectors_tensorboard.py b/examples/vectors_tensorboard.py
deleted file mode 100644
index 72eda1edc..000000000
--- a/examples/vectors_tensorboard.py
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/usr/bin/env python
-# coding: utf8
-"""Visualize spaCy word vectors in Tensorboard.
-
-Adapted from: https://gist.github.com/BrikerMan/7bd4e4bd0a00ac9076986148afc06507
-"""
-from __future__ import unicode_literals
-
-from os import path
-
-import tqdm
-import math
-import numpy
-import plac
-import spacy
-import tensorflow as tf
-from tensorflow.contrib.tensorboard.plugins.projector import (
- visualize_embeddings,
- ProjectorConfig,
-)
-
-
-@plac.annotations(
- vectors_loc=("Path to spaCy model that contains vectors", "positional", None, str),
- out_loc=(
- "Path to output folder for tensorboard session data",
- "positional",
- None,
- str,
- ),
- name=(
- "Human readable name for tsv file and vectors tensor",
- "positional",
- None,
- str,
- ),
-)
-def main(vectors_loc, out_loc, name="spaCy_vectors"):
- meta_file = "{}.tsv".format(name)
- out_meta_file = path.join(out_loc, meta_file)
-
- print("Loading spaCy vectors model: {}".format(vectors_loc))
- model = spacy.load(vectors_loc)
- print("Finding lexemes with vectors attached: {}".format(vectors_loc))
- strings_stream = tqdm.tqdm(
- model.vocab.strings, total=len(model.vocab.strings), leave=False
- )
- queries = [w for w in strings_stream if model.vocab.has_vector(w)]
- vector_count = len(queries)
-
- print(
- "Building Tensorboard Projector metadata for ({}) vectors: {}".format(
- vector_count, out_meta_file
- )
- )
-
- # Store vector data in a tensorflow variable
- tf_vectors_variable = numpy.zeros((vector_count, model.vocab.vectors.shape[1]))
-
- # Write a tab-separated file that contains information about the vectors for visualization
- #
- # Reference: https://www.tensorflow.org/programmers_guide/embedding#metadata
- with open(out_meta_file, "wb") as file_metadata:
- # Define columns in the first row
- file_metadata.write("Text\tFrequency\n".encode("utf-8"))
- # Write out a row for each vector that we add to the tensorflow variable we created
- vec_index = 0
- for text in tqdm.tqdm(queries, total=len(queries), leave=False):
- # https://github.com/tensorflow/tensorflow/issues/9094
- text = "" if text.lstrip() == "" else text
- lex = model.vocab[text]
-
- # Store vector data and metadata
- tf_vectors_variable[vec_index] = model.vocab.get_vector(text)
- file_metadata.write(
- "{}\t{}\n".format(text, math.exp(lex.prob) * vector_count).encode(
- "utf-8"
- )
- )
- vec_index += 1
-
- print("Running Tensorflow Session...")
- sess = tf.InteractiveSession()
- tf.Variable(tf_vectors_variable, trainable=False, name=name)
- tf.global_variables_initializer().run()
- saver = tf.train.Saver()
- writer = tf.summary.FileWriter(out_loc, sess.graph)
-
- # Link the embeddings into the config
- config = ProjectorConfig()
- embed = config.embeddings.add()
- embed.tensor_name = name
- embed.metadata_path = meta_file
-
- # Tell the projector about the configured embeddings and metadata file
- visualize_embeddings(writer, config)
-
- # Save session and print run command to the output
- print("Saving Tensorboard Session...")
- saver.save(sess, path.join(out_loc, "{}.ckpt".format(name)))
- print("Done. Run `tensorboard --logdir={0}` to view in Tensorboard".format(out_loc))
-
-
-if __name__ == "__main__":
- plac.call(main)
diff --git a/examples/training/ner_example_data/README.md b/extra/example_data/ner_example_data/README.md
similarity index 100%
rename from examples/training/ner_example_data/README.md
rename to extra/example_data/ner_example_data/README.md
diff --git a/examples/training/ner_example_data/ner-sent-per-line.iob b/extra/example_data/ner_example_data/ner-sent-per-line.iob
similarity index 100%
rename from examples/training/ner_example_data/ner-sent-per-line.iob
rename to extra/example_data/ner_example_data/ner-sent-per-line.iob
diff --git a/examples/training/ner_example_data/ner-sent-per-line.json b/extra/example_data/ner_example_data/ner-sent-per-line.json
similarity index 100%
rename from examples/training/ner_example_data/ner-sent-per-line.json
rename to extra/example_data/ner_example_data/ner-sent-per-line.json
diff --git a/examples/training/ner_example_data/ner-token-per-line-conll2003.iob b/extra/example_data/ner_example_data/ner-token-per-line-conll2003.iob
similarity index 100%
rename from examples/training/ner_example_data/ner-token-per-line-conll2003.iob
rename to extra/example_data/ner_example_data/ner-token-per-line-conll2003.iob
diff --git a/examples/training/ner_example_data/ner-token-per-line-conll2003.json b/extra/example_data/ner_example_data/ner-token-per-line-conll2003.json
similarity index 100%
rename from examples/training/ner_example_data/ner-token-per-line-conll2003.json
rename to extra/example_data/ner_example_data/ner-token-per-line-conll2003.json
diff --git a/examples/training/ner_example_data/ner-token-per-line-with-pos.iob b/extra/example_data/ner_example_data/ner-token-per-line-with-pos.iob
similarity index 100%
rename from examples/training/ner_example_data/ner-token-per-line-with-pos.iob
rename to extra/example_data/ner_example_data/ner-token-per-line-with-pos.iob
diff --git a/examples/training/ner_example_data/ner-token-per-line-with-pos.json b/extra/example_data/ner_example_data/ner-token-per-line-with-pos.json
similarity index 100%
rename from examples/training/ner_example_data/ner-token-per-line-with-pos.json
rename to extra/example_data/ner_example_data/ner-token-per-line-with-pos.json
diff --git a/examples/training/ner_example_data/ner-token-per-line.iob b/extra/example_data/ner_example_data/ner-token-per-line.iob
similarity index 100%
rename from examples/training/ner_example_data/ner-token-per-line.iob
rename to extra/example_data/ner_example_data/ner-token-per-line.iob
diff --git a/examples/training/ner_example_data/ner-token-per-line.json b/extra/example_data/ner_example_data/ner-token-per-line.json
similarity index 100%
rename from examples/training/ner_example_data/ner-token-per-line.json
rename to extra/example_data/ner_example_data/ner-token-per-line.json
diff --git a/examples/training/textcat_example_data/CC0.txt b/extra/example_data/textcat_example_data/CC0.txt
similarity index 100%
rename from examples/training/textcat_example_data/CC0.txt
rename to extra/example_data/textcat_example_data/CC0.txt
diff --git a/examples/training/textcat_example_data/CC_BY-SA-3.0.txt b/extra/example_data/textcat_example_data/CC_BY-SA-3.0.txt
similarity index 100%
rename from examples/training/textcat_example_data/CC_BY-SA-3.0.txt
rename to extra/example_data/textcat_example_data/CC_BY-SA-3.0.txt
diff --git a/examples/training/textcat_example_data/CC_BY-SA-4.0.txt b/extra/example_data/textcat_example_data/CC_BY-SA-4.0.txt
similarity index 100%
rename from examples/training/textcat_example_data/CC_BY-SA-4.0.txt
rename to extra/example_data/textcat_example_data/CC_BY-SA-4.0.txt
diff --git a/examples/training/textcat_example_data/README.md b/extra/example_data/textcat_example_data/README.md
similarity index 100%
rename from examples/training/textcat_example_data/README.md
rename to extra/example_data/textcat_example_data/README.md
diff --git a/examples/training/textcat_example_data/cooking.json b/extra/example_data/textcat_example_data/cooking.json
similarity index 100%
rename from examples/training/textcat_example_data/cooking.json
rename to extra/example_data/textcat_example_data/cooking.json
diff --git a/examples/training/textcat_example_data/cooking.jsonl b/extra/example_data/textcat_example_data/cooking.jsonl
similarity index 100%
rename from examples/training/textcat_example_data/cooking.jsonl
rename to extra/example_data/textcat_example_data/cooking.jsonl
diff --git a/examples/training/textcat_example_data/jigsaw-toxic-comment.json b/extra/example_data/textcat_example_data/jigsaw-toxic-comment.json
similarity index 100%
rename from examples/training/textcat_example_data/jigsaw-toxic-comment.json
rename to extra/example_data/textcat_example_data/jigsaw-toxic-comment.json
diff --git a/examples/training/textcat_example_data/jigsaw-toxic-comment.jsonl b/extra/example_data/textcat_example_data/jigsaw-toxic-comment.jsonl
similarity index 100%
rename from examples/training/textcat_example_data/jigsaw-toxic-comment.jsonl
rename to extra/example_data/textcat_example_data/jigsaw-toxic-comment.jsonl
diff --git a/examples/training/textcat_example_data/textcatjsonl_to_trainjson.py b/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py
similarity index 92%
rename from examples/training/textcat_example_data/textcatjsonl_to_trainjson.py
rename to extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py
index 339ce39be..66d96ff68 100644
--- a/examples/training/textcat_example_data/textcatjsonl_to_trainjson.py
+++ b/extra/example_data/textcat_example_data/textcatjsonl_to_trainjson.py
@@ -5,16 +5,17 @@ from spacy.gold import docs_to_json
import srsly
import sys
+
@plac.annotations(
model=("Model name. Defaults to 'en'.", "option", "m", str),
input_file=("Input file (jsonl)", "positional", None, Path),
output_dir=("Output directory", "positional", None, Path),
n_texts=("Number of texts to convert", "option", "t", int),
)
-def convert(model='en', input_file=None, output_dir=None, n_texts=0):
+def convert(model="en", input_file=None, output_dir=None, n_texts=0):
# Load model with tokenizer + sentencizer only
nlp = spacy.load(model)
- nlp.disable_pipes(*nlp.pipe_names)
+ nlp.select_pipes(disable=nlp.pipe_names)
sentencizer = nlp.create_pipe("sentencizer")
nlp.add_pipe(sentencizer, first=True)
@@ -49,5 +50,6 @@ def convert(model='en', input_file=None, output_dir=None, n_texts=0):
srsly.write_json(output_dir / input_file.with_suffix(".json"), [docs_to_json(docs)])
+
if __name__ == "__main__":
plac.call(convert)
diff --git a/examples/training/training-data.json b/extra/example_data/training-data.json
similarity index 100%
rename from examples/training/training-data.json
rename to extra/example_data/training-data.json
diff --git a/examples/training/vocab-data.jsonl b/extra/example_data/vocab-data.jsonl
similarity index 100%
rename from examples/training/vocab-data.jsonl
rename to extra/example_data/vocab-data.jsonl
diff --git a/extra/experiments/onto-joint/defaults.cfg b/extra/experiments/onto-joint/defaults.cfg
new file mode 100644
index 000000000..7954b57b5
--- /dev/null
+++ b/extra/experiments/onto-joint/defaults.cfg
@@ -0,0 +1,133 @@
+[paths]
+train = ""
+dev = ""
+raw = null
+init_tok2vec = null
+
+[system]
+seed = 0
+use_pytorch_for_gpu_memory = false
+
+[training]
+seed = ${system:seed}
+dropout = 0.1
+init_tok2vec = ${paths:init_tok2vec}
+vectors = null
+accumulate_gradient = 1
+max_steps = 0
+max_epochs = 0
+patience = 10000
+eval_frequency = 200
+score_weights = {"dep_las": 0.4, "ents_f": 0.4, "tag_acc": 0.2}
+frozen_components = []
+
+[training.train_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths:train}
+gold_preproc = true
+max_length = 0
+limit = 0
+
+[training.dev_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths:dev}
+gold_preproc = ${training.train_corpus:gold_preproc}
+max_length = 0
+limit = 0
+
+[training.batcher]
+@batchers = "spacy.batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 1.0
+use_averages = false
+eps = 1e-8
+learn_rate = 0.001
+
+[nlp]
+lang = "en"
+load_vocab_data = false
+pipeline = ["tok2vec", "ner", "tagger", "parser"]
+
+[nlp.tokenizer]
+@tokenizers = "spacy.Tokenizer.v1"
+
+[nlp.lemmatizer]
+@lemmatizers = "spacy.Lemmatizer.v1"
+
+[components]
+
+[components.tok2vec]
+factory = "tok2vec"
+
+[components.ner]
+factory = "ner"
+learn_tokens = false
+min_action_freq = 1
+
+[components.tagger]
+factory = "tagger"
+
+[components.parser]
+factory = "parser"
+learn_tokens = false
+min_action_freq = 30
+
+[components.tagger.model]
+@architectures = "spacy.Tagger.v1"
+
+[components.tagger.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode:width}
+
+[components.parser.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 8
+hidden_width = 128
+maxout_pieces = 2
+use_upper = true
+
+[components.parser.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode:width}
+
+[components.ner.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 3
+hidden_width = 128
+maxout_pieces = 2
+use_upper = true
+
+[components.ner.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode:width}
+
+[components.tok2vec.model]
+@architectures = "spacy.Tok2Vec.v1"
+
+[components.tok2vec.model.embed]
+@architectures = "spacy.MultiHashEmbed.v1"
+width = ${components.tok2vec.model.encode:width}
+rows = 2000
+also_embed_subwords = true
+also_use_static_vectors = false
+
+[components.tok2vec.model.encode]
+@architectures = "spacy.MaxoutWindowEncoder.v1"
+width = 96
+depth = 4
+window_size = 1
+maxout_pieces = 3
diff --git a/extra/experiments/onto-joint/pretrain.cfg b/extra/experiments/onto-joint/pretrain.cfg
new file mode 100644
index 000000000..211339603
--- /dev/null
+++ b/extra/experiments/onto-joint/pretrain.cfg
@@ -0,0 +1,152 @@
+# Training hyper-parameters and additional features.
+[training]
+# Whether to train on sequences with 'gold standard' sentence boundaries
+# and tokens. If you set this to true, take care to ensure your run-time
+# data is passed in sentence-by-sentence via some prior preprocessing.
+gold_preproc = false
+# Limitations on training document length or number of examples.
+max_length = 0
+limit = 0
+# Data augmentation
+orth_variant_level = 0.0
+dropout = 0.1
+# Controls early-stopping. 0 or -1 mean unlimited.
+patience = 1600
+max_epochs = 0
+max_steps = 20000
+eval_frequency = 400
+# Other settings
+seed = 0
+accumulate_gradient = 1
+use_pytorch_for_gpu_memory = false
+# Control how scores are printed and checkpoints are evaluated.
+scores = ["speed", "tags_acc", "uas", "las", "ents_f"]
+score_weights = {"las": 0.4, "ents_f": 0.4, "tags_acc": 0.2}
+# These settings are invalid for the transformer models.
+init_tok2vec = null
+discard_oversize = false
+omit_extra_lookups = false
+batch_by = "words"
+use_gpu = -1
+raw_text = null
+tag_map = null
+
+[training.batch_size]
+@schedules = "compounding.v1"
+start = 1000
+stop = 1000
+compound = 1.001
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 1.0
+use_averages = true
+eps = 1e-8
+learn_rate = 0.001
+
+[pretraining]
+max_epochs = 1000
+min_length = 5
+max_length = 500
+dropout = 0.2
+n_save_every = null
+batch_size = 3000
+seed = ${training:seed}
+use_pytorch_for_gpu_memory = ${training:use_pytorch_for_gpu_memory}
+tok2vec_model = "nlp.pipeline.tok2vec.model"
+
+[pretraining.objective]
+type = "characters"
+n_characters = 4
+
+[pretraining.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 1.0
+use_averages = true
+eps = 1e-8
+learn_rate = 0.001
+
+[nlp]
+lang = "en"
+vectors = null
+base_model = null
+
+[nlp.pipeline]
+
+[nlp.pipeline.tok2vec]
+factory = "tok2vec"
+
+[nlp.pipeline.senter]
+factory = "senter"
+
+[nlp.pipeline.ner]
+factory = "ner"
+learn_tokens = false
+min_action_freq = 1
+beam_width = 1
+beam_update_prob = 1.0
+
+[nlp.pipeline.tagger]
+factory = "tagger"
+
+[nlp.pipeline.parser]
+factory = "parser"
+learn_tokens = false
+min_action_freq = 1
+beam_width = 1
+beam_update_prob = 1.0
+
+[nlp.pipeline.senter.model]
+@architectures = "spacy.Tagger.v1"
+
+[nlp.pipeline.senter.model.tok2vec]
+@architectures = "spacy.Tok2VecTensors.v1"
+width = ${nlp.pipeline.tok2vec.model:width}
+
+[nlp.pipeline.tagger.model]
+@architectures = "spacy.Tagger.v1"
+
+[nlp.pipeline.tagger.model.tok2vec]
+@architectures = "spacy.Tok2VecTensors.v1"
+width = ${nlp.pipeline.tok2vec.model:width}
+
+[nlp.pipeline.parser.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 8
+hidden_width = 128
+maxout_pieces = 3
+use_upper = false
+
+[nlp.pipeline.parser.model.tok2vec]
+@architectures = "spacy.Tok2VecTensors.v1"
+width = ${nlp.pipeline.tok2vec.model:width}
+
+[nlp.pipeline.ner.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 3
+hidden_width = 128
+maxout_pieces = 3
+use_upper = false
+
+[nlp.pipeline.ner.model.tok2vec]
+@architectures = "spacy.Tok2VecTensors.v1"
+width = ${nlp.pipeline.tok2vec.model:width}
+
+[nlp.pipeline.tok2vec.model]
+@architectures = "spacy.HashEmbedCNN.v1"
+pretrained_vectors = ${nlp:vectors}
+width = 256
+depth = 6
+window_size = 1
+embed_size = 10000
+maxout_pieces = 3
+subword_features = true
+dropout = null
diff --git a/extra/experiments/onto-ner.cfg b/extra/experiments/onto-ner.cfg
new file mode 100644
index 000000000..eab68a27f
--- /dev/null
+++ b/extra/experiments/onto-ner.cfg
@@ -0,0 +1,73 @@
+# Training hyper-parameters and additional features.
+[training]
+# Whether to train on sequences with 'gold standard' sentence boundaries
+# and tokens. If you set this to true, take care to ensure your run-time
+# data is passed in sentence-by-sentence via some prior preprocessing.
+gold_preproc = false
+# Limitations on training document length or number of examples.
+max_length = 3000
+limit = 0
+# Data augmentation
+orth_variant_level = 0.0
+dropout = 0.1
+# Controls early-stopping. 0 or -1 mean unlimited.
+patience = 100000
+max_epochs = 0
+max_steps = 0
+eval_frequency = 1000
+# Other settings
+seed = 0
+accumulate_gradient = 1
+use_pytorch_for_gpu_memory = false
+# Control how scores are printed and checkpoints are evaluated.
+scores = ["speed", "ents_p", "ents_r", "ents_f"]
+score_weights = {"ents_f": 1.0}
+# These settings are invalid for the transformer models.
+init_tok2vec = null
+discard_oversize = false
+omit_extra_lookups = false
+batch_by = "words"
+
+[training.batch_size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 1.0
+use_averages = true
+eps = 1e-8
+learn_rate = 0.001
+
+[nlp]
+lang = "en"
+vectors = null
+
+[nlp.pipeline.ner]
+factory = "ner"
+learn_tokens = false
+min_action_freq = 1
+
+[nlp.pipeline.ner.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 3
+hidden_width = 64
+maxout_pieces = 2
+use_upper = true
+
+[nlp.pipeline.ner.model.tok2vec]
+@architectures = "spacy.HashEmbedCNN.v1"
+pretrained_vectors = ${nlp:vectors}
+width = 96
+depth = 4
+window_size = 1
+embed_size = 2000
+maxout_pieces = 3
+subword_features = true
+dropout = ${training:dropout}
diff --git a/extra/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg b/extra/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg
new file mode 100644
index 000000000..f1b702a4e
--- /dev/null
+++ b/extra/experiments/ptb-joint-pos-dep/bilstm_tok2vec.cfg
@@ -0,0 +1,73 @@
+[training]
+patience = 10000
+eval_frequency = 200
+dropout = 0.2
+init_tok2vec = null
+vectors = null
+max_epochs = 100
+orth_variant_level = 0.0
+gold_preproc = true
+max_length = 0
+use_gpu = 0
+scores = ["tags_acc", "uas", "las"]
+score_weights = {"las": 0.8, "tags_acc": 0.2}
+limit = 0
+seed = 0
+accumulate_gradient = 2
+discard_oversize = false
+
+[training.batch_size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+learn_rate = 0.001
+beta1 = 0.9
+beta2 = 0.999
+
+[nlp]
+lang = "en"
+vectors = ${training:vectors}
+
+[nlp.pipeline.tok2vec]
+factory = "tok2vec"
+
+[nlp.pipeline.tagger]
+factory = "tagger"
+
+[nlp.pipeline.parser]
+factory = "parser"
+learn_tokens = false
+min_action_freq = 1
+beam_width = 1
+beam_update_prob = 1.0
+
+[nlp.pipeline.tagger.model]
+@architectures = "spacy.Tagger.v1"
+
+[nlp.pipeline.tagger.model.tok2vec]
+@architectures = "spacy.Tok2VecTensors.v1"
+width = ${nlp.pipeline.tok2vec.model:width}
+
+[nlp.pipeline.parser.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 8
+hidden_width = 64
+maxout_pieces = 3
+
+[nlp.pipeline.parser.model.tok2vec]
+@architectures = "spacy.Tok2VecTensors.v1"
+width = ${nlp.pipeline.tok2vec.model:width}
+
+[nlp.pipeline.tok2vec.model]
+@architectures = "spacy.HashEmbedBiLSTM.v1"
+pretrained_vectors = ${nlp:vectors}
+width = 96
+depth = 4
+embed_size = 2000
+subword_features = true
+maxout_pieces = 3
+dropout = null
diff --git a/extra/experiments/ptb-joint-pos-dep/defaults.cfg b/extra/experiments/ptb-joint-pos-dep/defaults.cfg
new file mode 100644
index 000000000..8f9c5666e
--- /dev/null
+++ b/extra/experiments/ptb-joint-pos-dep/defaults.cfg
@@ -0,0 +1,110 @@
+[paths]
+train = ""
+dev = ""
+raw = null
+init_tok2vec = null
+
+[system]
+seed = 0
+use_pytorch_for_gpu_memory = false
+
+[training]
+seed = ${system:seed}
+dropout = 0.2
+init_tok2vec = ${paths:init_tok2vec}
+vectors = null
+accumulate_gradient = 1
+max_steps = 0
+max_epochs = 0
+patience = 10000
+eval_frequency = 200
+score_weights = {"dep_las": 0.8, "tag_acc": 0.2}
+
+[training.read_train]
+@readers = "spacy.Corpus.v1"
+path = ${paths:train}
+gold_preproc = true
+max_length = 0
+limit = 0
+
+[training.read_dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths:dev}
+gold_preproc = ${training.read_train:gold_preproc}
+max_length = 0
+limit = 0
+
+[training.batcher]
+@batchers = "spacy.batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+learn_rate = 0.001
+beta1 = 0.9
+beta2 = 0.999
+
+[nlp]
+lang = "en"
+pipeline = ["tok2vec", "tagger", "parser"]
+load_vocab_data = false
+
+[nlp.tokenizer]
+@tokenizers = "spacy.Tokenizer.v1"
+
+[nlp.lemmatizer]
+@lemmatizers = "spacy.Lemmatizer.v1"
+
+[components]
+
+[components.tok2vec]
+factory = "tok2vec"
+
+[components.tagger]
+factory = "tagger"
+
+[components.parser]
+factory = "parser"
+learn_tokens = false
+min_action_freq = 1
+
+[components.tagger.model]
+@architectures = "spacy.Tagger.v1"
+
+[components.tagger.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode:width}
+
+[components.parser.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 8
+hidden_width = 64
+maxout_pieces = 3
+
+[components.parser.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode:width}
+
+[components.tok2vec.model]
+@architectures = "spacy.Tok2Vec.v1"
+
+[components.tok2vec.model.embed]
+@architectures = "spacy.MultiHashEmbed.v1"
+width = ${components.tok2vec.model.encode:width}
+rows = 2000
+also_embed_subwords = true
+also_use_static_vectors = false
+
+[components.tok2vec.model.encode]
+@architectures = "spacy.MaxoutWindowEncoder.v1"
+width = 96
+depth = 4
+window_size = 1
+maxout_pieces = 3
diff --git a/extra/experiments/tok2vec-ner/charembed_tok2vec.cfg b/extra/experiments/tok2vec-ner/charembed_tok2vec.cfg
new file mode 100644
index 000000000..eca6a22fa
--- /dev/null
+++ b/extra/experiments/tok2vec-ner/charembed_tok2vec.cfg
@@ -0,0 +1,69 @@
+[training]
+use_gpu = -1
+limit = 0
+dropout = 0.2
+patience = 10000
+eval_frequency = 200
+scores = ["ents_f"]
+score_weights = {"ents_f": 1}
+orth_variant_level = 0.0
+gold_preproc = true
+max_length = 0
+batch_size = 25
+seed = 0
+accumulate_gradient = 2
+discard_oversize = false
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+learn_rate = 0.001
+beta1 = 0.9
+beta2 = 0.999
+
+[nlp]
+lang = "en"
+vectors = null
+
+[nlp.pipeline.tok2vec]
+factory = "tok2vec"
+
+[nlp.pipeline.tok2vec.model]
+@architectures = "spacy.Tok2Vec.v1"
+
+[nlp.pipeline.tok2vec.model.extract]
+@architectures = "spacy.CharacterEmbed.v1"
+width = 96
+nM = 64
+nC = 8
+rows = 2000
+columns = ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]
+dropout = null
+
+[nlp.pipeline.tok2vec.model.extract.features]
+@architectures = "spacy.Doc2Feats.v1"
+columns = ${nlp.pipeline.tok2vec.model.extract:columns}
+
+[nlp.pipeline.tok2vec.model.embed]
+@architectures = "spacy.LayerNormalizedMaxout.v1"
+width = ${nlp.pipeline.tok2vec.model.extract:width}
+maxout_pieces = 4
+
+[nlp.pipeline.tok2vec.model.encode]
+@architectures = "spacy.MaxoutWindowEncoder.v1"
+width = ${nlp.pipeline.tok2vec.model.extract:width}
+window_size = 1
+maxout_pieces = 2
+depth = 2
+
+[nlp.pipeline.ner]
+factory = "ner"
+
+[nlp.pipeline.ner.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 6
+hidden_width = 64
+maxout_pieces = 2
+
+[nlp.pipeline.ner.model.tok2vec]
+@architectures = "spacy.Tok2VecTensors.v1"
+width = ${nlp.pipeline.tok2vec.model.extract:width}
diff --git a/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg b/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg
new file mode 100644
index 000000000..a5fa32b18
--- /dev/null
+++ b/extra/experiments/tok2vec-ner/multihashembed_tok2vec.cfg
@@ -0,0 +1,48 @@
+[training]
+use_gpu = -1
+limit = 0
+dropout = 0.2
+patience = 10000
+eval_frequency = 200
+scores = ["ents_p", "ents_r", "ents_f"]
+score_weights = {"ents_f": 1}
+orth_variant_level = 0.0
+gold_preproc = true
+max_length = 0
+seed = 0
+accumulate_gradient = 2
+discard_oversize = false
+
+[training.batch_size]
+@schedules = "compounding.v1"
+start = 3000
+stop = 3000
+compound = 1.001
+
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+learn_rate = 0.001
+beta1 = 0.9
+beta2 = 0.999
+
+[nlp]
+lang = "en"
+vectors = null
+
+[nlp.pipeline.ner]
+factory = "simple_ner"
+
+[nlp.pipeline.ner.model]
+@architectures = "spacy.BiluoTagger.v1"
+
+[nlp.pipeline.ner.model.tok2vec]
+@architectures = "spacy.HashEmbedCNN.v1"
+width = 128
+depth = 4
+embed_size = 7000
+maxout_pieces = 3
+window_size = 1
+subword_features = true
+pretrained_vectors = null
+dropout = null
diff --git a/fabfile.py b/fabfile.py
deleted file mode 100644
index fcab493f5..000000000
--- a/fabfile.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals, print_function
-
-import contextlib
-from pathlib import Path
-from fabric.api import local, lcd, env, settings, prefix
-from os import path, environ
-import shutil
-import sys
-
-
-PWD = path.dirname(__file__)
-ENV = environ["VENV_DIR"] if "VENV_DIR" in environ else ".env"
-VENV_DIR = Path(PWD) / ENV
-
-
-@contextlib.contextmanager
-def virtualenv(name, create=False, python="/usr/bin/python3.6"):
- python = Path(python).resolve()
- env_path = VENV_DIR
- if create:
- if env_path.exists():
- shutil.rmtree(str(env_path))
- local("{python} -m venv {env_path}".format(python=python, env_path=VENV_DIR))
-
- def wrapped_local(cmd, env_vars=[], capture=False, direct=False):
- return local(
- "source {}/bin/activate && {}".format(env_path, cmd),
- shell="/bin/bash",
- capture=False,
- )
-
- yield wrapped_local
-
-
-def env(lang="python3.6"):
- if VENV_DIR.exists():
- local("rm -rf {env}".format(env=VENV_DIR))
- if lang.startswith("python3"):
- local("{lang} -m venv {env}".format(lang=lang, env=VENV_DIR))
- else:
- local("{lang} -m pip install virtualenv --no-cache-dir".format(lang=lang))
- local(
- "{lang} -m virtualenv {env} --no-cache-dir".format(lang=lang, env=VENV_DIR)
- )
- with virtualenv(VENV_DIR) as venv_local:
- print(venv_local("python --version", capture=True))
- venv_local("pip install --upgrade setuptools --no-cache-dir")
- venv_local("pip install pytest --no-cache-dir")
- venv_local("pip install wheel --no-cache-dir")
- venv_local("pip install -r requirements.txt --no-cache-dir")
- venv_local("pip install pex --no-cache-dir")
-
-
-def install():
- with virtualenv(VENV_DIR) as venv_local:
- venv_local("pip install dist/*.tar.gz")
-
-
-def make():
- with lcd(path.dirname(__file__)):
- local(
- "export PYTHONPATH=`pwd` && source .env/bin/activate && python setup.py build_ext --inplace",
- shell="/bin/bash",
- )
-
-
-def sdist():
- with virtualenv(VENV_DIR) as venv_local:
- with lcd(path.dirname(__file__)):
- venv_local("python -m pip install -U setuptools srsly")
- venv_local("python setup.py sdist")
-
-
-def wheel():
- with virtualenv(VENV_DIR) as venv_local:
- with lcd(path.dirname(__file__)):
- venv_local("python setup.py bdist_wheel")
-
-
-def pex():
- with virtualenv(VENV_DIR) as venv_local:
- with lcd(path.dirname(__file__)):
- sha = local("git rev-parse --short HEAD", capture=True)
- venv_local(
- "pex dist/*.whl -e spacy -o dist/spacy-%s.pex" % sha, direct=True
- )
-
-
-def clean():
- with lcd(path.dirname(__file__)):
- local("rm -f dist/*.whl")
- local("rm -f dist/*.pex")
- with virtualenv(VENV_DIR) as venv_local:
- venv_local("python setup.py clean --all")
-
-
-def test():
- with virtualenv(VENV_DIR) as venv_local:
- with lcd(path.dirname(__file__)):
- venv_local("pytest -x spacy/tests")
-
-
-def train():
- args = environ.get("SPACY_TRAIN_ARGS", "")
- with virtualenv(VENV_DIR) as venv_local:
- venv_local("spacy train {args}".format(args=args))
-
-
-def conll17(treebank_dir, experiment_dir, vectors_dir, config, corpus=""):
- is_not_clean = local("git status --porcelain", capture=True)
- if is_not_clean:
- print("Repository is not clean")
- print(is_not_clean)
- sys.exit(1)
- git_sha = local("git rev-parse --short HEAD", capture=True)
- config_checksum = local("sha256sum {config}".format(config=config), capture=True)
- experiment_dir = Path(experiment_dir) / "{}--{}".format(
- config_checksum[:6], git_sha
- )
- if not experiment_dir.exists():
- experiment_dir.mkdir()
- test_data_dir = Path(treebank_dir) / "ud-test-v2.0-conll2017"
- assert test_data_dir.exists()
- assert test_data_dir.is_dir()
- if corpus:
- corpora = [corpus]
- else:
- corpora = ["UD_English", "UD_Chinese", "UD_Japanese", "UD_Vietnamese"]
-
- local(
- "cp {config} {experiment_dir}/config.json".format(
- config=config, experiment_dir=experiment_dir
- )
- )
- with virtualenv(VENV_DIR) as venv_local:
- for corpus in corpora:
- venv_local(
- "spacy ud-train {treebank_dir} {experiment_dir} {config} {corpus} -v {vectors_dir}".format(
- treebank_dir=treebank_dir,
- experiment_dir=experiment_dir,
- config=config,
- corpus=corpus,
- vectors_dir=vectors_dir,
- )
- )
- venv_local(
- "spacy ud-run-test {test_data_dir} {experiment_dir} {corpus}".format(
- test_data_dir=test_data_dir,
- experiment_dir=experiment_dir,
- config=config,
- corpus=corpus,
- )
- )
diff --git a/include/msvc9/stdint.h b/include/msvc9/stdint.h
deleted file mode 100644
index 4fe0ef9a9..000000000
--- a/include/msvc9/stdint.h
+++ /dev/null
@@ -1,259 +0,0 @@
-// ISO C9x compliant stdint.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
-//
-// Copyright (c) 2006-2013 Alexander Chemeris
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// 1. Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the product nor the names of its contributors may
-// be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_STDINT_H_ // [
-#define _MSC_STDINT_H_
-
-#if _MSC_VER > 1000
-#pragma once
-#endif
-
-#if _MSC_VER >= 1600 // [
-#include
-#else // ] _MSC_VER >= 1600 [
-
-#include
-
-// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
-// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
-// or compiler give many errors like this:
-// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
-#ifdef __cplusplus
-extern "C" {
-#endif
-# include
-#ifdef __cplusplus
-}
-#endif
-
-// Define _W64 macros to mark types changing their size, like intptr_t.
-#ifndef _W64
-# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
-# define _W64 __w64
-# else
-# define _W64
-# endif
-#endif
-
-
-// 7.18.1 Integer types
-
-// 7.18.1.1 Exact-width integer types
-
-// Visual Studio 6 and Embedded Visual C++ 4 doesn't
-// realize that, e.g. char has the same size as __int8
-// so we give up on __intX for them.
-#if (_MSC_VER < 1300)
- typedef signed char int8_t;
- typedef signed short int16_t;
- typedef signed int int32_t;
- typedef unsigned char uint8_t;
- typedef unsigned short uint16_t;
- typedef unsigned int uint32_t;
-#else
- typedef signed __int8 int8_t;
- typedef signed __int16 int16_t;
- typedef signed __int32 int32_t;
- typedef unsigned __int8 uint8_t;
- typedef unsigned __int16 uint16_t;
- typedef unsigned __int32 uint32_t;
-#endif
-typedef signed __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-
-
-// 7.18.1.2 Minimum-width integer types
-typedef int8_t int_least8_t;
-typedef int16_t int_least16_t;
-typedef int32_t int_least32_t;
-typedef int64_t int_least64_t;
-typedef uint8_t uint_least8_t;
-typedef uint16_t uint_least16_t;
-typedef uint32_t uint_least32_t;
-typedef uint64_t uint_least64_t;
-
-// 7.18.1.3 Fastest minimum-width integer types
-typedef int8_t int_fast8_t;
-typedef int16_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef int64_t int_fast64_t;
-typedef uint8_t uint_fast8_t;
-typedef uint16_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-typedef uint64_t uint_fast64_t;
-
-// 7.18.1.4 Integer types capable of holding object pointers
-#ifdef _WIN64 // [
- typedef signed __int64 intptr_t;
- typedef unsigned __int64 uintptr_t;
-#else // _WIN64 ][
- typedef _W64 signed int intptr_t;
- typedef _W64 unsigned int uintptr_t;
-#endif // _WIN64 ]
-
-// 7.18.1.5 Greatest-width integer types
-typedef int64_t intmax_t;
-typedef uint64_t uintmax_t;
-
-
-// 7.18.2 Limits of specified-width integer types
-
-#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
-
-// 7.18.2.1 Limits of exact-width integer types
-#define INT8_MIN ((int8_t)_I8_MIN)
-#define INT8_MAX _I8_MAX
-#define INT16_MIN ((int16_t)_I16_MIN)
-#define INT16_MAX _I16_MAX
-#define INT32_MIN ((int32_t)_I32_MIN)
-#define INT32_MAX _I32_MAX
-#define INT64_MIN ((int64_t)_I64_MIN)
-#define INT64_MAX _I64_MAX
-#define UINT8_MAX _UI8_MAX
-#define UINT16_MAX _UI16_MAX
-#define UINT32_MAX _UI32_MAX
-#define UINT64_MAX _UI64_MAX
-
-// 7.18.2.2 Limits of minimum-width integer types
-#define INT_LEAST8_MIN INT8_MIN
-#define INT_LEAST8_MAX INT8_MAX
-#define INT_LEAST16_MIN INT16_MIN
-#define INT_LEAST16_MAX INT16_MAX
-#define INT_LEAST32_MIN INT32_MIN
-#define INT_LEAST32_MAX INT32_MAX
-#define INT_LEAST64_MIN INT64_MIN
-#define INT_LEAST64_MAX INT64_MAX
-#define UINT_LEAST8_MAX UINT8_MAX
-#define UINT_LEAST16_MAX UINT16_MAX
-#define UINT_LEAST32_MAX UINT32_MAX
-#define UINT_LEAST64_MAX UINT64_MAX
-
-// 7.18.2.3 Limits of fastest minimum-width integer types
-#define INT_FAST8_MIN INT8_MIN
-#define INT_FAST8_MAX INT8_MAX
-#define INT_FAST16_MIN INT16_MIN
-#define INT_FAST16_MAX INT16_MAX
-#define INT_FAST32_MIN INT32_MIN
-#define INT_FAST32_MAX INT32_MAX
-#define INT_FAST64_MIN INT64_MIN
-#define INT_FAST64_MAX INT64_MAX
-#define UINT_FAST8_MAX UINT8_MAX
-#define UINT_FAST16_MAX UINT16_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-#define UINT_FAST64_MAX UINT64_MAX
-
-// 7.18.2.4 Limits of integer types capable of holding object pointers
-#ifdef _WIN64 // [
-# define INTPTR_MIN INT64_MIN
-# define INTPTR_MAX INT64_MAX
-# define UINTPTR_MAX UINT64_MAX
-#else // _WIN64 ][
-# define INTPTR_MIN INT32_MIN
-# define INTPTR_MAX INT32_MAX
-# define UINTPTR_MAX UINT32_MAX
-#endif // _WIN64 ]
-
-// 7.18.2.5 Limits of greatest-width integer types
-#define INTMAX_MIN INT64_MIN
-#define INTMAX_MAX INT64_MAX
-#define UINTMAX_MAX UINT64_MAX
-
-// 7.18.3 Limits of other integer types
-
-#ifdef _WIN64 // [
-# define PTRDIFF_MIN _I64_MIN
-# define PTRDIFF_MAX _I64_MAX
-#else // _WIN64 ][
-# define PTRDIFF_MIN _I32_MIN
-# define PTRDIFF_MAX _I32_MAX
-#endif // _WIN64 ]
-
-#define SIG_ATOMIC_MIN INT_MIN
-#define SIG_ATOMIC_MAX INT_MAX
-
-#ifndef SIZE_MAX // [
-# ifdef _WIN64 // [
-# define SIZE_MAX _UI64_MAX
-# else // _WIN64 ][
-# define SIZE_MAX _UI32_MAX
-# endif // _WIN64 ]
-#endif // SIZE_MAX ]
-
-// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
-#ifndef WCHAR_MIN // [
-# define WCHAR_MIN 0
-#endif // WCHAR_MIN ]
-#ifndef WCHAR_MAX // [
-# define WCHAR_MAX _UI16_MAX
-#endif // WCHAR_MAX ]
-
-#define WINT_MIN 0
-#define WINT_MAX _UI16_MAX
-
-#endif // __STDC_LIMIT_MACROS ]
-
-
-// 7.18.4 Limits of other integer types
-
-#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
-
-// 7.18.4.1 Macros for minimum-width integer constants
-
-#define INT8_C(val) val##i8
-#define INT16_C(val) val##i16
-#define INT32_C(val) val##i32
-#define INT64_C(val) val##i64
-
-#define UINT8_C(val) val##ui8
-#define UINT16_C(val) val##ui16
-#define UINT32_C(val) val##ui32
-#define UINT64_C(val) val##ui64
-
-// 7.18.4.2 Macros for greatest-width integer constants
-// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
-// Check out Issue 9 for the details.
-#ifndef INTMAX_C // [
-# define INTMAX_C INT64_C
-#endif // INTMAX_C ]
-#ifndef UINTMAX_C // [
-# define UINTMAX_C UINT64_C
-#endif // UINTMAX_C ]
-
-#endif // __STDC_CONSTANT_MACROS ]
-
-#endif // _MSC_VER >= 1600 ]
-
-#endif // _MSC_STDINT_H_ ]
diff --git a/include/murmurhash/MurmurHash2.h b/include/murmurhash/MurmurHash2.h
deleted file mode 100644
index 6d7ccf4b2..000000000
--- a/include/murmurhash/MurmurHash2.h
+++ /dev/null
@@ -1,22 +0,0 @@
-//-----------------------------------------------------------------------------
-// MurmurHash2 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH2_H_
-#define _MURMURHASH2_H_
-
-#include
-
-//-----------------------------------------------------------------------------
-
-uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed );
-uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed );
-uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed );
-uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH2_H_
-
diff --git a/include/murmurhash/MurmurHash3.h b/include/murmurhash/MurmurHash3.h
deleted file mode 100644
index 9b4c3c90b..000000000
--- a/include/murmurhash/MurmurHash3.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH3_H_
-#define _MURMURHASH3_H_
-
-#include
-
-//-----------------------------------------------------------------------------
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
-
-#ifdef __cplusplus
-}
-#endif
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH3_H_
diff --git a/include/numpy/__multiarray_api.h b/include/numpy/__multiarray_api.h
deleted file mode 100644
index c949d732f..000000000
--- a/include/numpy/__multiarray_api.h
+++ /dev/null
@@ -1,1686 +0,0 @@
-
-#ifdef _MULTIARRAYMODULE
-
-typedef struct {
- PyObject_HEAD
- npy_bool obval;
-} PyBoolScalarObject;
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
-extern NPY_NO_EXPORT PyTypeObject PyArrayMapIter_Type;
-extern NPY_NO_EXPORT PyTypeObject PyArrayNeighborhoodIter_Type;
-extern NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[2];
-#else
-NPY_NO_EXPORT PyTypeObject PyArrayMapIter_Type;
-NPY_NO_EXPORT PyTypeObject PyArrayNeighborhoodIter_Type;
-NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[2];
-#endif
-
-NPY_NO_EXPORT unsigned int PyArray_GetNDArrayCVersion \
- (void);
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyBigArray_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyBigArray_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyArray_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyArray_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyArrayDescr_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyArrayDescr_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyArrayFlags_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyArrayFlags_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyArrayIter_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyArrayIter_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyArrayMultiIter_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyArrayMultiIter_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT int NPY_NUMUSERTYPES;
-#else
- NPY_NO_EXPORT int NPY_NUMUSERTYPES;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyBoolArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyBoolArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
-extern NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[2];
-#else
-NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[2];
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyGenericArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyGenericArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyNumberArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyNumberArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyIntegerArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyIntegerArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PySignedIntegerArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PySignedIntegerArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyUnsignedIntegerArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyUnsignedIntegerArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyInexactArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyInexactArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyFloatingArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyFloatingArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyComplexFloatingArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyComplexFloatingArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyFlexibleArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyFlexibleArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyCharacterArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyCharacterArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyByteArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyByteArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyShortArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyShortArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyIntArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyIntArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyLongArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyLongArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyLongLongArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyLongLongArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyUByteArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyUByteArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyUShortArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyUShortArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyUIntArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyUIntArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyULongArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyULongArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyULongLongArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyULongLongArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyFloatArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyFloatArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyDoubleArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyDoubleArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyLongDoubleArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyLongDoubleArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyCFloatArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyCFloatArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyCDoubleArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyCDoubleArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyCLongDoubleArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyCLongDoubleArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyObjectArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyObjectArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyStringArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyStringArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyUnicodeArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyUnicodeArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyVoidArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyVoidArrType_Type;
-#endif
-
-NPY_NO_EXPORT int PyArray_SetNumericOps \
- (PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_GetNumericOps \
- (void);
-NPY_NO_EXPORT int PyArray_INCREF \
- (PyArrayObject *);
-NPY_NO_EXPORT int PyArray_XDECREF \
- (PyArrayObject *);
-NPY_NO_EXPORT void PyArray_SetStringFunction \
- (PyObject *, int);
-NPY_NO_EXPORT PyArray_Descr * PyArray_DescrFromType \
- (int);
-NPY_NO_EXPORT PyObject * PyArray_TypeObjectFromType \
- (int);
-NPY_NO_EXPORT char * PyArray_Zero \
- (PyArrayObject *);
-NPY_NO_EXPORT char * PyArray_One \
- (PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_CastToType \
- (PyArrayObject *, PyArray_Descr *, int);
-NPY_NO_EXPORT int PyArray_CastTo \
- (PyArrayObject *, PyArrayObject *);
-NPY_NO_EXPORT int PyArray_CastAnyTo \
- (PyArrayObject *, PyArrayObject *);
-NPY_NO_EXPORT int PyArray_CanCastSafely \
- (int, int);
-NPY_NO_EXPORT npy_bool PyArray_CanCastTo \
- (PyArray_Descr *, PyArray_Descr *);
-NPY_NO_EXPORT int PyArray_ObjectType \
- (PyObject *, int);
-NPY_NO_EXPORT PyArray_Descr * PyArray_DescrFromObject \
- (PyObject *, PyArray_Descr *);
-NPY_NO_EXPORT PyArrayObject ** PyArray_ConvertToCommonType \
- (PyObject *, int *);
-NPY_NO_EXPORT PyArray_Descr * PyArray_DescrFromScalar \
- (PyObject *);
-NPY_NO_EXPORT PyArray_Descr * PyArray_DescrFromTypeObject \
- (PyObject *);
-NPY_NO_EXPORT npy_intp PyArray_Size \
- (PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_Scalar \
- (void *, PyArray_Descr *, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_FromScalar \
- (PyObject *, PyArray_Descr *);
-NPY_NO_EXPORT void PyArray_ScalarAsCtype \
- (PyObject *, void *);
-NPY_NO_EXPORT int PyArray_CastScalarToCtype \
- (PyObject *, void *, PyArray_Descr *);
-NPY_NO_EXPORT int PyArray_CastScalarDirect \
- (PyObject *, PyArray_Descr *, void *, int);
-NPY_NO_EXPORT PyObject * PyArray_ScalarFromObject \
- (PyObject *);
-NPY_NO_EXPORT PyArray_VectorUnaryFunc * PyArray_GetCastFunc \
- (PyArray_Descr *, int);
-NPY_NO_EXPORT PyObject * PyArray_FromDims \
- (int, int *, int);
-NPY_NO_EXPORT PyObject * PyArray_FromDimsAndDataAndDescr \
- (int, int *, PyArray_Descr *, char *);
-NPY_NO_EXPORT PyObject * PyArray_FromAny \
- (PyObject *, PyArray_Descr *, int, int, int, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_EnsureArray \
- (PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_EnsureAnyArray \
- (PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_FromFile \
- (FILE *, PyArray_Descr *, npy_intp, char *);
-NPY_NO_EXPORT PyObject * PyArray_FromString \
- (char *, npy_intp, PyArray_Descr *, npy_intp, char *);
-NPY_NO_EXPORT PyObject * PyArray_FromBuffer \
- (PyObject *, PyArray_Descr *, npy_intp, npy_intp);
-NPY_NO_EXPORT PyObject * PyArray_FromIter \
- (PyObject *, PyArray_Descr *, npy_intp);
-NPY_NO_EXPORT PyObject * PyArray_Return \
- (PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_GetField \
- (PyArrayObject *, PyArray_Descr *, int);
-NPY_NO_EXPORT int PyArray_SetField \
- (PyArrayObject *, PyArray_Descr *, int, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_Byteswap \
- (PyArrayObject *, npy_bool);
-NPY_NO_EXPORT PyObject * PyArray_Resize \
- (PyArrayObject *, PyArray_Dims *, int, NPY_ORDER);
-NPY_NO_EXPORT int PyArray_MoveInto \
- (PyArrayObject *, PyArrayObject *);
-NPY_NO_EXPORT int PyArray_CopyInto \
- (PyArrayObject *, PyArrayObject *);
-NPY_NO_EXPORT int PyArray_CopyAnyInto \
- (PyArrayObject *, PyArrayObject *);
-NPY_NO_EXPORT int PyArray_CopyObject \
- (PyArrayObject *, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_NewCopy \
- (PyArrayObject *, NPY_ORDER);
-NPY_NO_EXPORT PyObject * PyArray_ToList \
- (PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_ToString \
- (PyArrayObject *, NPY_ORDER);
-NPY_NO_EXPORT int PyArray_ToFile \
- (PyArrayObject *, FILE *, char *, char *);
-NPY_NO_EXPORT int PyArray_Dump \
- (PyObject *, PyObject *, int);
-NPY_NO_EXPORT PyObject * PyArray_Dumps \
- (PyObject *, int);
-NPY_NO_EXPORT int PyArray_ValidType \
- (int);
-NPY_NO_EXPORT void PyArray_UpdateFlags \
- (PyArrayObject *, int);
-NPY_NO_EXPORT PyObject * PyArray_New \
- (PyTypeObject *, int, npy_intp *, int, npy_intp *, void *, int, int, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_NewFromDescr \
- (PyTypeObject *, PyArray_Descr *, int, npy_intp *, npy_intp *, void *, int, PyObject *);
-NPY_NO_EXPORT PyArray_Descr * PyArray_DescrNew \
- (PyArray_Descr *);
-NPY_NO_EXPORT PyArray_Descr * PyArray_DescrNewFromType \
- (int);
-NPY_NO_EXPORT double PyArray_GetPriority \
- (PyObject *, double);
-NPY_NO_EXPORT PyObject * PyArray_IterNew \
- (PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_MultiIterNew \
- (int, ...);
-NPY_NO_EXPORT int PyArray_PyIntAsInt \
- (PyObject *);
-NPY_NO_EXPORT npy_intp PyArray_PyIntAsIntp \
- (PyObject *);
-NPY_NO_EXPORT int PyArray_Broadcast \
- (PyArrayMultiIterObject *);
-NPY_NO_EXPORT void PyArray_FillObjectArray \
- (PyArrayObject *, PyObject *);
-NPY_NO_EXPORT int PyArray_FillWithScalar \
- (PyArrayObject *, PyObject *);
-NPY_NO_EXPORT npy_bool PyArray_CheckStrides \
- (int, int, npy_intp, npy_intp, npy_intp *, npy_intp *);
-NPY_NO_EXPORT PyArray_Descr * PyArray_DescrNewByteorder \
- (PyArray_Descr *, char);
-NPY_NO_EXPORT PyObject * PyArray_IterAllButAxis \
- (PyObject *, int *);
-NPY_NO_EXPORT PyObject * PyArray_CheckFromAny \
- (PyObject *, PyArray_Descr *, int, int, int, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_FromArray \
- (PyArrayObject *, PyArray_Descr *, int);
-NPY_NO_EXPORT PyObject * PyArray_FromInterface \
- (PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_FromStructInterface \
- (PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_FromArrayAttr \
- (PyObject *, PyArray_Descr *, PyObject *);
-NPY_NO_EXPORT NPY_SCALARKIND PyArray_ScalarKind \
- (int, PyArrayObject **);
-NPY_NO_EXPORT int PyArray_CanCoerceScalar \
- (int, int, NPY_SCALARKIND);
-NPY_NO_EXPORT PyObject * PyArray_NewFlagsObject \
- (PyObject *);
-NPY_NO_EXPORT npy_bool PyArray_CanCastScalar \
- (PyTypeObject *, PyTypeObject *);
-NPY_NO_EXPORT int PyArray_CompareUCS4 \
- (npy_ucs4 *, npy_ucs4 *, size_t);
-NPY_NO_EXPORT int PyArray_RemoveSmallest \
- (PyArrayMultiIterObject *);
-NPY_NO_EXPORT int PyArray_ElementStrides \
- (PyObject *);
-NPY_NO_EXPORT void PyArray_Item_INCREF \
- (char *, PyArray_Descr *);
-NPY_NO_EXPORT void PyArray_Item_XDECREF \
- (char *, PyArray_Descr *);
-NPY_NO_EXPORT PyObject * PyArray_FieldNames \
- (PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_Transpose \
- (PyArrayObject *, PyArray_Dims *);
-NPY_NO_EXPORT PyObject * PyArray_TakeFrom \
- (PyArrayObject *, PyObject *, int, PyArrayObject *, NPY_CLIPMODE);
-NPY_NO_EXPORT PyObject * PyArray_PutTo \
- (PyArrayObject *, PyObject*, PyObject *, NPY_CLIPMODE);
-NPY_NO_EXPORT PyObject * PyArray_PutMask \
- (PyArrayObject *, PyObject*, PyObject*);
-NPY_NO_EXPORT PyObject * PyArray_Repeat \
- (PyArrayObject *, PyObject *, int);
-NPY_NO_EXPORT PyObject * PyArray_Choose \
- (PyArrayObject *, PyObject *, PyArrayObject *, NPY_CLIPMODE);
-NPY_NO_EXPORT int PyArray_Sort \
- (PyArrayObject *, int, NPY_SORTKIND);
-NPY_NO_EXPORT PyObject * PyArray_ArgSort \
- (PyArrayObject *, int, NPY_SORTKIND);
-NPY_NO_EXPORT PyObject * PyArray_SearchSorted \
- (PyArrayObject *, PyObject *, NPY_SEARCHSIDE, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_ArgMax \
- (PyArrayObject *, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_ArgMin \
- (PyArrayObject *, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Reshape \
- (PyArrayObject *, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_Newshape \
- (PyArrayObject *, PyArray_Dims *, NPY_ORDER);
-NPY_NO_EXPORT PyObject * PyArray_Squeeze \
- (PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_View \
- (PyArrayObject *, PyArray_Descr *, PyTypeObject *);
-NPY_NO_EXPORT PyObject * PyArray_SwapAxes \
- (PyArrayObject *, int, int);
-NPY_NO_EXPORT PyObject * PyArray_Max \
- (PyArrayObject *, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Min \
- (PyArrayObject *, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Ptp \
- (PyArrayObject *, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Mean \
- (PyArrayObject *, int, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Trace \
- (PyArrayObject *, int, int, int, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Diagonal \
- (PyArrayObject *, int, int, int);
-NPY_NO_EXPORT PyObject * PyArray_Clip \
- (PyArrayObject *, PyObject *, PyObject *, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Conjugate \
- (PyArrayObject *, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Nonzero \
- (PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Std \
- (PyArrayObject *, int, int, PyArrayObject *, int);
-NPY_NO_EXPORT PyObject * PyArray_Sum \
- (PyArrayObject *, int, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_CumSum \
- (PyArrayObject *, int, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Prod \
- (PyArrayObject *, int, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_CumProd \
- (PyArrayObject *, int, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_All \
- (PyArrayObject *, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Any \
- (PyArrayObject *, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Compress \
- (PyArrayObject *, PyObject *, int, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_Flatten \
- (PyArrayObject *, NPY_ORDER);
-NPY_NO_EXPORT PyObject * PyArray_Ravel \
- (PyArrayObject *, NPY_ORDER);
-NPY_NO_EXPORT npy_intp PyArray_MultiplyList \
- (npy_intp *, int);
-NPY_NO_EXPORT int PyArray_MultiplyIntList \
- (int *, int);
-NPY_NO_EXPORT void * PyArray_GetPtr \
- (PyArrayObject *, npy_intp*);
-NPY_NO_EXPORT int PyArray_CompareLists \
- (npy_intp *, npy_intp *, int);
-NPY_NO_EXPORT int PyArray_AsCArray \
- (PyObject **, void *, npy_intp *, int, PyArray_Descr*);
-NPY_NO_EXPORT int PyArray_As1D \
- (PyObject **, char **, int *, int);
-NPY_NO_EXPORT int PyArray_As2D \
- (PyObject **, char ***, int *, int *, int);
-NPY_NO_EXPORT int PyArray_Free \
- (PyObject *, void *);
-NPY_NO_EXPORT int PyArray_Converter \
- (PyObject *, PyObject **);
-NPY_NO_EXPORT int PyArray_IntpFromSequence \
- (PyObject *, npy_intp *, int);
-NPY_NO_EXPORT PyObject * PyArray_Concatenate \
- (PyObject *, int);
-NPY_NO_EXPORT PyObject * PyArray_InnerProduct \
- (PyObject *, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_MatrixProduct \
- (PyObject *, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_CopyAndTranspose \
- (PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_Correlate \
- (PyObject *, PyObject *, int);
-NPY_NO_EXPORT int PyArray_TypestrConvert \
- (int, int);
-NPY_NO_EXPORT int PyArray_DescrConverter \
- (PyObject *, PyArray_Descr **);
-NPY_NO_EXPORT int PyArray_DescrConverter2 \
- (PyObject *, PyArray_Descr **);
-NPY_NO_EXPORT int PyArray_IntpConverter \
- (PyObject *, PyArray_Dims *);
-NPY_NO_EXPORT int PyArray_BufferConverter \
- (PyObject *, PyArray_Chunk *);
-NPY_NO_EXPORT int PyArray_AxisConverter \
- (PyObject *, int *);
-NPY_NO_EXPORT int PyArray_BoolConverter \
- (PyObject *, npy_bool *);
-NPY_NO_EXPORT int PyArray_ByteorderConverter \
- (PyObject *, char *);
-NPY_NO_EXPORT int PyArray_OrderConverter \
- (PyObject *, NPY_ORDER *);
-NPY_NO_EXPORT unsigned char PyArray_EquivTypes \
- (PyArray_Descr *, PyArray_Descr *);
-NPY_NO_EXPORT PyObject * PyArray_Zeros \
- (int, npy_intp *, PyArray_Descr *, int);
-NPY_NO_EXPORT PyObject * PyArray_Empty \
- (int, npy_intp *, PyArray_Descr *, int);
-NPY_NO_EXPORT PyObject * PyArray_Where \
- (PyObject *, PyObject *, PyObject *);
-NPY_NO_EXPORT PyObject * PyArray_Arange \
- (double, double, double, int);
-NPY_NO_EXPORT PyObject * PyArray_ArangeObj \
- (PyObject *, PyObject *, PyObject *, PyArray_Descr *);
-NPY_NO_EXPORT int PyArray_SortkindConverter \
- (PyObject *, NPY_SORTKIND *);
-NPY_NO_EXPORT PyObject * PyArray_LexSort \
- (PyObject *, int);
-NPY_NO_EXPORT PyObject * PyArray_Round \
- (PyArrayObject *, int, PyArrayObject *);
-NPY_NO_EXPORT unsigned char PyArray_EquivTypenums \
- (int, int);
-NPY_NO_EXPORT int PyArray_RegisterDataType \
- (PyArray_Descr *);
-NPY_NO_EXPORT int PyArray_RegisterCastFunc \
- (PyArray_Descr *, int, PyArray_VectorUnaryFunc *);
-NPY_NO_EXPORT int PyArray_RegisterCanCast \
- (PyArray_Descr *, int, NPY_SCALARKIND);
-NPY_NO_EXPORT void PyArray_InitArrFuncs \
- (PyArray_ArrFuncs *);
-NPY_NO_EXPORT PyObject * PyArray_IntTupleFromIntp \
- (int, npy_intp *);
-NPY_NO_EXPORT int PyArray_TypeNumFromName \
- (char *);
-NPY_NO_EXPORT int PyArray_ClipmodeConverter \
- (PyObject *, NPY_CLIPMODE *);
-NPY_NO_EXPORT int PyArray_OutputConverter \
- (PyObject *, PyArrayObject **);
-NPY_NO_EXPORT PyObject * PyArray_BroadcastToShape \
- (PyObject *, npy_intp *, int);
-NPY_NO_EXPORT void _PyArray_SigintHandler \
- (int);
-NPY_NO_EXPORT void* _PyArray_GetSigintBuf \
- (void);
-NPY_NO_EXPORT int PyArray_DescrAlignConverter \
- (PyObject *, PyArray_Descr **);
-NPY_NO_EXPORT int PyArray_DescrAlignConverter2 \
- (PyObject *, PyArray_Descr **);
-NPY_NO_EXPORT int PyArray_SearchsideConverter \
- (PyObject *, void *);
-NPY_NO_EXPORT PyObject * PyArray_CheckAxis \
- (PyArrayObject *, int *, int);
-NPY_NO_EXPORT npy_intp PyArray_OverflowMultiplyList \
- (npy_intp *, int);
-NPY_NO_EXPORT int PyArray_CompareString \
- (char *, char *, size_t);
-NPY_NO_EXPORT PyObject * PyArray_MultiIterFromObjects \
- (PyObject **, int, int, ...);
-NPY_NO_EXPORT int PyArray_GetEndianness \
- (void);
-NPY_NO_EXPORT unsigned int PyArray_GetNDArrayCFeatureVersion \
- (void);
-NPY_NO_EXPORT PyObject * PyArray_Correlate2 \
- (PyObject *, PyObject *, int);
-NPY_NO_EXPORT PyObject* PyArray_NeighborhoodIterNew \
- (PyArrayIterObject *, npy_intp *, int, PyArrayObject*);
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyTimeIntegerArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyTimeIntegerArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyDatetimeArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyDatetimeArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyTimedeltaArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyTimedeltaArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyHalfArrType_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyHalfArrType_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject NpyIter_Type;
-#else
- NPY_NO_EXPORT PyTypeObject NpyIter_Type;
-#endif
-
-NPY_NO_EXPORT void PyArray_SetDatetimeParseFunction \
- (PyObject *);
-NPY_NO_EXPORT void PyArray_DatetimeToDatetimeStruct \
- (npy_datetime, NPY_DATETIMEUNIT, npy_datetimestruct *);
-NPY_NO_EXPORT void PyArray_TimedeltaToTimedeltaStruct \
- (npy_timedelta, NPY_DATETIMEUNIT, npy_timedeltastruct *);
-NPY_NO_EXPORT npy_datetime PyArray_DatetimeStructToDatetime \
- (NPY_DATETIMEUNIT, npy_datetimestruct *);
-NPY_NO_EXPORT npy_datetime PyArray_TimedeltaStructToTimedelta \
- (NPY_DATETIMEUNIT, npy_timedeltastruct *);
-NPY_NO_EXPORT NpyIter * NpyIter_New \
- (PyArrayObject *, npy_uint32, NPY_ORDER, NPY_CASTING, PyArray_Descr*);
-NPY_NO_EXPORT NpyIter * NpyIter_MultiNew \
- (int, PyArrayObject **, npy_uint32, NPY_ORDER, NPY_CASTING, npy_uint32 *, PyArray_Descr **);
-NPY_NO_EXPORT NpyIter * NpyIter_AdvancedNew \
- (int, PyArrayObject **, npy_uint32, NPY_ORDER, NPY_CASTING, npy_uint32 *, PyArray_Descr **, int, int **, npy_intp *, npy_intp);
-NPY_NO_EXPORT NpyIter * NpyIter_Copy \
- (NpyIter *);
-NPY_NO_EXPORT int NpyIter_Deallocate \
- (NpyIter *);
-NPY_NO_EXPORT npy_bool NpyIter_HasDelayedBufAlloc \
- (NpyIter *);
-NPY_NO_EXPORT npy_bool NpyIter_HasExternalLoop \
- (NpyIter *);
-NPY_NO_EXPORT int NpyIter_EnableExternalLoop \
- (NpyIter *);
-NPY_NO_EXPORT npy_intp * NpyIter_GetInnerStrideArray \
- (NpyIter *);
-NPY_NO_EXPORT npy_intp * NpyIter_GetInnerLoopSizePtr \
- (NpyIter *);
-NPY_NO_EXPORT int NpyIter_Reset \
- (NpyIter *, char **);
-NPY_NO_EXPORT int NpyIter_ResetBasePointers \
- (NpyIter *, char **, char **);
-NPY_NO_EXPORT int NpyIter_ResetToIterIndexRange \
- (NpyIter *, npy_intp, npy_intp, char **);
-NPY_NO_EXPORT int NpyIter_GetNDim \
- (NpyIter *);
-NPY_NO_EXPORT int NpyIter_GetNOp \
- (NpyIter *);
-NPY_NO_EXPORT NpyIter_IterNextFunc * NpyIter_GetIterNext \
- (NpyIter *, char **);
-NPY_NO_EXPORT npy_intp NpyIter_GetIterSize \
- (NpyIter *);
-NPY_NO_EXPORT void NpyIter_GetIterIndexRange \
- (NpyIter *, npy_intp *, npy_intp *);
-NPY_NO_EXPORT npy_intp NpyIter_GetIterIndex \
- (NpyIter *);
-NPY_NO_EXPORT int NpyIter_GotoIterIndex \
- (NpyIter *, npy_intp);
-NPY_NO_EXPORT npy_bool NpyIter_HasMultiIndex \
- (NpyIter *);
-NPY_NO_EXPORT int NpyIter_GetShape \
- (NpyIter *, npy_intp *);
-NPY_NO_EXPORT NpyIter_GetMultiIndexFunc * NpyIter_GetGetMultiIndex \
- (NpyIter *, char **);
-NPY_NO_EXPORT int NpyIter_GotoMultiIndex \
- (NpyIter *, npy_intp *);
-NPY_NO_EXPORT int NpyIter_RemoveMultiIndex \
- (NpyIter *);
-NPY_NO_EXPORT npy_bool NpyIter_HasIndex \
- (NpyIter *);
-NPY_NO_EXPORT npy_bool NpyIter_IsBuffered \
- (NpyIter *);
-NPY_NO_EXPORT npy_bool NpyIter_IsGrowInner \
- (NpyIter *);
-NPY_NO_EXPORT npy_intp NpyIter_GetBufferSize \
- (NpyIter *);
-NPY_NO_EXPORT npy_intp * NpyIter_GetIndexPtr \
- (NpyIter *);
-NPY_NO_EXPORT int NpyIter_GotoIndex \
- (NpyIter *, npy_intp);
-NPY_NO_EXPORT char ** NpyIter_GetDataPtrArray \
- (NpyIter *);
-NPY_NO_EXPORT PyArray_Descr ** NpyIter_GetDescrArray \
- (NpyIter *);
-NPY_NO_EXPORT PyArrayObject ** NpyIter_GetOperandArray \
- (NpyIter *);
-NPY_NO_EXPORT PyArrayObject * NpyIter_GetIterView \
- (NpyIter *, npy_intp);
-NPY_NO_EXPORT void NpyIter_GetReadFlags \
- (NpyIter *, char *);
-NPY_NO_EXPORT void NpyIter_GetWriteFlags \
- (NpyIter *, char *);
-NPY_NO_EXPORT void NpyIter_DebugPrint \
- (NpyIter *);
-NPY_NO_EXPORT npy_bool NpyIter_IterationNeedsAPI \
- (NpyIter *);
-NPY_NO_EXPORT void NpyIter_GetInnerFixedStrideArray \
- (NpyIter *, npy_intp *);
-NPY_NO_EXPORT int NpyIter_RemoveAxis \
- (NpyIter *, int);
-NPY_NO_EXPORT npy_intp * NpyIter_GetAxisStrideArray \
- (NpyIter *, int);
-NPY_NO_EXPORT npy_bool NpyIter_RequiresBuffering \
- (NpyIter *);
-NPY_NO_EXPORT char ** NpyIter_GetInitialDataPtrArray \
- (NpyIter *);
-NPY_NO_EXPORT int NpyIter_CreateCompatibleStrides \
- (NpyIter *, npy_intp, npy_intp *);
-NPY_NO_EXPORT int PyArray_CastingConverter \
- (PyObject *, NPY_CASTING *);
-NPY_NO_EXPORT npy_intp PyArray_CountNonzero \
- (PyArrayObject *);
-NPY_NO_EXPORT PyArray_Descr * PyArray_PromoteTypes \
- (PyArray_Descr *, PyArray_Descr *);
-NPY_NO_EXPORT PyArray_Descr * PyArray_MinScalarType \
- (PyArrayObject *);
-NPY_NO_EXPORT PyArray_Descr * PyArray_ResultType \
- (npy_intp, PyArrayObject **, npy_intp, PyArray_Descr **);
-NPY_NO_EXPORT npy_bool PyArray_CanCastArrayTo \
- (PyArrayObject *, PyArray_Descr *, NPY_CASTING);
-NPY_NO_EXPORT npy_bool PyArray_CanCastTypeTo \
- (PyArray_Descr *, PyArray_Descr *, NPY_CASTING);
-NPY_NO_EXPORT PyArrayObject * PyArray_EinsteinSum \
- (char *, npy_intp, PyArrayObject **, PyArray_Descr *, NPY_ORDER, NPY_CASTING, PyArrayObject *);
-NPY_NO_EXPORT PyObject * PyArray_NewLikeArray \
- (PyArrayObject *, NPY_ORDER, PyArray_Descr *, int);
-NPY_NO_EXPORT int PyArray_GetArrayParamsFromObject \
- (PyObject *, PyArray_Descr *, npy_bool, PyArray_Descr **, int *, npy_intp *, PyArrayObject **, PyObject *);
-NPY_NO_EXPORT int PyArray_ConvertClipmodeSequence \
- (PyObject *, NPY_CLIPMODE *, int);
-NPY_NO_EXPORT PyObject * PyArray_MatrixProduct2 \
- (PyObject *, PyObject *, PyArrayObject*);
-NPY_NO_EXPORT npy_bool NpyIter_IsFirstVisit \
- (NpyIter *, int);
-NPY_NO_EXPORT int PyArray_SetBaseObject \
- (PyArrayObject *, PyObject *);
-NPY_NO_EXPORT void PyArray_CreateSortedStridePerm \
- (int, npy_intp *, npy_stride_sort_item *);
-NPY_NO_EXPORT void PyArray_RemoveAxesInPlace \
- (PyArrayObject *, npy_bool *);
-NPY_NO_EXPORT void PyArray_DebugPrint \
- (PyArrayObject *);
-NPY_NO_EXPORT int PyArray_FailUnlessWriteable \
- (PyArrayObject *, const char *);
-NPY_NO_EXPORT int PyArray_SetUpdateIfCopyBase \
- (PyArrayObject *, PyArrayObject *);
-NPY_NO_EXPORT void * PyDataMem_NEW \
- (size_t);
-NPY_NO_EXPORT void PyDataMem_FREE \
- (void *);
-NPY_NO_EXPORT void * PyDataMem_RENEW \
- (void *, size_t);
-NPY_NO_EXPORT PyDataMem_EventHookFunc * PyDataMem_SetEventHook \
- (PyDataMem_EventHookFunc *, void *, void **);
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT NPY_CASTING NPY_DEFAULT_ASSIGN_CASTING;
-#else
- NPY_NO_EXPORT NPY_CASTING NPY_DEFAULT_ASSIGN_CASTING;
-#endif
-
-
-#else
-
-#if defined(PY_ARRAY_UNIQUE_SYMBOL)
-#define PyArray_API PY_ARRAY_UNIQUE_SYMBOL
-#endif
-
-#if defined(NO_IMPORT) || defined(NO_IMPORT_ARRAY)
-extern void **PyArray_API;
-#else
-#if defined(PY_ARRAY_UNIQUE_SYMBOL)
-void **PyArray_API;
-#else
-static void **PyArray_API=NULL;
-#endif
-#endif
-
-#define PyArray_GetNDArrayCVersion \
- (*(unsigned int (*)(void)) \
- PyArray_API[0])
-#define PyBigArray_Type (*(PyTypeObject *)PyArray_API[1])
-#define PyArray_Type (*(PyTypeObject *)PyArray_API[2])
-#define PyArrayDescr_Type (*(PyTypeObject *)PyArray_API[3])
-#define PyArrayFlags_Type (*(PyTypeObject *)PyArray_API[4])
-#define PyArrayIter_Type (*(PyTypeObject *)PyArray_API[5])
-#define PyArrayMultiIter_Type (*(PyTypeObject *)PyArray_API[6])
-#define NPY_NUMUSERTYPES (*(int *)PyArray_API[7])
-#define PyBoolArrType_Type (*(PyTypeObject *)PyArray_API[8])
-#define _PyArrayScalar_BoolValues ((PyBoolScalarObject *)PyArray_API[9])
-#define PyGenericArrType_Type (*(PyTypeObject *)PyArray_API[10])
-#define PyNumberArrType_Type (*(PyTypeObject *)PyArray_API[11])
-#define PyIntegerArrType_Type (*(PyTypeObject *)PyArray_API[12])
-#define PySignedIntegerArrType_Type (*(PyTypeObject *)PyArray_API[13])
-#define PyUnsignedIntegerArrType_Type (*(PyTypeObject *)PyArray_API[14])
-#define PyInexactArrType_Type (*(PyTypeObject *)PyArray_API[15])
-#define PyFloatingArrType_Type (*(PyTypeObject *)PyArray_API[16])
-#define PyComplexFloatingArrType_Type (*(PyTypeObject *)PyArray_API[17])
-#define PyFlexibleArrType_Type (*(PyTypeObject *)PyArray_API[18])
-#define PyCharacterArrType_Type (*(PyTypeObject *)PyArray_API[19])
-#define PyByteArrType_Type (*(PyTypeObject *)PyArray_API[20])
-#define PyShortArrType_Type (*(PyTypeObject *)PyArray_API[21])
-#define PyIntArrType_Type (*(PyTypeObject *)PyArray_API[22])
-#define PyLongArrType_Type (*(PyTypeObject *)PyArray_API[23])
-#define PyLongLongArrType_Type (*(PyTypeObject *)PyArray_API[24])
-#define PyUByteArrType_Type (*(PyTypeObject *)PyArray_API[25])
-#define PyUShortArrType_Type (*(PyTypeObject *)PyArray_API[26])
-#define PyUIntArrType_Type (*(PyTypeObject *)PyArray_API[27])
-#define PyULongArrType_Type (*(PyTypeObject *)PyArray_API[28])
-#define PyULongLongArrType_Type (*(PyTypeObject *)PyArray_API[29])
-#define PyFloatArrType_Type (*(PyTypeObject *)PyArray_API[30])
-#define PyDoubleArrType_Type (*(PyTypeObject *)PyArray_API[31])
-#define PyLongDoubleArrType_Type (*(PyTypeObject *)PyArray_API[32])
-#define PyCFloatArrType_Type (*(PyTypeObject *)PyArray_API[33])
-#define PyCDoubleArrType_Type (*(PyTypeObject *)PyArray_API[34])
-#define PyCLongDoubleArrType_Type (*(PyTypeObject *)PyArray_API[35])
-#define PyObjectArrType_Type (*(PyTypeObject *)PyArray_API[36])
-#define PyStringArrType_Type (*(PyTypeObject *)PyArray_API[37])
-#define PyUnicodeArrType_Type (*(PyTypeObject *)PyArray_API[38])
-#define PyVoidArrType_Type (*(PyTypeObject *)PyArray_API[39])
-#define PyArray_SetNumericOps \
- (*(int (*)(PyObject *)) \
- PyArray_API[40])
-#define PyArray_GetNumericOps \
- (*(PyObject * (*)(void)) \
- PyArray_API[41])
-#define PyArray_INCREF \
- (*(int (*)(PyArrayObject *)) \
- PyArray_API[42])
-#define PyArray_XDECREF \
- (*(int (*)(PyArrayObject *)) \
- PyArray_API[43])
-#define PyArray_SetStringFunction \
- (*(void (*)(PyObject *, int)) \
- PyArray_API[44])
-#define PyArray_DescrFromType \
- (*(PyArray_Descr * (*)(int)) \
- PyArray_API[45])
-#define PyArray_TypeObjectFromType \
- (*(PyObject * (*)(int)) \
- PyArray_API[46])
-#define PyArray_Zero \
- (*(char * (*)(PyArrayObject *)) \
- PyArray_API[47])
-#define PyArray_One \
- (*(char * (*)(PyArrayObject *)) \
- PyArray_API[48])
-#define PyArray_CastToType \
- (*(PyObject * (*)(PyArrayObject *, PyArray_Descr *, int)) \
- PyArray_API[49])
-#define PyArray_CastTo \
- (*(int (*)(PyArrayObject *, PyArrayObject *)) \
- PyArray_API[50])
-#define PyArray_CastAnyTo \
- (*(int (*)(PyArrayObject *, PyArrayObject *)) \
- PyArray_API[51])
-#define PyArray_CanCastSafely \
- (*(int (*)(int, int)) \
- PyArray_API[52])
-#define PyArray_CanCastTo \
- (*(npy_bool (*)(PyArray_Descr *, PyArray_Descr *)) \
- PyArray_API[53])
-#define PyArray_ObjectType \
- (*(int (*)(PyObject *, int)) \
- PyArray_API[54])
-#define PyArray_DescrFromObject \
- (*(PyArray_Descr * (*)(PyObject *, PyArray_Descr *)) \
- PyArray_API[55])
-#define PyArray_ConvertToCommonType \
- (*(PyArrayObject ** (*)(PyObject *, int *)) \
- PyArray_API[56])
-#define PyArray_DescrFromScalar \
- (*(PyArray_Descr * (*)(PyObject *)) \
- PyArray_API[57])
-#define PyArray_DescrFromTypeObject \
- (*(PyArray_Descr * (*)(PyObject *)) \
- PyArray_API[58])
-#define PyArray_Size \
- (*(npy_intp (*)(PyObject *)) \
- PyArray_API[59])
-#define PyArray_Scalar \
- (*(PyObject * (*)(void *, PyArray_Descr *, PyObject *)) \
- PyArray_API[60])
-#define PyArray_FromScalar \
- (*(PyObject * (*)(PyObject *, PyArray_Descr *)) \
- PyArray_API[61])
-#define PyArray_ScalarAsCtype \
- (*(void (*)(PyObject *, void *)) \
- PyArray_API[62])
-#define PyArray_CastScalarToCtype \
- (*(int (*)(PyObject *, void *, PyArray_Descr *)) \
- PyArray_API[63])
-#define PyArray_CastScalarDirect \
- (*(int (*)(PyObject *, PyArray_Descr *, void *, int)) \
- PyArray_API[64])
-#define PyArray_ScalarFromObject \
- (*(PyObject * (*)(PyObject *)) \
- PyArray_API[65])
-#define PyArray_GetCastFunc \
- (*(PyArray_VectorUnaryFunc * (*)(PyArray_Descr *, int)) \
- PyArray_API[66])
-#define PyArray_FromDims \
- (*(PyObject * (*)(int, int *, int)) \
- PyArray_API[67])
-#define PyArray_FromDimsAndDataAndDescr \
- (*(PyObject * (*)(int, int *, PyArray_Descr *, char *)) \
- PyArray_API[68])
-#define PyArray_FromAny \
- (*(PyObject * (*)(PyObject *, PyArray_Descr *, int, int, int, PyObject *)) \
- PyArray_API[69])
-#define PyArray_EnsureArray \
- (*(PyObject * (*)(PyObject *)) \
- PyArray_API[70])
-#define PyArray_EnsureAnyArray \
- (*(PyObject * (*)(PyObject *)) \
- PyArray_API[71])
-#define PyArray_FromFile \
- (*(PyObject * (*)(FILE *, PyArray_Descr *, npy_intp, char *)) \
- PyArray_API[72])
-#define PyArray_FromString \
- (*(PyObject * (*)(char *, npy_intp, PyArray_Descr *, npy_intp, char *)) \
- PyArray_API[73])
-#define PyArray_FromBuffer \
- (*(PyObject * (*)(PyObject *, PyArray_Descr *, npy_intp, npy_intp)) \
- PyArray_API[74])
-#define PyArray_FromIter \
- (*(PyObject * (*)(PyObject *, PyArray_Descr *, npy_intp)) \
- PyArray_API[75])
-#define PyArray_Return \
- (*(PyObject * (*)(PyArrayObject *)) \
- PyArray_API[76])
-#define PyArray_GetField \
- (*(PyObject * (*)(PyArrayObject *, PyArray_Descr *, int)) \
- PyArray_API[77])
-#define PyArray_SetField \
- (*(int (*)(PyArrayObject *, PyArray_Descr *, int, PyObject *)) \
- PyArray_API[78])
-#define PyArray_Byteswap \
- (*(PyObject * (*)(PyArrayObject *, npy_bool)) \
- PyArray_API[79])
-#define PyArray_Resize \
- (*(PyObject * (*)(PyArrayObject *, PyArray_Dims *, int, NPY_ORDER)) \
- PyArray_API[80])
-#define PyArray_MoveInto \
- (*(int (*)(PyArrayObject *, PyArrayObject *)) \
- PyArray_API[81])
-#define PyArray_CopyInto \
- (*(int (*)(PyArrayObject *, PyArrayObject *)) \
- PyArray_API[82])
-#define PyArray_CopyAnyInto \
- (*(int (*)(PyArrayObject *, PyArrayObject *)) \
- PyArray_API[83])
-#define PyArray_CopyObject \
- (*(int (*)(PyArrayObject *, PyObject *)) \
- PyArray_API[84])
-#define PyArray_NewCopy \
- (*(PyObject * (*)(PyArrayObject *, NPY_ORDER)) \
- PyArray_API[85])
-#define PyArray_ToList \
- (*(PyObject * (*)(PyArrayObject *)) \
- PyArray_API[86])
-#define PyArray_ToString \
- (*(PyObject * (*)(PyArrayObject *, NPY_ORDER)) \
- PyArray_API[87])
-#define PyArray_ToFile \
- (*(int (*)(PyArrayObject *, FILE *, char *, char *)) \
- PyArray_API[88])
-#define PyArray_Dump \
- (*(int (*)(PyObject *, PyObject *, int)) \
- PyArray_API[89])
-#define PyArray_Dumps \
- (*(PyObject * (*)(PyObject *, int)) \
- PyArray_API[90])
-#define PyArray_ValidType \
- (*(int (*)(int)) \
- PyArray_API[91])
-#define PyArray_UpdateFlags \
- (*(void (*)(PyArrayObject *, int)) \
- PyArray_API[92])
-#define PyArray_New \
- (*(PyObject * (*)(PyTypeObject *, int, npy_intp *, int, npy_intp *, void *, int, int, PyObject *)) \
- PyArray_API[93])
-#define PyArray_NewFromDescr \
- (*(PyObject * (*)(PyTypeObject *, PyArray_Descr *, int, npy_intp *, npy_intp *, void *, int, PyObject *)) \
- PyArray_API[94])
-#define PyArray_DescrNew \
- (*(PyArray_Descr * (*)(PyArray_Descr *)) \
- PyArray_API[95])
-#define PyArray_DescrNewFromType \
- (*(PyArray_Descr * (*)(int)) \
- PyArray_API[96])
-#define PyArray_GetPriority \
- (*(double (*)(PyObject *, double)) \
- PyArray_API[97])
-#define PyArray_IterNew \
- (*(PyObject * (*)(PyObject *)) \
- PyArray_API[98])
-#define PyArray_MultiIterNew \
- (*(PyObject * (*)(int, ...)) \
- PyArray_API[99])
-#define PyArray_PyIntAsInt \
- (*(int (*)(PyObject *)) \
- PyArray_API[100])
-#define PyArray_PyIntAsIntp \
- (*(npy_intp (*)(PyObject *)) \
- PyArray_API[101])
-#define PyArray_Broadcast \
- (*(int (*)(PyArrayMultiIterObject *)) \
- PyArray_API[102])
-#define PyArray_FillObjectArray \
- (*(void (*)(PyArrayObject *, PyObject *)) \
- PyArray_API[103])
-#define PyArray_FillWithScalar \
- (*(int (*)(PyArrayObject *, PyObject *)) \
- PyArray_API[104])
-#define PyArray_CheckStrides \
- (*(npy_bool (*)(int, int, npy_intp, npy_intp, npy_intp *, npy_intp *)) \
- PyArray_API[105])
-#define PyArray_DescrNewByteorder \
- (*(PyArray_Descr * (*)(PyArray_Descr *, char)) \
- PyArray_API[106])
-#define PyArray_IterAllButAxis \
- (*(PyObject * (*)(PyObject *, int *)) \
- PyArray_API[107])
-#define PyArray_CheckFromAny \
- (*(PyObject * (*)(PyObject *, PyArray_Descr *, int, int, int, PyObject *)) \
- PyArray_API[108])
-#define PyArray_FromArray \
- (*(PyObject * (*)(PyArrayObject *, PyArray_Descr *, int)) \
- PyArray_API[109])
-#define PyArray_FromInterface \
- (*(PyObject * (*)(PyObject *)) \
- PyArray_API[110])
-#define PyArray_FromStructInterface \
- (*(PyObject * (*)(PyObject *)) \
- PyArray_API[111])
-#define PyArray_FromArrayAttr \
- (*(PyObject * (*)(PyObject *, PyArray_Descr *, PyObject *)) \
- PyArray_API[112])
-#define PyArray_ScalarKind \
- (*(NPY_SCALARKIND (*)(int, PyArrayObject **)) \
- PyArray_API[113])
-#define PyArray_CanCoerceScalar \
- (*(int (*)(int, int, NPY_SCALARKIND)) \
- PyArray_API[114])
-#define PyArray_NewFlagsObject \
- (*(PyObject * (*)(PyObject *)) \
- PyArray_API[115])
-#define PyArray_CanCastScalar \
- (*(npy_bool (*)(PyTypeObject *, PyTypeObject *)) \
- PyArray_API[116])
-#define PyArray_CompareUCS4 \
- (*(int (*)(npy_ucs4 *, npy_ucs4 *, size_t)) \
- PyArray_API[117])
-#define PyArray_RemoveSmallest \
- (*(int (*)(PyArrayMultiIterObject *)) \
- PyArray_API[118])
-#define PyArray_ElementStrides \
- (*(int (*)(PyObject *)) \
- PyArray_API[119])
-#define PyArray_Item_INCREF \
- (*(void (*)(char *, PyArray_Descr *)) \
- PyArray_API[120])
-#define PyArray_Item_XDECREF \
- (*(void (*)(char *, PyArray_Descr *)) \
- PyArray_API[121])
-#define PyArray_FieldNames \
- (*(PyObject * (*)(PyObject *)) \
- PyArray_API[122])
-#define PyArray_Transpose \
- (*(PyObject * (*)(PyArrayObject *, PyArray_Dims *)) \
- PyArray_API[123])
-#define PyArray_TakeFrom \
- (*(PyObject * (*)(PyArrayObject *, PyObject *, int, PyArrayObject *, NPY_CLIPMODE)) \
- PyArray_API[124])
-#define PyArray_PutTo \
- (*(PyObject * (*)(PyArrayObject *, PyObject*, PyObject *, NPY_CLIPMODE)) \
- PyArray_API[125])
-#define PyArray_PutMask \
- (*(PyObject * (*)(PyArrayObject *, PyObject*, PyObject*)) \
- PyArray_API[126])
-#define PyArray_Repeat \
- (*(PyObject * (*)(PyArrayObject *, PyObject *, int)) \
- PyArray_API[127])
-#define PyArray_Choose \
- (*(PyObject * (*)(PyArrayObject *, PyObject *, PyArrayObject *, NPY_CLIPMODE)) \
- PyArray_API[128])
-#define PyArray_Sort \
- (*(int (*)(PyArrayObject *, int, NPY_SORTKIND)) \
- PyArray_API[129])
-#define PyArray_ArgSort \
- (*(PyObject * (*)(PyArrayObject *, int, NPY_SORTKIND)) \
- PyArray_API[130])
-#define PyArray_SearchSorted \
- (*(PyObject * (*)(PyArrayObject *, PyObject *, NPY_SEARCHSIDE, PyObject *)) \
- PyArray_API[131])
-#define PyArray_ArgMax \
- (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \
- PyArray_API[132])
-#define PyArray_ArgMin \
- (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \
- PyArray_API[133])
-#define PyArray_Reshape \
- (*(PyObject * (*)(PyArrayObject *, PyObject *)) \
- PyArray_API[134])
-#define PyArray_Newshape \
- (*(PyObject * (*)(PyArrayObject *, PyArray_Dims *, NPY_ORDER)) \
- PyArray_API[135])
-#define PyArray_Squeeze \
- (*(PyObject * (*)(PyArrayObject *)) \
- PyArray_API[136])
-#define PyArray_View \
- (*(PyObject * (*)(PyArrayObject *, PyArray_Descr *, PyTypeObject *)) \
- PyArray_API[137])
-#define PyArray_SwapAxes \
- (*(PyObject * (*)(PyArrayObject *, int, int)) \
- PyArray_API[138])
-#define PyArray_Max \
- (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \
- PyArray_API[139])
-#define PyArray_Min \
- (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \
- PyArray_API[140])
-#define PyArray_Ptp \
- (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \
- PyArray_API[141])
-#define PyArray_Mean \
- (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \
- PyArray_API[142])
-#define PyArray_Trace \
- (*(PyObject * (*)(PyArrayObject *, int, int, int, int, PyArrayObject *)) \
- PyArray_API[143])
-#define PyArray_Diagonal \
- (*(PyObject * (*)(PyArrayObject *, int, int, int)) \
- PyArray_API[144])
-#define PyArray_Clip \
- (*(PyObject * (*)(PyArrayObject *, PyObject *, PyObject *, PyArrayObject *)) \
- PyArray_API[145])
-#define PyArray_Conjugate \
- (*(PyObject * (*)(PyArrayObject *, PyArrayObject *)) \
- PyArray_API[146])
-#define PyArray_Nonzero \
- (*(PyObject * (*)(PyArrayObject *)) \
- PyArray_API[147])
-#define PyArray_Std \
- (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *, int)) \
- PyArray_API[148])
-#define PyArray_Sum \
- (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \
- PyArray_API[149])
-#define PyArray_CumSum \
- (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \
- PyArray_API[150])
-#define PyArray_Prod \
- (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \
- PyArray_API[151])
-#define PyArray_CumProd \
- (*(PyObject * (*)(PyArrayObject *, int, int, PyArrayObject *)) \
- PyArray_API[152])
-#define PyArray_All \
- (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \
- PyArray_API[153])
-#define PyArray_Any \
- (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \
- PyArray_API[154])
-#define PyArray_Compress \
- (*(PyObject * (*)(PyArrayObject *, PyObject *, int, PyArrayObject *)) \
- PyArray_API[155])
-#define PyArray_Flatten \
- (*(PyObject * (*)(PyArrayObject *, NPY_ORDER)) \
- PyArray_API[156])
-#define PyArray_Ravel \
- (*(PyObject * (*)(PyArrayObject *, NPY_ORDER)) \
- PyArray_API[157])
-#define PyArray_MultiplyList \
- (*(npy_intp (*)(npy_intp *, int)) \
- PyArray_API[158])
-#define PyArray_MultiplyIntList \
- (*(int (*)(int *, int)) \
- PyArray_API[159])
-#define PyArray_GetPtr \
- (*(void * (*)(PyArrayObject *, npy_intp*)) \
- PyArray_API[160])
-#define PyArray_CompareLists \
- (*(int (*)(npy_intp *, npy_intp *, int)) \
- PyArray_API[161])
-#define PyArray_AsCArray \
- (*(int (*)(PyObject **, void *, npy_intp *, int, PyArray_Descr*)) \
- PyArray_API[162])
-#define PyArray_As1D \
- (*(int (*)(PyObject **, char **, int *, int)) \
- PyArray_API[163])
-#define PyArray_As2D \
- (*(int (*)(PyObject **, char ***, int *, int *, int)) \
- PyArray_API[164])
-#define PyArray_Free \
- (*(int (*)(PyObject *, void *)) \
- PyArray_API[165])
-#define PyArray_Converter \
- (*(int (*)(PyObject *, PyObject **)) \
- PyArray_API[166])
-#define PyArray_IntpFromSequence \
- (*(int (*)(PyObject *, npy_intp *, int)) \
- PyArray_API[167])
-#define PyArray_Concatenate \
- (*(PyObject * (*)(PyObject *, int)) \
- PyArray_API[168])
-#define PyArray_InnerProduct \
- (*(PyObject * (*)(PyObject *, PyObject *)) \
- PyArray_API[169])
-#define PyArray_MatrixProduct \
- (*(PyObject * (*)(PyObject *, PyObject *)) \
- PyArray_API[170])
-#define PyArray_CopyAndTranspose \
- (*(PyObject * (*)(PyObject *)) \
- PyArray_API[171])
-#define PyArray_Correlate \
- (*(PyObject * (*)(PyObject *, PyObject *, int)) \
- PyArray_API[172])
-#define PyArray_TypestrConvert \
- (*(int (*)(int, int)) \
- PyArray_API[173])
-#define PyArray_DescrConverter \
- (*(int (*)(PyObject *, PyArray_Descr **)) \
- PyArray_API[174])
-#define PyArray_DescrConverter2 \
- (*(int (*)(PyObject *, PyArray_Descr **)) \
- PyArray_API[175])
-#define PyArray_IntpConverter \
- (*(int (*)(PyObject *, PyArray_Dims *)) \
- PyArray_API[176])
-#define PyArray_BufferConverter \
- (*(int (*)(PyObject *, PyArray_Chunk *)) \
- PyArray_API[177])
-#define PyArray_AxisConverter \
- (*(int (*)(PyObject *, int *)) \
- PyArray_API[178])
-#define PyArray_BoolConverter \
- (*(int (*)(PyObject *, npy_bool *)) \
- PyArray_API[179])
-#define PyArray_ByteorderConverter \
- (*(int (*)(PyObject *, char *)) \
- PyArray_API[180])
-#define PyArray_OrderConverter \
- (*(int (*)(PyObject *, NPY_ORDER *)) \
- PyArray_API[181])
-#define PyArray_EquivTypes \
- (*(unsigned char (*)(PyArray_Descr *, PyArray_Descr *)) \
- PyArray_API[182])
-#define PyArray_Zeros \
- (*(PyObject * (*)(int, npy_intp *, PyArray_Descr *, int)) \
- PyArray_API[183])
-#define PyArray_Empty \
- (*(PyObject * (*)(int, npy_intp *, PyArray_Descr *, int)) \
- PyArray_API[184])
-#define PyArray_Where \
- (*(PyObject * (*)(PyObject *, PyObject *, PyObject *)) \
- PyArray_API[185])
-#define PyArray_Arange \
- (*(PyObject * (*)(double, double, double, int)) \
- PyArray_API[186])
-#define PyArray_ArangeObj \
- (*(PyObject * (*)(PyObject *, PyObject *, PyObject *, PyArray_Descr *)) \
- PyArray_API[187])
-#define PyArray_SortkindConverter \
- (*(int (*)(PyObject *, NPY_SORTKIND *)) \
- PyArray_API[188])
-#define PyArray_LexSort \
- (*(PyObject * (*)(PyObject *, int)) \
- PyArray_API[189])
-#define PyArray_Round \
- (*(PyObject * (*)(PyArrayObject *, int, PyArrayObject *)) \
- PyArray_API[190])
-#define PyArray_EquivTypenums \
- (*(unsigned char (*)(int, int)) \
- PyArray_API[191])
-#define PyArray_RegisterDataType \
- (*(int (*)(PyArray_Descr *)) \
- PyArray_API[192])
-#define PyArray_RegisterCastFunc \
- (*(int (*)(PyArray_Descr *, int, PyArray_VectorUnaryFunc *)) \
- PyArray_API[193])
-#define PyArray_RegisterCanCast \
- (*(int (*)(PyArray_Descr *, int, NPY_SCALARKIND)) \
- PyArray_API[194])
-#define PyArray_InitArrFuncs \
- (*(void (*)(PyArray_ArrFuncs *)) \
- PyArray_API[195])
-#define PyArray_IntTupleFromIntp \
- (*(PyObject * (*)(int, npy_intp *)) \
- PyArray_API[196])
-#define PyArray_TypeNumFromName \
- (*(int (*)(char *)) \
- PyArray_API[197])
-#define PyArray_ClipmodeConverter \
- (*(int (*)(PyObject *, NPY_CLIPMODE *)) \
- PyArray_API[198])
-#define PyArray_OutputConverter \
- (*(int (*)(PyObject *, PyArrayObject **)) \
- PyArray_API[199])
-#define PyArray_BroadcastToShape \
- (*(PyObject * (*)(PyObject *, npy_intp *, int)) \
- PyArray_API[200])
-#define _PyArray_SigintHandler \
- (*(void (*)(int)) \
- PyArray_API[201])
-#define _PyArray_GetSigintBuf \
- (*(void* (*)(void)) \
- PyArray_API[202])
-#define PyArray_DescrAlignConverter \
- (*(int (*)(PyObject *, PyArray_Descr **)) \
- PyArray_API[203])
-#define PyArray_DescrAlignConverter2 \
- (*(int (*)(PyObject *, PyArray_Descr **)) \
- PyArray_API[204])
-#define PyArray_SearchsideConverter \
- (*(int (*)(PyObject *, void *)) \
- PyArray_API[205])
-#define PyArray_CheckAxis \
- (*(PyObject * (*)(PyArrayObject *, int *, int)) \
- PyArray_API[206])
-#define PyArray_OverflowMultiplyList \
- (*(npy_intp (*)(npy_intp *, int)) \
- PyArray_API[207])
-#define PyArray_CompareString \
- (*(int (*)(char *, char *, size_t)) \
- PyArray_API[208])
-#define PyArray_MultiIterFromObjects \
- (*(PyObject * (*)(PyObject **, int, int, ...)) \
- PyArray_API[209])
-#define PyArray_GetEndianness \
- (*(int (*)(void)) \
- PyArray_API[210])
-#define PyArray_GetNDArrayCFeatureVersion \
- (*(unsigned int (*)(void)) \
- PyArray_API[211])
-#define PyArray_Correlate2 \
- (*(PyObject * (*)(PyObject *, PyObject *, int)) \
- PyArray_API[212])
-#define PyArray_NeighborhoodIterNew \
- (*(PyObject* (*)(PyArrayIterObject *, npy_intp *, int, PyArrayObject*)) \
- PyArray_API[213])
-#define PyTimeIntegerArrType_Type (*(PyTypeObject *)PyArray_API[214])
-#define PyDatetimeArrType_Type (*(PyTypeObject *)PyArray_API[215])
-#define PyTimedeltaArrType_Type (*(PyTypeObject *)PyArray_API[216])
-#define PyHalfArrType_Type (*(PyTypeObject *)PyArray_API[217])
-#define NpyIter_Type (*(PyTypeObject *)PyArray_API[218])
-#define PyArray_SetDatetimeParseFunction \
- (*(void (*)(PyObject *)) \
- PyArray_API[219])
-#define PyArray_DatetimeToDatetimeStruct \
- (*(void (*)(npy_datetime, NPY_DATETIMEUNIT, npy_datetimestruct *)) \
- PyArray_API[220])
-#define PyArray_TimedeltaToTimedeltaStruct \
- (*(void (*)(npy_timedelta, NPY_DATETIMEUNIT, npy_timedeltastruct *)) \
- PyArray_API[221])
-#define PyArray_DatetimeStructToDatetime \
- (*(npy_datetime (*)(NPY_DATETIMEUNIT, npy_datetimestruct *)) \
- PyArray_API[222])
-#define PyArray_TimedeltaStructToTimedelta \
- (*(npy_datetime (*)(NPY_DATETIMEUNIT, npy_timedeltastruct *)) \
- PyArray_API[223])
-#define NpyIter_New \
- (*(NpyIter * (*)(PyArrayObject *, npy_uint32, NPY_ORDER, NPY_CASTING, PyArray_Descr*)) \
- PyArray_API[224])
-#define NpyIter_MultiNew \
- (*(NpyIter * (*)(int, PyArrayObject **, npy_uint32, NPY_ORDER, NPY_CASTING, npy_uint32 *, PyArray_Descr **)) \
- PyArray_API[225])
-#define NpyIter_AdvancedNew \
- (*(NpyIter * (*)(int, PyArrayObject **, npy_uint32, NPY_ORDER, NPY_CASTING, npy_uint32 *, PyArray_Descr **, int, int **, npy_intp *, npy_intp)) \
- PyArray_API[226])
-#define NpyIter_Copy \
- (*(NpyIter * (*)(NpyIter *)) \
- PyArray_API[227])
-#define NpyIter_Deallocate \
- (*(int (*)(NpyIter *)) \
- PyArray_API[228])
-#define NpyIter_HasDelayedBufAlloc \
- (*(npy_bool (*)(NpyIter *)) \
- PyArray_API[229])
-#define NpyIter_HasExternalLoop \
- (*(npy_bool (*)(NpyIter *)) \
- PyArray_API[230])
-#define NpyIter_EnableExternalLoop \
- (*(int (*)(NpyIter *)) \
- PyArray_API[231])
-#define NpyIter_GetInnerStrideArray \
- (*(npy_intp * (*)(NpyIter *)) \
- PyArray_API[232])
-#define NpyIter_GetInnerLoopSizePtr \
- (*(npy_intp * (*)(NpyIter *)) \
- PyArray_API[233])
-#define NpyIter_Reset \
- (*(int (*)(NpyIter *, char **)) \
- PyArray_API[234])
-#define NpyIter_ResetBasePointers \
- (*(int (*)(NpyIter *, char **, char **)) \
- PyArray_API[235])
-#define NpyIter_ResetToIterIndexRange \
- (*(int (*)(NpyIter *, npy_intp, npy_intp, char **)) \
- PyArray_API[236])
-#define NpyIter_GetNDim \
- (*(int (*)(NpyIter *)) \
- PyArray_API[237])
-#define NpyIter_GetNOp \
- (*(int (*)(NpyIter *)) \
- PyArray_API[238])
-#define NpyIter_GetIterNext \
- (*(NpyIter_IterNextFunc * (*)(NpyIter *, char **)) \
- PyArray_API[239])
-#define NpyIter_GetIterSize \
- (*(npy_intp (*)(NpyIter *)) \
- PyArray_API[240])
-#define NpyIter_GetIterIndexRange \
- (*(void (*)(NpyIter *, npy_intp *, npy_intp *)) \
- PyArray_API[241])
-#define NpyIter_GetIterIndex \
- (*(npy_intp (*)(NpyIter *)) \
- PyArray_API[242])
-#define NpyIter_GotoIterIndex \
- (*(int (*)(NpyIter *, npy_intp)) \
- PyArray_API[243])
-#define NpyIter_HasMultiIndex \
- (*(npy_bool (*)(NpyIter *)) \
- PyArray_API[244])
-#define NpyIter_GetShape \
- (*(int (*)(NpyIter *, npy_intp *)) \
- PyArray_API[245])
-#define NpyIter_GetGetMultiIndex \
- (*(NpyIter_GetMultiIndexFunc * (*)(NpyIter *, char **)) \
- PyArray_API[246])
-#define NpyIter_GotoMultiIndex \
- (*(int (*)(NpyIter *, npy_intp *)) \
- PyArray_API[247])
-#define NpyIter_RemoveMultiIndex \
- (*(int (*)(NpyIter *)) \
- PyArray_API[248])
-#define NpyIter_HasIndex \
- (*(npy_bool (*)(NpyIter *)) \
- PyArray_API[249])
-#define NpyIter_IsBuffered \
- (*(npy_bool (*)(NpyIter *)) \
- PyArray_API[250])
-#define NpyIter_IsGrowInner \
- (*(npy_bool (*)(NpyIter *)) \
- PyArray_API[251])
-#define NpyIter_GetBufferSize \
- (*(npy_intp (*)(NpyIter *)) \
- PyArray_API[252])
-#define NpyIter_GetIndexPtr \
- (*(npy_intp * (*)(NpyIter *)) \
- PyArray_API[253])
-#define NpyIter_GotoIndex \
- (*(int (*)(NpyIter *, npy_intp)) \
- PyArray_API[254])
-#define NpyIter_GetDataPtrArray \
- (*(char ** (*)(NpyIter *)) \
- PyArray_API[255])
-#define NpyIter_GetDescrArray \
- (*(PyArray_Descr ** (*)(NpyIter *)) \
- PyArray_API[256])
-#define NpyIter_GetOperandArray \
- (*(PyArrayObject ** (*)(NpyIter *)) \
- PyArray_API[257])
-#define NpyIter_GetIterView \
- (*(PyArrayObject * (*)(NpyIter *, npy_intp)) \
- PyArray_API[258])
-#define NpyIter_GetReadFlags \
- (*(void (*)(NpyIter *, char *)) \
- PyArray_API[259])
-#define NpyIter_GetWriteFlags \
- (*(void (*)(NpyIter *, char *)) \
- PyArray_API[260])
-#define NpyIter_DebugPrint \
- (*(void (*)(NpyIter *)) \
- PyArray_API[261])
-#define NpyIter_IterationNeedsAPI \
- (*(npy_bool (*)(NpyIter *)) \
- PyArray_API[262])
-#define NpyIter_GetInnerFixedStrideArray \
- (*(void (*)(NpyIter *, npy_intp *)) \
- PyArray_API[263])
-#define NpyIter_RemoveAxis \
- (*(int (*)(NpyIter *, int)) \
- PyArray_API[264])
-#define NpyIter_GetAxisStrideArray \
- (*(npy_intp * (*)(NpyIter *, int)) \
- PyArray_API[265])
-#define NpyIter_RequiresBuffering \
- (*(npy_bool (*)(NpyIter *)) \
- PyArray_API[266])
-#define NpyIter_GetInitialDataPtrArray \
- (*(char ** (*)(NpyIter *)) \
- PyArray_API[267])
-#define NpyIter_CreateCompatibleStrides \
- (*(int (*)(NpyIter *, npy_intp, npy_intp *)) \
- PyArray_API[268])
-#define PyArray_CastingConverter \
- (*(int (*)(PyObject *, NPY_CASTING *)) \
- PyArray_API[269])
-#define PyArray_CountNonzero \
- (*(npy_intp (*)(PyArrayObject *)) \
- PyArray_API[270])
-#define PyArray_PromoteTypes \
- (*(PyArray_Descr * (*)(PyArray_Descr *, PyArray_Descr *)) \
- PyArray_API[271])
-#define PyArray_MinScalarType \
- (*(PyArray_Descr * (*)(PyArrayObject *)) \
- PyArray_API[272])
-#define PyArray_ResultType \
- (*(PyArray_Descr * (*)(npy_intp, PyArrayObject **, npy_intp, PyArray_Descr **)) \
- PyArray_API[273])
-#define PyArray_CanCastArrayTo \
- (*(npy_bool (*)(PyArrayObject *, PyArray_Descr *, NPY_CASTING)) \
- PyArray_API[274])
-#define PyArray_CanCastTypeTo \
- (*(npy_bool (*)(PyArray_Descr *, PyArray_Descr *, NPY_CASTING)) \
- PyArray_API[275])
-#define PyArray_EinsteinSum \
- (*(PyArrayObject * (*)(char *, npy_intp, PyArrayObject **, PyArray_Descr *, NPY_ORDER, NPY_CASTING, PyArrayObject *)) \
- PyArray_API[276])
-#define PyArray_NewLikeArray \
- (*(PyObject * (*)(PyArrayObject *, NPY_ORDER, PyArray_Descr *, int)) \
- PyArray_API[277])
-#define PyArray_GetArrayParamsFromObject \
- (*(int (*)(PyObject *, PyArray_Descr *, npy_bool, PyArray_Descr **, int *, npy_intp *, PyArrayObject **, PyObject *)) \
- PyArray_API[278])
-#define PyArray_ConvertClipmodeSequence \
- (*(int (*)(PyObject *, NPY_CLIPMODE *, int)) \
- PyArray_API[279])
-#define PyArray_MatrixProduct2 \
- (*(PyObject * (*)(PyObject *, PyObject *, PyArrayObject*)) \
- PyArray_API[280])
-#define NpyIter_IsFirstVisit \
- (*(npy_bool (*)(NpyIter *, int)) \
- PyArray_API[281])
-#define PyArray_SetBaseObject \
- (*(int (*)(PyArrayObject *, PyObject *)) \
- PyArray_API[282])
-#define PyArray_CreateSortedStridePerm \
- (*(void (*)(int, npy_intp *, npy_stride_sort_item *)) \
- PyArray_API[283])
-#define PyArray_RemoveAxesInPlace \
- (*(void (*)(PyArrayObject *, npy_bool *)) \
- PyArray_API[284])
-#define PyArray_DebugPrint \
- (*(void (*)(PyArrayObject *)) \
- PyArray_API[285])
-#define PyArray_FailUnlessWriteable \
- (*(int (*)(PyArrayObject *, const char *)) \
- PyArray_API[286])
-#define PyArray_SetUpdateIfCopyBase \
- (*(int (*)(PyArrayObject *, PyArrayObject *)) \
- PyArray_API[287])
-#define PyDataMem_NEW \
- (*(void * (*)(size_t)) \
- PyArray_API[288])
-#define PyDataMem_FREE \
- (*(void (*)(void *)) \
- PyArray_API[289])
-#define PyDataMem_RENEW \
- (*(void * (*)(void *, size_t)) \
- PyArray_API[290])
-#define PyDataMem_SetEventHook \
- (*(PyDataMem_EventHookFunc * (*)(PyDataMem_EventHookFunc *, void *, void **)) \
- PyArray_API[291])
-#define NPY_DEFAULT_ASSIGN_CASTING (*(NPY_CASTING *)PyArray_API[292])
-
-#if !defined(NO_IMPORT_ARRAY) && !defined(NO_IMPORT)
-static int
-_import_array(void)
-{
- int st;
- PyObject *numpy = PyImport_ImportModule("numpy.core.multiarray");
- PyObject *c_api = NULL;
-
- if (numpy == NULL) {
- PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import");
- return -1;
- }
- c_api = PyObject_GetAttrString(numpy, "_ARRAY_API");
- Py_DECREF(numpy);
- if (c_api == NULL) {
- PyErr_SetString(PyExc_AttributeError, "_ARRAY_API not found");
- return -1;
- }
-
-#if PY_VERSION_HEX >= 0x03000000
- if (!PyCapsule_CheckExact(c_api)) {
- PyErr_SetString(PyExc_RuntimeError, "_ARRAY_API is not PyCapsule object");
- Py_DECREF(c_api);
- return -1;
- }
- PyArray_API = (void **)PyCapsule_GetPointer(c_api, NULL);
-#else
- if (!PyCObject_Check(c_api)) {
- PyErr_SetString(PyExc_RuntimeError, "_ARRAY_API is not PyCObject object");
- Py_DECREF(c_api);
- return -1;
- }
- PyArray_API = (void **)PyCObject_AsVoidPtr(c_api);
-#endif
- Py_DECREF(c_api);
- if (PyArray_API == NULL) {
- PyErr_SetString(PyExc_RuntimeError, "_ARRAY_API is NULL pointer");
- return -1;
- }
-
- /* Perform runtime check of C API version */
- if (NPY_VERSION != PyArray_GetNDArrayCVersion()) {
- PyErr_Format(PyExc_RuntimeError, "module compiled against "\
- "ABI version %x but this version of numpy is %x", \
- (int) NPY_VERSION, (int) PyArray_GetNDArrayCVersion());
- return -1;
- }
- if (NPY_FEATURE_VERSION > PyArray_GetNDArrayCFeatureVersion()) {
- PyErr_Format(PyExc_RuntimeError, "module compiled against "\
- "API version %x but this version of numpy is %x", \
- (int) NPY_FEATURE_VERSION, (int) PyArray_GetNDArrayCFeatureVersion());
- return -1;
- }
-
- /*
- * Perform runtime check of endianness and check it matches the one set by
- * the headers (npy_endian.h) as a safeguard
- */
- st = PyArray_GetEndianness();
- if (st == NPY_CPU_UNKNOWN_ENDIAN) {
- PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as unknown endian");
- return -1;
- }
-#if NPY_BYTE_ORDER == NPY_BIG_ENDIAN
- if (st != NPY_CPU_BIG) {
- PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as "\
- "big endian, but detected different endianness at runtime");
- return -1;
- }
-#elif NPY_BYTE_ORDER == NPY_LITTLE_ENDIAN
- if (st != NPY_CPU_LITTLE) {
- PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as "\
- "little endian, but detected different endianness at runtime");
- return -1;
- }
-#endif
-
- return 0;
-}
-
-#if PY_VERSION_HEX >= 0x03000000
-#define NUMPY_IMPORT_ARRAY_RETVAL NULL
-#else
-#define NUMPY_IMPORT_ARRAY_RETVAL
-#endif
-
-#define import_array() {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); return NUMPY_IMPORT_ARRAY_RETVAL; } }
-
-#define import_array1(ret) {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); return ret; } }
-
-#define import_array2(msg, ret) {if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, msg); return ret; } }
-
-#endif
-
-#endif
diff --git a/include/numpy/__ufunc_api.h b/include/numpy/__ufunc_api.h
deleted file mode 100644
index fd81d07b5..000000000
--- a/include/numpy/__ufunc_api.h
+++ /dev/null
@@ -1,323 +0,0 @@
-
-#ifdef _UMATHMODULE
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
-extern NPY_NO_EXPORT PyTypeObject PyUFunc_Type;
-#else
-NPY_NO_EXPORT PyTypeObject PyUFunc_Type;
-#endif
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- extern NPY_NO_EXPORT PyTypeObject PyUFunc_Type;
-#else
- NPY_NO_EXPORT PyTypeObject PyUFunc_Type;
-#endif
-
-NPY_NO_EXPORT PyObject * PyUFunc_FromFuncAndData \
- (PyUFuncGenericFunction *, void **, char *, int, int, int, int, char *, char *, int);
-NPY_NO_EXPORT int PyUFunc_RegisterLoopForType \
- (PyUFuncObject *, int, PyUFuncGenericFunction, int *, void *);
-NPY_NO_EXPORT int PyUFunc_GenericFunction \
- (PyUFuncObject *, PyObject *, PyObject *, PyArrayObject **);
-NPY_NO_EXPORT void PyUFunc_f_f_As_d_d \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_d_d \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_f_f \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_g_g \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_F_F_As_D_D \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_F_F \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_D_D \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_G_G \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_O_O \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_ff_f_As_dd_d \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_ff_f \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_dd_d \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_gg_g \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_FF_F_As_DD_D \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_DD_D \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_FF_F \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_GG_G \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_OO_O \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_O_O_method \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_OO_O_method \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_On_Om \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT int PyUFunc_GetPyValues \
- (char *, int *, int *, PyObject **);
-NPY_NO_EXPORT int PyUFunc_checkfperr \
- (int, PyObject *, int *);
-NPY_NO_EXPORT void PyUFunc_clearfperr \
- (void);
-NPY_NO_EXPORT int PyUFunc_getfperr \
- (void);
-NPY_NO_EXPORT int PyUFunc_handlefperr \
- (int, PyObject *, int, int *);
-NPY_NO_EXPORT int PyUFunc_ReplaceLoopBySignature \
- (PyUFuncObject *, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *);
-NPY_NO_EXPORT PyObject * PyUFunc_FromFuncAndDataAndSignature \
- (PyUFuncGenericFunction *, void **, char *, int, int, int, int, char *, char *, int, const char *);
-NPY_NO_EXPORT int PyUFunc_SetUsesArraysAsData \
- (void **, size_t);
-NPY_NO_EXPORT void PyUFunc_e_e \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_e_e_As_f_f \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_e_e_As_d_d \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_ee_e \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_ee_e_As_ff_f \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT void PyUFunc_ee_e_As_dd_d \
- (char **, npy_intp *, npy_intp *, void *);
-NPY_NO_EXPORT int PyUFunc_DefaultTypeResolver \
- (PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyObject *, PyArray_Descr **);
-NPY_NO_EXPORT int PyUFunc_ValidateCasting \
- (PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyArray_Descr **);
-
-#else
-
-#if defined(PY_UFUNC_UNIQUE_SYMBOL)
-#define PyUFunc_API PY_UFUNC_UNIQUE_SYMBOL
-#endif
-
-#if defined(NO_IMPORT) || defined(NO_IMPORT_UFUNC)
-extern void **PyUFunc_API;
-#else
-#if defined(PY_UFUNC_UNIQUE_SYMBOL)
-void **PyUFunc_API;
-#else
-static void **PyUFunc_API=NULL;
-#endif
-#endif
-
-#define PyUFunc_Type (*(PyTypeObject *)PyUFunc_API[0])
-#define PyUFunc_FromFuncAndData \
- (*(PyObject * (*)(PyUFuncGenericFunction *, void **, char *, int, int, int, int, char *, char *, int)) \
- PyUFunc_API[1])
-#define PyUFunc_RegisterLoopForType \
- (*(int (*)(PyUFuncObject *, int, PyUFuncGenericFunction, int *, void *)) \
- PyUFunc_API[2])
-#define PyUFunc_GenericFunction \
- (*(int (*)(PyUFuncObject *, PyObject *, PyObject *, PyArrayObject **)) \
- PyUFunc_API[3])
-#define PyUFunc_f_f_As_d_d \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[4])
-#define PyUFunc_d_d \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[5])
-#define PyUFunc_f_f \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[6])
-#define PyUFunc_g_g \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[7])
-#define PyUFunc_F_F_As_D_D \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[8])
-#define PyUFunc_F_F \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[9])
-#define PyUFunc_D_D \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[10])
-#define PyUFunc_G_G \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[11])
-#define PyUFunc_O_O \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[12])
-#define PyUFunc_ff_f_As_dd_d \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[13])
-#define PyUFunc_ff_f \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[14])
-#define PyUFunc_dd_d \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[15])
-#define PyUFunc_gg_g \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[16])
-#define PyUFunc_FF_F_As_DD_D \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[17])
-#define PyUFunc_DD_D \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[18])
-#define PyUFunc_FF_F \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[19])
-#define PyUFunc_GG_G \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[20])
-#define PyUFunc_OO_O \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[21])
-#define PyUFunc_O_O_method \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[22])
-#define PyUFunc_OO_O_method \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[23])
-#define PyUFunc_On_Om \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[24])
-#define PyUFunc_GetPyValues \
- (*(int (*)(char *, int *, int *, PyObject **)) \
- PyUFunc_API[25])
-#define PyUFunc_checkfperr \
- (*(int (*)(int, PyObject *, int *)) \
- PyUFunc_API[26])
-#define PyUFunc_clearfperr \
- (*(void (*)(void)) \
- PyUFunc_API[27])
-#define PyUFunc_getfperr \
- (*(int (*)(void)) \
- PyUFunc_API[28])
-#define PyUFunc_handlefperr \
- (*(int (*)(int, PyObject *, int, int *)) \
- PyUFunc_API[29])
-#define PyUFunc_ReplaceLoopBySignature \
- (*(int (*)(PyUFuncObject *, PyUFuncGenericFunction, int *, PyUFuncGenericFunction *)) \
- PyUFunc_API[30])
-#define PyUFunc_FromFuncAndDataAndSignature \
- (*(PyObject * (*)(PyUFuncGenericFunction *, void **, char *, int, int, int, int, char *, char *, int, const char *)) \
- PyUFunc_API[31])
-#define PyUFunc_SetUsesArraysAsData \
- (*(int (*)(void **, size_t)) \
- PyUFunc_API[32])
-#define PyUFunc_e_e \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[33])
-#define PyUFunc_e_e_As_f_f \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[34])
-#define PyUFunc_e_e_As_d_d \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[35])
-#define PyUFunc_ee_e \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[36])
-#define PyUFunc_ee_e_As_ff_f \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[37])
-#define PyUFunc_ee_e_As_dd_d \
- (*(void (*)(char **, npy_intp *, npy_intp *, void *)) \
- PyUFunc_API[38])
-#define PyUFunc_DefaultTypeResolver \
- (*(int (*)(PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyObject *, PyArray_Descr **)) \
- PyUFunc_API[39])
-#define PyUFunc_ValidateCasting \
- (*(int (*)(PyUFuncObject *, NPY_CASTING, PyArrayObject **, PyArray_Descr **)) \
- PyUFunc_API[40])
-
-static int
-_import_umath(void)
-{
- PyObject *numpy = PyImport_ImportModule("numpy.core.umath");
- PyObject *c_api = NULL;
-
- if (numpy == NULL) {
- PyErr_SetString(PyExc_ImportError, "numpy.core.umath failed to import");
- return -1;
- }
- c_api = PyObject_GetAttrString(numpy, "_UFUNC_API");
- Py_DECREF(numpy);
- if (c_api == NULL) {
- PyErr_SetString(PyExc_AttributeError, "_UFUNC_API not found");
- return -1;
- }
-
-#if PY_VERSION_HEX >= 0x03000000
- if (!PyCapsule_CheckExact(c_api)) {
- PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is not PyCapsule object");
- Py_DECREF(c_api);
- return -1;
- }
- PyUFunc_API = (void **)PyCapsule_GetPointer(c_api, NULL);
-#else
- if (!PyCObject_Check(c_api)) {
- PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is not PyCObject object");
- Py_DECREF(c_api);
- return -1;
- }
- PyUFunc_API = (void **)PyCObject_AsVoidPtr(c_api);
-#endif
- Py_DECREF(c_api);
- if (PyUFunc_API == NULL) {
- PyErr_SetString(PyExc_RuntimeError, "_UFUNC_API is NULL pointer");
- return -1;
- }
- return 0;
-}
-
-#if PY_VERSION_HEX >= 0x03000000
-#define NUMPY_IMPORT_UMATH_RETVAL NULL
-#else
-#define NUMPY_IMPORT_UMATH_RETVAL
-#endif
-
-#define import_umath() \
- do {\
- UFUNC_NOFPE\
- if (_import_umath() < 0) {\
- PyErr_Print();\
- PyErr_SetString(PyExc_ImportError,\
- "numpy.core.umath failed to import");\
- return NUMPY_IMPORT_UMATH_RETVAL;\
- }\
- } while(0)
-
-#define import_umath1(ret) \
- do {\
- UFUNC_NOFPE\
- if (_import_umath() < 0) {\
- PyErr_Print();\
- PyErr_SetString(PyExc_ImportError,\
- "numpy.core.umath failed to import");\
- return ret;\
- }\
- } while(0)
-
-#define import_umath2(ret, msg) \
- do {\
- UFUNC_NOFPE\
- if (_import_umath() < 0) {\
- PyErr_Print();\
- PyErr_SetString(PyExc_ImportError, msg);\
- return ret;\
- }\
- } while(0)
-
-#define import_ufunc() \
- do {\
- UFUNC_NOFPE\
- if (_import_umath() < 0) {\
- PyErr_Print();\
- PyErr_SetString(PyExc_ImportError,\
- "numpy.core.umath failed to import");\
- }\
- } while(0)
-
-#endif
diff --git a/include/numpy/_neighborhood_iterator_imp.h b/include/numpy/_neighborhood_iterator_imp.h
deleted file mode 100644
index e8860cbc7..000000000
--- a/include/numpy/_neighborhood_iterator_imp.h
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef _NPY_INCLUDE_NEIGHBORHOOD_IMP
-#error You should not include this header directly
-#endif
-/*
- * Private API (here for inline)
- */
-static NPY_INLINE int
-_PyArrayNeighborhoodIter_IncrCoord(PyArrayNeighborhoodIterObject* iter);
-
-/*
- * Update to next item of the iterator
- *
- * Note: this simply increment the coordinates vector, last dimension
- * incremented first , i.e, for dimension 3
- * ...
- * -1, -1, -1
- * -1, -1, 0
- * -1, -1, 1
- * ....
- * -1, 0, -1
- * -1, 0, 0
- * ....
- * 0, -1, -1
- * 0, -1, 0
- * ....
- */
-#define _UPDATE_COORD_ITER(c) \
- wb = iter->coordinates[c] < iter->bounds[c][1]; \
- if (wb) { \
- iter->coordinates[c] += 1; \
- return 0; \
- } \
- else { \
- iter->coordinates[c] = iter->bounds[c][0]; \
- }
-
-static NPY_INLINE int
-_PyArrayNeighborhoodIter_IncrCoord(PyArrayNeighborhoodIterObject* iter)
-{
- npy_intp i, wb;
-
- for (i = iter->nd - 1; i >= 0; --i) {
- _UPDATE_COORD_ITER(i)
- }
-
- return 0;
-}
-
-/*
- * Version optimized for 2d arrays, manual loop unrolling
- */
-static NPY_INLINE int
-_PyArrayNeighborhoodIter_IncrCoord2D(PyArrayNeighborhoodIterObject* iter)
-{
- npy_intp wb;
-
- _UPDATE_COORD_ITER(1)
- _UPDATE_COORD_ITER(0)
-
- return 0;
-}
-#undef _UPDATE_COORD_ITER
-
-/*
- * Advance to the next neighbour
- */
-static NPY_INLINE int
-PyArrayNeighborhoodIter_Next(PyArrayNeighborhoodIterObject* iter)
-{
- _PyArrayNeighborhoodIter_IncrCoord (iter);
- iter->dataptr = iter->translate((PyArrayIterObject*)iter, iter->coordinates);
-
- return 0;
-}
-
-/*
- * Reset functions
- */
-static NPY_INLINE int
-PyArrayNeighborhoodIter_Reset(PyArrayNeighborhoodIterObject* iter)
-{
- npy_intp i;
-
- for (i = 0; i < iter->nd; ++i) {
- iter->coordinates[i] = iter->bounds[i][0];
- }
- iter->dataptr = iter->translate((PyArrayIterObject*)iter, iter->coordinates);
-
- return 0;
-}
diff --git a/include/numpy/_numpyconfig.h b/include/numpy/_numpyconfig.h
deleted file mode 100644
index d55ffc38d..000000000
--- a/include/numpy/_numpyconfig.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#define NPY_SIZEOF_SHORT SIZEOF_SHORT
-#define NPY_SIZEOF_INT SIZEOF_INT
-#define NPY_SIZEOF_LONG SIZEOF_LONG
-#define NPY_SIZEOF_FLOAT 4
-#define NPY_SIZEOF_COMPLEX_FLOAT 8
-#define NPY_SIZEOF_DOUBLE 8
-#define NPY_SIZEOF_COMPLEX_DOUBLE 16
-#define NPY_SIZEOF_LONGDOUBLE 16
-#define NPY_SIZEOF_COMPLEX_LONGDOUBLE 32
-#define NPY_SIZEOF_PY_INTPTR_T 8
-#define NPY_SIZEOF_PY_LONG_LONG 8
-#define NPY_SIZEOF_LONGLONG 8
-#define NPY_NO_SMP 0
-#define NPY_HAVE_DECL_ISNAN
-#define NPY_HAVE_DECL_ISINF
-#define NPY_HAVE_DECL_ISFINITE
-#define NPY_HAVE_DECL_SIGNBIT
-#define NPY_USE_C99_COMPLEX 1
-#define NPY_HAVE_COMPLEX_DOUBLE 1
-#define NPY_HAVE_COMPLEX_FLOAT 1
-#define NPY_HAVE_COMPLEX_LONG_DOUBLE 1
-#define NPY_USE_C99_FORMATS 1
-#define NPY_VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
-#define NPY_ABI_VERSION 0x01000009
-#define NPY_API_VERSION 0x00000007
-
-#ifndef __STDC_FORMAT_MACROS
-#define __STDC_FORMAT_MACROS 1
-#endif
diff --git a/include/numpy/arrayobject.h b/include/numpy/arrayobject.h
deleted file mode 100644
index a84766f63..000000000
--- a/include/numpy/arrayobject.h
+++ /dev/null
@@ -1,22 +0,0 @@
-
-/* This expects the following variables to be defined (besides
- the usual ones from pyconfig.h
-
- SIZEOF_LONG_DOUBLE -- sizeof(long double) or sizeof(double) if no
- long double is present on platform.
- CHAR_BIT -- number of bits in a char (usually 8)
- (should be in limits.h)
-
-*/
-
-#ifndef Py_ARRAYOBJECT_H
-#define Py_ARRAYOBJECT_H
-
-#include "ndarrayobject.h"
-#include "npy_interrupt.h"
-
-#ifdef NPY_NO_PREFIX
-#include "noprefix.h"
-#endif
-
-#endif
diff --git a/include/numpy/arrayscalars.h b/include/numpy/arrayscalars.h
deleted file mode 100644
index 64450e713..000000000
--- a/include/numpy/arrayscalars.h
+++ /dev/null
@@ -1,175 +0,0 @@
-#ifndef _NPY_ARRAYSCALARS_H_
-#define _NPY_ARRAYSCALARS_H_
-
-#ifndef _MULTIARRAYMODULE
-typedef struct {
- PyObject_HEAD
- npy_bool obval;
-} PyBoolScalarObject;
-#endif
-
-
-typedef struct {
- PyObject_HEAD
- signed char obval;
-} PyByteScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- short obval;
-} PyShortScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- int obval;
-} PyIntScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- long obval;
-} PyLongScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- npy_longlong obval;
-} PyLongLongScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- unsigned char obval;
-} PyUByteScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- unsigned short obval;
-} PyUShortScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- unsigned int obval;
-} PyUIntScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- unsigned long obval;
-} PyULongScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- npy_ulonglong obval;
-} PyULongLongScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- npy_half obval;
-} PyHalfScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- float obval;
-} PyFloatScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- double obval;
-} PyDoubleScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- npy_longdouble obval;
-} PyLongDoubleScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- npy_cfloat obval;
-} PyCFloatScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- npy_cdouble obval;
-} PyCDoubleScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- npy_clongdouble obval;
-} PyCLongDoubleScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- PyObject * obval;
-} PyObjectScalarObject;
-
-typedef struct {
- PyObject_HEAD
- npy_datetime obval;
- PyArray_DatetimeMetaData obmeta;
-} PyDatetimeScalarObject;
-
-typedef struct {
- PyObject_HEAD
- npy_timedelta obval;
- PyArray_DatetimeMetaData obmeta;
-} PyTimedeltaScalarObject;
-
-
-typedef struct {
- PyObject_HEAD
- char obval;
-} PyScalarObject;
-
-#define PyStringScalarObject PyStringObject
-#define PyUnicodeScalarObject PyUnicodeObject
-
-typedef struct {
- PyObject_VAR_HEAD
- char *obval;
- PyArray_Descr *descr;
- int flags;
- PyObject *base;
-} PyVoidScalarObject;
-
-/* Macros
- PyScalarObject
- PyArrType_Type
- are defined in ndarrayobject.h
-*/
-
-#define PyArrayScalar_False ((PyObject *)(&(_PyArrayScalar_BoolValues[0])))
-#define PyArrayScalar_True ((PyObject *)(&(_PyArrayScalar_BoolValues[1])))
-#define PyArrayScalar_FromLong(i) \
- ((PyObject *)(&(_PyArrayScalar_BoolValues[((i)!=0)])))
-#define PyArrayScalar_RETURN_BOOL_FROM_LONG(i) \
- return Py_INCREF(PyArrayScalar_FromLong(i)), \
- PyArrayScalar_FromLong(i)
-#define PyArrayScalar_RETURN_FALSE \
- return Py_INCREF(PyArrayScalar_False), \
- PyArrayScalar_False
-#define PyArrayScalar_RETURN_TRUE \
- return Py_INCREF(PyArrayScalar_True), \
- PyArrayScalar_True
-
-#define PyArrayScalar_New(cls) \
- Py##cls##ArrType_Type.tp_alloc(&Py##cls##ArrType_Type, 0)
-#define PyArrayScalar_VAL(obj, cls) \
- ((Py##cls##ScalarObject *)obj)->obval
-#define PyArrayScalar_ASSIGN(obj, cls, val) \
- PyArrayScalar_VAL(obj, cls) = val
-
-#endif
diff --git a/include/numpy/halffloat.h b/include/numpy/halffloat.h
deleted file mode 100644
index 944f0ea34..000000000
--- a/include/numpy/halffloat.h
+++ /dev/null
@@ -1,69 +0,0 @@
-#ifndef __NPY_HALFFLOAT_H__
-#define __NPY_HALFFLOAT_H__
-
-#include
-#include
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Half-precision routines
- */
-
-/* Conversions */
-float npy_half_to_float(npy_half h);
-double npy_half_to_double(npy_half h);
-npy_half npy_float_to_half(float f);
-npy_half npy_double_to_half(double d);
-/* Comparisons */
-int npy_half_eq(npy_half h1, npy_half h2);
-int npy_half_ne(npy_half h1, npy_half h2);
-int npy_half_le(npy_half h1, npy_half h2);
-int npy_half_lt(npy_half h1, npy_half h2);
-int npy_half_ge(npy_half h1, npy_half h2);
-int npy_half_gt(npy_half h1, npy_half h2);
-/* faster *_nonan variants for when you know h1 and h2 are not NaN */
-int npy_half_eq_nonan(npy_half h1, npy_half h2);
-int npy_half_lt_nonan(npy_half h1, npy_half h2);
-int npy_half_le_nonan(npy_half h1, npy_half h2);
-/* Miscellaneous functions */
-int npy_half_iszero(npy_half h);
-int npy_half_isnan(npy_half h);
-int npy_half_isinf(npy_half h);
-int npy_half_isfinite(npy_half h);
-int npy_half_signbit(npy_half h);
-npy_half npy_half_copysign(npy_half x, npy_half y);
-npy_half npy_half_spacing(npy_half h);
-npy_half npy_half_nextafter(npy_half x, npy_half y);
-
-/*
- * Half-precision constants
- */
-
-#define NPY_HALF_ZERO (0x0000u)
-#define NPY_HALF_PZERO (0x0000u)
-#define NPY_HALF_NZERO (0x8000u)
-#define NPY_HALF_ONE (0x3c00u)
-#define NPY_HALF_NEGONE (0xbc00u)
-#define NPY_HALF_PINF (0x7c00u)
-#define NPY_HALF_NINF (0xfc00u)
-#define NPY_HALF_NAN (0x7e00u)
-
-#define NPY_MAX_HALF (0x7bffu)
-
-/*
- * Bit-level conversions
- */
-
-npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f);
-npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d);
-npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h);
-npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/include/numpy/multiarray_api.txt b/include/numpy/multiarray_api.txt
deleted file mode 100644
index 7e588f067..000000000
--- a/include/numpy/multiarray_api.txt
+++ /dev/null
@@ -1,2375 +0,0 @@
-
-===========
-Numpy C-API
-===========
-::
-
- unsigned int
- PyArray_GetNDArrayCVersion(void )
-
-
-Included at the very first so not auto-grabbed and thus not labeled.
-
-::
-
- int
- PyArray_SetNumericOps(PyObject *dict)
-
-Set internal structure with number functions that all arrays will use
-
-::
-
- PyObject *
- PyArray_GetNumericOps(void )
-
-Get dictionary showing number functions that all arrays will use
-
-::
-
- int
- PyArray_INCREF(PyArrayObject *mp)
-
-For object arrays, increment all internal references.
-
-::
-
- int
- PyArray_XDECREF(PyArrayObject *mp)
-
-Decrement all internal references for object arrays.
-(or arrays with object fields)
-
-::
-
- void
- PyArray_SetStringFunction(PyObject *op, int repr)
-
-Set the array print function to be a Python function.
-
-::
-
- PyArray_Descr *
- PyArray_DescrFromType(int type)
-
-Get the PyArray_Descr structure for a type.
-
-::
-
- PyObject *
- PyArray_TypeObjectFromType(int type)
-
-Get a typeobject from a type-number -- can return NULL.
-
-New reference
-
-::
-
- char *
- PyArray_Zero(PyArrayObject *arr)
-
-Get pointer to zero of correct type for array.
-
-::
-
- char *
- PyArray_One(PyArrayObject *arr)
-
-Get pointer to one of correct type for array
-
-::
-
- PyObject *
- PyArray_CastToType(PyArrayObject *arr, PyArray_Descr *dtype, int
- is_f_order)
-
-For backward compatibility
-
-Cast an array using typecode structure.
-steals reference to at --- cannot be NULL
-
-This function always makes a copy of arr, even if the dtype
-doesn't change.
-
-::
-
- int
- PyArray_CastTo(PyArrayObject *out, PyArrayObject *mp)
-
-Cast to an already created array.
-
-::
-
- int
- PyArray_CastAnyTo(PyArrayObject *out, PyArrayObject *mp)
-
-Cast to an already created array. Arrays don't have to be "broadcastable"
-Only requirement is they have the same number of elements.
-
-::
-
- int
- PyArray_CanCastSafely(int fromtype, int totype)
-
-Check the type coercion rules.
-
-::
-
- npy_bool
- PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to)
-
-leaves reference count alone --- cannot be NULL
-
-PyArray_CanCastTypeTo is equivalent to this, but adds a 'casting'
-parameter.
-
-::
-
- int
- PyArray_ObjectType(PyObject *op, int minimum_type)
-
-Return the typecode of the array a Python object would be converted to
-
-Returns the type number the result should have, or NPY_NOTYPE on error.
-
-::
-
- PyArray_Descr *
- PyArray_DescrFromObject(PyObject *op, PyArray_Descr *mintype)
-
-new reference -- accepts NULL for mintype
-
-::
-
- PyArrayObject **
- PyArray_ConvertToCommonType(PyObject *op, int *retn)
-
-
-::
-
- PyArray_Descr *
- PyArray_DescrFromScalar(PyObject *sc)
-
-Return descr object from array scalar.
-
-New reference
-
-::
-
- PyArray_Descr *
- PyArray_DescrFromTypeObject(PyObject *type)
-
-
-::
-
- npy_intp
- PyArray_Size(PyObject *op)
-
-Compute the size of an array (in number of items)
-
-::
-
- PyObject *
- PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base)
-
-Get scalar-equivalent to a region of memory described by a descriptor.
-
-::
-
- PyObject *
- PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode)
-
-Get 0-dim array from scalar
-
-0-dim array from array-scalar object
-always contains a copy of the data
-unless outcode is NULL, it is of void type and the referrer does
-not own it either.
-
-steals reference to outcode
-
-::
-
- void
- PyArray_ScalarAsCtype(PyObject *scalar, void *ctypeptr)
-
-Convert to c-type
-
-no error checking is performed -- ctypeptr must be same type as scalar
-in case of flexible type, the data is not copied
-into ctypeptr which is expected to be a pointer to pointer
-
-::
-
- int
- PyArray_CastScalarToCtype(PyObject *scalar, void
- *ctypeptr, PyArray_Descr *outcode)
-
-Cast Scalar to c-type
-
-The output buffer must be large-enough to receive the value
-Even for flexible types which is different from ScalarAsCtype
-where only a reference for flexible types is returned
-
-This may not work right on narrow builds for NumPy unicode scalars.
-
-::
-
- int
- PyArray_CastScalarDirect(PyObject *scalar, PyArray_Descr
- *indescr, void *ctypeptr, int outtype)
-
-Cast Scalar to c-type
-
-::
-
- PyObject *
- PyArray_ScalarFromObject(PyObject *object)
-
-Get an Array Scalar From a Python Object
-
-Returns NULL if unsuccessful but error is only set if another error occurred.
-Currently only Numeric-like object supported.
-
-::
-
- PyArray_VectorUnaryFunc *
- PyArray_GetCastFunc(PyArray_Descr *descr, int type_num)
-
-Get a cast function to cast from the input descriptor to the
-output type_number (must be a registered data-type).
-Returns NULL if un-successful.
-
-::
-
- PyObject *
- PyArray_FromDims(int nd, int *d, int type)
-
-Construct an empty array from dimensions and typenum
-
-::
-
- PyObject *
- PyArray_FromDimsAndDataAndDescr(int nd, int *d, PyArray_Descr
- *descr, char *data)
-
-Like FromDimsAndData but uses the Descr structure instead of typecode
-as input.
-
-::
-
- PyObject *
- PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int
- min_depth, int max_depth, int flags, PyObject
- *context)
-
-Does not check for NPY_ARRAY_ENSURECOPY and NPY_ARRAY_NOTSWAPPED in flags
-Steals a reference to newtype --- which can be NULL
-
-::
-
- PyObject *
- PyArray_EnsureArray(PyObject *op)
-
-This is a quick wrapper around PyArray_FromAny(op, NULL, 0, 0, ENSUREARRAY)
-that special cases Arrays and PyArray_Scalars up front
-It *steals a reference* to the object
-It also guarantees that the result is PyArray_Type
-Because it decrefs op if any conversion needs to take place
-so it can be used like PyArray_EnsureArray(some_function(...))
-
-::
-
- PyObject *
- PyArray_EnsureAnyArray(PyObject *op)
-
-
-::
-
- PyObject *
- PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char
- *sep)
-
-
-Given a ``FILE *`` pointer ``fp``, and a ``PyArray_Descr``, return an
-array corresponding to the data encoded in that file.
-
-If the dtype is NULL, the default array type is used (double).
-If non-null, the reference is stolen.
-
-The number of elements to read is given as ``num``; if it is < 0, then
-then as many as possible are read.
-
-If ``sep`` is NULL or empty, then binary data is assumed, else
-text data, with ``sep`` as the separator between elements. Whitespace in
-the separator matches any length of whitespace in the text, and a match
-for whitespace around the separator is added.
-
-For memory-mapped files, use the buffer interface. No more data than
-necessary is read by this routine.
-
-::
-
- PyObject *
- PyArray_FromString(char *data, npy_intp slen, PyArray_Descr
- *dtype, npy_intp num, char *sep)
-
-
-Given a pointer to a string ``data``, a string length ``slen``, and
-a ``PyArray_Descr``, return an array corresponding to the data
-encoded in that string.
-
-If the dtype is NULL, the default array type is used (double).
-If non-null, the reference is stolen.
-
-If ``slen`` is < 0, then the end of string is used for text data.
-It is an error for ``slen`` to be < 0 for binary data (since embedded NULLs
-would be the norm).
-
-The number of elements to read is given as ``num``; if it is < 0, then
-then as many as possible are read.
-
-If ``sep`` is NULL or empty, then binary data is assumed, else
-text data, with ``sep`` as the separator between elements. Whitespace in
-the separator matches any length of whitespace in the text, and a match
-for whitespace around the separator is added.
-
-::
-
- PyObject *
- PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type, npy_intp
- count, npy_intp offset)
-
-
-::
-
- PyObject *
- PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
-
-
-steals a reference to dtype (which cannot be NULL)
-
-::
-
- PyObject *
- PyArray_Return(PyArrayObject *mp)
-
-
-Return either an array or the appropriate Python object if the array
-is 0d and matches a Python type.
-
-::
-
- PyObject *
- PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, int
- offset)
-
-Get a subset of bytes from each element of the array
-
-::
-
- int
- PyArray_SetField(PyArrayObject *self, PyArray_Descr *dtype, int
- offset, PyObject *val)
-
-Set a subset of bytes from each element of the array
-
-::
-
- PyObject *
- PyArray_Byteswap(PyArrayObject *self, npy_bool inplace)
-
-
-::
-
- PyObject *
- PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int
- refcheck, NPY_ORDER order)
-
-Resize (reallocate data). Only works if nothing else is referencing this
-array and it is contiguous. If refcheck is 0, then the reference count is
-not checked and assumed to be 1. You still must own this data and have no
-weak-references and no base object.
-
-::
-
- int
- PyArray_MoveInto(PyArrayObject *dst, PyArrayObject *src)
-
-Move the memory of one array into another, allowing for overlapping data.
-
-Returns 0 on success, negative on failure.
-
-::
-
- int
- PyArray_CopyInto(PyArrayObject *dst, PyArrayObject *src)
-
-Copy an Array into another array.
-Broadcast to the destination shape if necessary.
-
-Returns 0 on success, -1 on failure.
-
-::
-
- int
- PyArray_CopyAnyInto(PyArrayObject *dst, PyArrayObject *src)
-
-Copy an Array into another array -- memory must not overlap
-Does not require src and dest to have "broadcastable" shapes
-(only the same number of elements).
-
-TODO: For NumPy 2.0, this could accept an order parameter which
-only allows NPY_CORDER and NPY_FORDER. Could also rename
-this to CopyAsFlat to make the name more intuitive.
-
-Returns 0 on success, -1 on error.
-
-::
-
- int
- PyArray_CopyObject(PyArrayObject *dest, PyObject *src_object)
-
-
-::
-
- PyObject *
- PyArray_NewCopy(PyArrayObject *obj, NPY_ORDER order)
-
-Copy an array.
-
-::
-
- PyObject *
- PyArray_ToList(PyArrayObject *self)
-
-To List
-
-::
-
- PyObject *
- PyArray_ToString(PyArrayObject *self, NPY_ORDER order)
-
-
-::
-
- int
- PyArray_ToFile(PyArrayObject *self, FILE *fp, char *sep, char *format)
-
-To File
-
-::
-
- int
- PyArray_Dump(PyObject *self, PyObject *file, int protocol)
-
-
-::
-
- PyObject *
- PyArray_Dumps(PyObject *self, int protocol)
-
-
-::
-
- int
- PyArray_ValidType(int type)
-
-Is the typenum valid?
-
-::
-
- void
- PyArray_UpdateFlags(PyArrayObject *ret, int flagmask)
-
-Update Several Flags at once.
-
-::
-
- PyObject *
- PyArray_New(PyTypeObject *subtype, int nd, npy_intp *dims, int
- type_num, npy_intp *strides, void *data, int itemsize, int
- flags, PyObject *obj)
-
-Generic new array creation routine.
-
-::
-
- PyObject *
- PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int
- nd, npy_intp *dims, npy_intp *strides, void
- *data, int flags, PyObject *obj)
-
-Generic new array creation routine.
-
-steals a reference to descr (even on failure)
-
-::
-
- PyArray_Descr *
- PyArray_DescrNew(PyArray_Descr *base)
-
-base cannot be NULL
-
-::
-
- PyArray_Descr *
- PyArray_DescrNewFromType(int type_num)
-
-
-::
-
- double
- PyArray_GetPriority(PyObject *obj, double default_)
-
-Get Priority from object
-
-::
-
- PyObject *
- PyArray_IterNew(PyObject *obj)
-
-Get Iterator.
-
-::
-
- PyObject *
- PyArray_MultiIterNew(int n, ... )
-
-Get MultiIterator,
-
-::
-
- int
- PyArray_PyIntAsInt(PyObject *o)
-
-
-::
-
- npy_intp
- PyArray_PyIntAsIntp(PyObject *o)
-
-
-::
-
- int
- PyArray_Broadcast(PyArrayMultiIterObject *mit)
-
-
-::
-
- void
- PyArray_FillObjectArray(PyArrayObject *arr, PyObject *obj)
-
-Assumes contiguous
-
-::
-
- int
- PyArray_FillWithScalar(PyArrayObject *arr, PyObject *obj)
-
-
-::
-
- npy_bool
- PyArray_CheckStrides(int elsize, int nd, npy_intp numbytes, npy_intp
- offset, npy_intp *dims, npy_intp *newstrides)
-
-
-::
-
- PyArray_Descr *
- PyArray_DescrNewByteorder(PyArray_Descr *self, char newendian)
-
-
-returns a copy of the PyArray_Descr structure with the byteorder
-altered:
-no arguments: The byteorder is swapped (in all subfields as well)
-single argument: The byteorder is forced to the given state
-(in all subfields as well)
-
-Valid states: ('big', '>') or ('little' or '<')
-('native', or '=')
-
-If a descr structure with | is encountered it's own
-byte-order is not changed but any fields are:
-
-
-Deep byteorder change of a data-type descriptor
-Leaves reference count of self unchanged --- does not DECREF self ***
-
-::
-
- PyObject *
- PyArray_IterAllButAxis(PyObject *obj, int *inaxis)
-
-Get Iterator that iterates over all but one axis (don't use this with
-PyArray_ITER_GOTO1D). The axis will be over-written if negative
-with the axis having the smallest stride.
-
-::
-
- PyObject *
- PyArray_CheckFromAny(PyObject *op, PyArray_Descr *descr, int
- min_depth, int max_depth, int requires, PyObject
- *context)
-
-steals a reference to descr -- accepts NULL
-
-::
-
- PyObject *
- PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int
- flags)
-
-steals a reference to newtype -- accepts NULL
-
-::
-
- PyObject *
- PyArray_FromInterface(PyObject *origin)
-
-
-::
-
- PyObject *
- PyArray_FromStructInterface(PyObject *input)
-
-
-::
-
- PyObject *
- PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject
- *context)
-
-
-::
-
- NPY_SCALARKIND
- PyArray_ScalarKind(int typenum, PyArrayObject **arr)
-
-ScalarKind
-
-Returns the scalar kind of a type number, with an
-optional tweak based on the scalar value itself.
-If no scalar is provided, it returns INTPOS_SCALAR
-for both signed and unsigned integers, otherwise
-it checks the sign of any signed integer to choose
-INTNEG_SCALAR when appropriate.
-
-::
-
- int
- PyArray_CanCoerceScalar(int thistype, int neededtype, NPY_SCALARKIND
- scalar)
-
-
-Determines whether the data type 'thistype', with
-scalar kind 'scalar', can be coerced into 'neededtype'.
-
-::
-
- PyObject *
- PyArray_NewFlagsObject(PyObject *obj)
-
-
-Get New ArrayFlagsObject
-
-::
-
- npy_bool
- PyArray_CanCastScalar(PyTypeObject *from, PyTypeObject *to)
-
-See if array scalars can be cast.
-
-TODO: For NumPy 2.0, add a NPY_CASTING parameter.
-
-::
-
- int
- PyArray_CompareUCS4(npy_ucs4 *s1, npy_ucs4 *s2, size_t len)
-
-
-::
-
- int
- PyArray_RemoveSmallest(PyArrayMultiIterObject *multi)
-
-Adjusts previously broadcasted iterators so that the axis with
-the smallest sum of iterator strides is not iterated over.
-Returns dimension which is smallest in the range [0,multi->nd).
-A -1 is returned if multi->nd == 0.
-
-don't use with PyArray_ITER_GOTO1D because factors are not adjusted
-
-::
-
- int
- PyArray_ElementStrides(PyObject *obj)
-
-
-::
-
- void
- PyArray_Item_INCREF(char *data, PyArray_Descr *descr)
-
-
-::
-
- void
- PyArray_Item_XDECREF(char *data, PyArray_Descr *descr)
-
-
-::
-
- PyObject *
- PyArray_FieldNames(PyObject *fields)
-
-Return the tuple of ordered field names from a dictionary.
-
-::
-
- PyObject *
- PyArray_Transpose(PyArrayObject *ap, PyArray_Dims *permute)
-
-Return Transpose.
-
-::
-
- PyObject *
- PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int
- axis, PyArrayObject *out, NPY_CLIPMODE clipmode)
-
-Take
-
-::
-
- PyObject *
- PyArray_PutTo(PyArrayObject *self, PyObject*values0, PyObject
- *indices0, NPY_CLIPMODE clipmode)
-
-Put values into an array
-
-::
-
- PyObject *
- PyArray_PutMask(PyArrayObject *self, PyObject*values0, PyObject*mask0)
-
-Put values into an array according to a mask.
-
-::
-
- PyObject *
- PyArray_Repeat(PyArrayObject *aop, PyObject *op, int axis)
-
-Repeat the array.
-
-::
-
- PyObject *
- PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject
- *out, NPY_CLIPMODE clipmode)
-
-
-::
-
- int
- PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which)
-
-Sort an array in-place
-
-::
-
- PyObject *
- PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which)
-
-ArgSort an array
-
-::
-
- PyObject *
- PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2, NPY_SEARCHSIDE
- side, PyObject *perm)
-
-
-Search the sorted array op1 for the location of the items in op2. The
-result is an array of indexes, one for each element in op2, such that if
-the item were to be inserted in op1 just before that index the array
-would still be in sorted order.
-
-Parameters
-----------
-op1 : PyArrayObject *
-Array to be searched, must be 1-D.
-op2 : PyObject *
-Array of items whose insertion indexes in op1 are wanted
-side : {NPY_SEARCHLEFT, NPY_SEARCHRIGHT}
-If NPY_SEARCHLEFT, return first valid insertion indexes
-If NPY_SEARCHRIGHT, return last valid insertion indexes
-perm : PyObject *
-Permutation array that sorts op1 (optional)
-
-Returns
--------
-ret : PyObject *
-New reference to npy_intp array containing indexes where items in op2
-could be validly inserted into op1. NULL on error.
-
-Notes
------
-Binary search is used to find the indexes.
-
-::
-
- PyObject *
- PyArray_ArgMax(PyArrayObject *op, int axis, PyArrayObject *out)
-
-ArgMax
-
-::
-
- PyObject *
- PyArray_ArgMin(PyArrayObject *op, int axis, PyArrayObject *out)
-
-ArgMin
-
-::
-
- PyObject *
- PyArray_Reshape(PyArrayObject *self, PyObject *shape)
-
-Reshape
-
-::
-
- PyObject *
- PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims, NPY_ORDER
- order)
-
-New shape for an array
-
-::
-
- PyObject *
- PyArray_Squeeze(PyArrayObject *self)
-
-
-return a new view of the array object with all of its unit-length
-dimensions squeezed out if needed, otherwise
-return the same array.
-
-::
-
- PyObject *
- PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject
- *pytype)
-
-View
-steals a reference to type -- accepts NULL
-
-::
-
- PyObject *
- PyArray_SwapAxes(PyArrayObject *ap, int a1, int a2)
-
-SwapAxes
-
-::
-
- PyObject *
- PyArray_Max(PyArrayObject *ap, int axis, PyArrayObject *out)
-
-Max
-
-::
-
- PyObject *
- PyArray_Min(PyArrayObject *ap, int axis, PyArrayObject *out)
-
-Min
-
-::
-
- PyObject *
- PyArray_Ptp(PyArrayObject *ap, int axis, PyArrayObject *out)
-
-Ptp
-
-::
-
- PyObject *
- PyArray_Mean(PyArrayObject *self, int axis, int rtype, PyArrayObject
- *out)
-
-Mean
-
-::
-
- PyObject *
- PyArray_Trace(PyArrayObject *self, int offset, int axis1, int
- axis2, int rtype, PyArrayObject *out)
-
-Trace
-
-::
-
- PyObject *
- PyArray_Diagonal(PyArrayObject *self, int offset, int axis1, int
- axis2)
-
-Diagonal
-
-In NumPy versions prior to 1.7, this function always returned a copy of
-the diagonal array. In 1.7, the code has been updated to compute a view
-onto 'self', but it still copies this array before returning, as well as
-setting the internal WARN_ON_WRITE flag. In a future version, it will
-simply return a view onto self.
-
-::
-
- PyObject *
- PyArray_Clip(PyArrayObject *self, PyObject *min, PyObject
- *max, PyArrayObject *out)
-
-Clip
-
-::
-
- PyObject *
- PyArray_Conjugate(PyArrayObject *self, PyArrayObject *out)
-
-Conjugate
-
-::
-
- PyObject *
- PyArray_Nonzero(PyArrayObject *self)
-
-Nonzero
-
-TODO: In NumPy 2.0, should make the iteration order a parameter.
-
-::
-
- PyObject *
- PyArray_Std(PyArrayObject *self, int axis, int rtype, PyArrayObject
- *out, int variance)
-
-Set variance to 1 to by-pass square-root calculation and return variance
-Std
-
-::
-
- PyObject *
- PyArray_Sum(PyArrayObject *self, int axis, int rtype, PyArrayObject
- *out)
-
-Sum
-
-::
-
- PyObject *
- PyArray_CumSum(PyArrayObject *self, int axis, int rtype, PyArrayObject
- *out)
-
-CumSum
-
-::
-
- PyObject *
- PyArray_Prod(PyArrayObject *self, int axis, int rtype, PyArrayObject
- *out)
-
-Prod
-
-::
-
- PyObject *
- PyArray_CumProd(PyArrayObject *self, int axis, int
- rtype, PyArrayObject *out)
-
-CumProd
-
-::
-
- PyObject *
- PyArray_All(PyArrayObject *self, int axis, PyArrayObject *out)
-
-All
-
-::
-
- PyObject *
- PyArray_Any(PyArrayObject *self, int axis, PyArrayObject *out)
-
-Any
-
-::
-
- PyObject *
- PyArray_Compress(PyArrayObject *self, PyObject *condition, int
- axis, PyArrayObject *out)
-
-Compress
-
-::
-
- PyObject *
- PyArray_Flatten(PyArrayObject *a, NPY_ORDER order)
-
-Flatten
-
-::
-
- PyObject *
- PyArray_Ravel(PyArrayObject *arr, NPY_ORDER order)
-
-Ravel
-Returns a contiguous array
-
-::
-
- npy_intp
- PyArray_MultiplyList(npy_intp *l1, int n)
-
-Multiply a List
-
-::
-
- int
- PyArray_MultiplyIntList(int *l1, int n)
-
-Multiply a List of ints
-
-::
-
- void *
- PyArray_GetPtr(PyArrayObject *obj, npy_intp*ind)
-
-Produce a pointer into array
-
-::
-
- int
- PyArray_CompareLists(npy_intp *l1, npy_intp *l2, int n)
-
-Compare Lists
-
-::
-
- int
- PyArray_AsCArray(PyObject **op, void *ptr, npy_intp *dims, int
- nd, PyArray_Descr*typedescr)
-
-Simulate a C-array
-steals a reference to typedescr -- can be NULL
-
-::
-
- int
- PyArray_As1D(PyObject **op, char **ptr, int *d1, int typecode)
-
-Convert to a 1D C-array
-
-::
-
- int
- PyArray_As2D(PyObject **op, char ***ptr, int *d1, int *d2, int
- typecode)
-
-Convert to a 2D C-array
-
-::
-
- int
- PyArray_Free(PyObject *op, void *ptr)
-
-Free pointers created if As2D is called
-
-::
-
- int
- PyArray_Converter(PyObject *object, PyObject **address)
-
-
-Useful to pass as converter function for O& processing in PyArg_ParseTuple.
-
-This conversion function can be used with the "O&" argument for
-PyArg_ParseTuple. It will immediately return an object of array type
-or will convert to a NPY_ARRAY_CARRAY any other object.
-
-If you use PyArray_Converter, you must DECREF the array when finished
-as you get a new reference to it.
-
-::
-
- int
- PyArray_IntpFromSequence(PyObject *seq, npy_intp *vals, int maxvals)
-
-PyArray_IntpFromSequence
-Returns the number of dimensions or -1 if an error occurred.
-vals must be large enough to hold maxvals
-
-::
-
- PyObject *
- PyArray_Concatenate(PyObject *op, int axis)
-
-Concatenate
-
-Concatenate an arbitrary Python sequence into an array.
-op is a python object supporting the sequence interface.
-Its elements will be concatenated together to form a single
-multidimensional array. If axis is NPY_MAXDIMS or bigger, then
-each sequence object will be flattened before concatenation
-
-::
-
- PyObject *
- PyArray_InnerProduct(PyObject *op1, PyObject *op2)
-
-Numeric.innerproduct(a,v)
-
-::
-
- PyObject *
- PyArray_MatrixProduct(PyObject *op1, PyObject *op2)
-
-Numeric.matrixproduct(a,v)
-just like inner product but does the swapaxes stuff on the fly
-
-::
-
- PyObject *
- PyArray_CopyAndTranspose(PyObject *op)
-
-Copy and Transpose
-
-Could deprecate this function, as there isn't a speed benefit over
-calling Transpose and then Copy.
-
-::
-
- PyObject *
- PyArray_Correlate(PyObject *op1, PyObject *op2, int mode)
-
-Numeric.correlate(a1,a2,mode)
-
-::
-
- int
- PyArray_TypestrConvert(int itemsize, int gentype)
-
-Typestr converter
-
-::
-
- int
- PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at)
-
-Get typenum from an object -- None goes to NPY_DEFAULT_TYPE
-This function takes a Python object representing a type and converts it
-to the correct PyArray_Descr * structure to describe the type.
-
-Many objects can be used to represent a data-type which in NumPy is
-quite a flexible concept.
-
-This is the central code that converts Python objects to
-Type-descriptor objects that are used throughout numpy.
-
-Returns a new reference in *at, but the returned object should not be
-modified as it may be one of the canonical immutable objects or
-a reference to the input obj.
-
-::
-
- int
- PyArray_DescrConverter2(PyObject *obj, PyArray_Descr **at)
-
-Get typenum from an object -- None goes to NULL
-
-::
-
- int
- PyArray_IntpConverter(PyObject *obj, PyArray_Dims *seq)
-
-Get intp chunk from sequence
-
-This function takes a Python sequence object and allocates and
-fills in an intp array with the converted values.
-
-Remember to free the pointer seq.ptr when done using
-PyDimMem_FREE(seq.ptr)**
-
-::
-
- int
- PyArray_BufferConverter(PyObject *obj, PyArray_Chunk *buf)
-
-Get buffer chunk from object
-
-this function takes a Python object which exposes the (single-segment)
-buffer interface and returns a pointer to the data segment
-
-You should increment the reference count by one of buf->base
-if you will hang on to a reference
-
-You only get a borrowed reference to the object. Do not free the
-memory...
-
-::
-
- int
- PyArray_AxisConverter(PyObject *obj, int *axis)
-
-Get axis from an object (possibly None) -- a converter function,
-
-See also PyArray_ConvertMultiAxis, which also handles a tuple of axes.
-
-::
-
- int
- PyArray_BoolConverter(PyObject *object, npy_bool *val)
-
-Convert an object to true / false
-
-::
-
- int
- PyArray_ByteorderConverter(PyObject *obj, char *endian)
-
-Convert object to endian
-
-::
-
- int
- PyArray_OrderConverter(PyObject *object, NPY_ORDER *val)
-
-Convert an object to FORTRAN / C / ANY / KEEP
-
-::
-
- unsigned char
- PyArray_EquivTypes(PyArray_Descr *type1, PyArray_Descr *type2)
-
-
-This function returns true if the two typecodes are
-equivalent (same basic kind and same itemsize).
-
-::
-
- PyObject *
- PyArray_Zeros(int nd, npy_intp *dims, PyArray_Descr *type, int
- is_f_order)
-
-Zeros
-
-steals a reference to type
-accepts NULL type
-
-::
-
- PyObject *
- PyArray_Empty(int nd, npy_intp *dims, PyArray_Descr *type, int
- is_f_order)
-
-Empty
-
-accepts NULL type
-steals a reference to type
-
-::
-
- PyObject *
- PyArray_Where(PyObject *condition, PyObject *x, PyObject *y)
-
-Where
-
-::
-
- PyObject *
- PyArray_Arange(double start, double stop, double step, int type_num)
-
-Arange,
-
-::
-
- PyObject *
- PyArray_ArangeObj(PyObject *start, PyObject *stop, PyObject
- *step, PyArray_Descr *dtype)
-
-
-ArangeObj,
-
-this doesn't change the references
-
-::
-
- int
- PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind)
-
-Convert object to sort kind
-
-::
-
- PyObject *
- PyArray_LexSort(PyObject *sort_keys, int axis)
-
-LexSort an array providing indices that will sort a collection of arrays
-lexicographically. The first key is sorted on first, followed by the second key
--- requires that arg"merge"sort is available for each sort_key
-
-Returns an index array that shows the indexes for the lexicographic sort along
-the given axis.
-
-::
-
- PyObject *
- PyArray_Round(PyArrayObject *a, int decimals, PyArrayObject *out)
-
-Round
-
-::
-
- unsigned char
- PyArray_EquivTypenums(int typenum1, int typenum2)
-
-
-::
-
- int
- PyArray_RegisterDataType(PyArray_Descr *descr)
-
-Register Data type
-Does not change the reference count of descr
-
-::
-
- int
- PyArray_RegisterCastFunc(PyArray_Descr *descr, int
- totype, PyArray_VectorUnaryFunc *castfunc)
-
-Register Casting Function
-Replaces any function currently stored.
-
-::
-
- int
- PyArray_RegisterCanCast(PyArray_Descr *descr, int
- totype, NPY_SCALARKIND scalar)
-
-Register a type number indicating that a descriptor can be cast
-to it safely
-
-::
-
- void
- PyArray_InitArrFuncs(PyArray_ArrFuncs *f)
-
-Initialize arrfuncs to NULL
-
-::
-
- PyObject *
- PyArray_IntTupleFromIntp(int len, npy_intp *vals)
-
-PyArray_IntTupleFromIntp
-
-::
-
- int
- PyArray_TypeNumFromName(char *str)
-
-
-::
-
- int
- PyArray_ClipmodeConverter(PyObject *object, NPY_CLIPMODE *val)
-
-Convert an object to NPY_RAISE / NPY_CLIP / NPY_WRAP
-
-::
-
- int
- PyArray_OutputConverter(PyObject *object, PyArrayObject **address)
-
-Useful to pass as converter function for O& processing in
-PyArg_ParseTuple for output arrays
-
-::
-
- PyObject *
- PyArray_BroadcastToShape(PyObject *obj, npy_intp *dims, int nd)
-
-Get Iterator broadcast to a particular shape
-
-::
-
- void
- _PyArray_SigintHandler(int signum)
-
-
-::
-
- void*
- _PyArray_GetSigintBuf(void )
-
-
-::
-
- int
- PyArray_DescrAlignConverter(PyObject *obj, PyArray_Descr **at)
-
-
-Get type-descriptor from an object forcing alignment if possible
-None goes to DEFAULT type.
-
-any object with the .fields attribute and/or .itemsize attribute (if the
-.fields attribute does not give the total size -- i.e. a partial record
-naming). If itemsize is given it must be >= size computed from fields
-
-The .fields attribute must return a convertible dictionary if present.
-Result inherits from NPY_VOID.
-
-::
-
- int
- PyArray_DescrAlignConverter2(PyObject *obj, PyArray_Descr **at)
-
-
-Get type-descriptor from an object forcing alignment if possible
-None goes to NULL.
-
-::
-
- int
- PyArray_SearchsideConverter(PyObject *obj, void *addr)
-
-Convert object to searchsorted side
-
-::
-
- PyObject *
- PyArray_CheckAxis(PyArrayObject *arr, int *axis, int flags)
-
-PyArray_CheckAxis
-
-check that axis is valid
-convert 0-d arrays to 1-d arrays
-
-::
-
- npy_intp
- PyArray_OverflowMultiplyList(npy_intp *l1, int n)
-
-Multiply a List of Non-negative numbers with over-flow detection.
-
-::
-
- int
- PyArray_CompareString(char *s1, char *s2, size_t len)
-
-
-::
-
- PyObject *
- PyArray_MultiIterFromObjects(PyObject **mps, int n, int nadd, ... )
-
-Get MultiIterator from array of Python objects and any additional arrays
-
-PyObject **mps -- array of PyObjects
-int n - number of PyObjects in the array
-int nadd - number of additional arrays to include in the iterator.
-
-Returns a multi-iterator object.
-
-::
-
- int
- PyArray_GetEndianness(void )
-
-
-::
-
- unsigned int
- PyArray_GetNDArrayCFeatureVersion(void )
-
-Returns the built-in (at compilation time) C API version
-
-::
-
- PyObject *
- PyArray_Correlate2(PyObject *op1, PyObject *op2, int mode)
-
-correlate(a1,a2,mode)
-
-This function computes the usual correlation (correlate(a1, a2) !=
-correlate(a2, a1)), and conjugates the second argument for complex inputs
-
-::
-
- PyObject*
- PyArray_NeighborhoodIterNew(PyArrayIterObject *x, npy_intp
- *bounds, int mode, PyArrayObject*fill)
-
-A Neighborhood Iterator object.
-
-::
-
- void
- PyArray_SetDatetimeParseFunction(PyObject *op)
-
-This function is scheduled to be removed
-
-TO BE REMOVED - NOT USED INTERNALLY.
-
-::
-
- void
- PyArray_DatetimeToDatetimeStruct(npy_datetime val, NPY_DATETIMEUNIT
- fr, npy_datetimestruct *result)
-
-Fill the datetime struct from the value and resolution unit.
-
-TO BE REMOVED - NOT USED INTERNALLY.
-
-::
-
- void
- PyArray_TimedeltaToTimedeltaStruct(npy_timedelta val, NPY_DATETIMEUNIT
- fr, npy_timedeltastruct *result)
-
-Fill the timedelta struct from the timedelta value and resolution unit.
-
-TO BE REMOVED - NOT USED INTERNALLY.
-
-::
-
- npy_datetime
- PyArray_DatetimeStructToDatetime(NPY_DATETIMEUNIT
- fr, npy_datetimestruct *d)
-
-Create a datetime value from a filled datetime struct and resolution unit.
-
-TO BE REMOVED - NOT USED INTERNALLY.
-
-::
-
- npy_datetime
- PyArray_TimedeltaStructToTimedelta(NPY_DATETIMEUNIT
- fr, npy_timedeltastruct *d)
-
-Create a timedelta value from a filled timedelta struct and resolution unit.
-
-TO BE REMOVED - NOT USED INTERNALLY.
-
-::
-
- NpyIter *
- NpyIter_New(PyArrayObject *op, npy_uint32 flags, NPY_ORDER
- order, NPY_CASTING casting, PyArray_Descr*dtype)
-
-Allocate a new iterator for one array object.
-
-::
-
- NpyIter *
- NpyIter_MultiNew(int nop, PyArrayObject **op_in, npy_uint32
- flags, NPY_ORDER order, NPY_CASTING
- casting, npy_uint32 *op_flags, PyArray_Descr
- **op_request_dtypes)
-
-Allocate a new iterator for more than one array object, using
-standard NumPy broadcasting rules and the default buffer size.
-
-::
-
- NpyIter *
- NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32
- flags, NPY_ORDER order, NPY_CASTING
- casting, npy_uint32 *op_flags, PyArray_Descr
- **op_request_dtypes, int oa_ndim, int
- **op_axes, npy_intp *itershape, npy_intp
- buffersize)
-
-Allocate a new iterator for multiple array objects, and advanced
-options for controlling the broadcasting, shape, and buffer size.
-
-::
-
- NpyIter *
- NpyIter_Copy(NpyIter *iter)
-
-Makes a copy of the iterator
-
-::
-
- int
- NpyIter_Deallocate(NpyIter *iter)
-
-Deallocate an iterator
-
-::
-
- npy_bool
- NpyIter_HasDelayedBufAlloc(NpyIter *iter)
-
-Whether the buffer allocation is being delayed
-
-::
-
- npy_bool
- NpyIter_HasExternalLoop(NpyIter *iter)
-
-Whether the iterator handles the inner loop
-
-::
-
- int
- NpyIter_EnableExternalLoop(NpyIter *iter)
-
-Removes the inner loop handling (so HasExternalLoop returns true)
-
-::
-
- npy_intp *
- NpyIter_GetInnerStrideArray(NpyIter *iter)
-
-Get the array of strides for the inner loop (when HasExternalLoop is true)
-
-This function may be safely called without holding the Python GIL.
-
-::
-
- npy_intp *
- NpyIter_GetInnerLoopSizePtr(NpyIter *iter)
-
-Get a pointer to the size of the inner loop (when HasExternalLoop is true)
-
-This function may be safely called without holding the Python GIL.
-
-::
-
- int
- NpyIter_Reset(NpyIter *iter, char **errmsg)
-
-Resets the iterator to its initial state
-
-If errmsg is non-NULL, it should point to a variable which will
-receive the error message, and no Python exception will be set.
-This is so that the function can be called from code not holding
-the GIL.
-
-::
-
- int
- NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char
- **errmsg)
-
-Resets the iterator to its initial state, with new base data pointers.
-This function requires great caution.
-
-If errmsg is non-NULL, it should point to a variable which will
-receive the error message, and no Python exception will be set.
-This is so that the function can be called from code not holding
-the GIL.
-
-::
-
- int
- NpyIter_ResetToIterIndexRange(NpyIter *iter, npy_intp istart, npy_intp
- iend, char **errmsg)
-
-Resets the iterator to a new iterator index range
-
-If errmsg is non-NULL, it should point to a variable which will
-receive the error message, and no Python exception will be set.
-This is so that the function can be called from code not holding
-the GIL.
-
-::
-
- int
- NpyIter_GetNDim(NpyIter *iter)
-
-Gets the number of dimensions being iterated
-
-::
-
- int
- NpyIter_GetNOp(NpyIter *iter)
-
-Gets the number of operands being iterated
-
-::
-
- NpyIter_IterNextFunc *
- NpyIter_GetIterNext(NpyIter *iter, char **errmsg)
-
-Compute the specialized iteration function for an iterator
-
-If errmsg is non-NULL, it should point to a variable which will
-receive the error message, and no Python exception will be set.
-This is so that the function can be called from code not holding
-the GIL.
-
-::
-
- npy_intp
- NpyIter_GetIterSize(NpyIter *iter)
-
-Gets the number of elements being iterated
-
-::
-
- void
- NpyIter_GetIterIndexRange(NpyIter *iter, npy_intp *istart, npy_intp
- *iend)
-
-Gets the range of iteration indices being iterated
-
-::
-
- npy_intp
- NpyIter_GetIterIndex(NpyIter *iter)
-
-Gets the current iteration index
-
-::
-
- int
- NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex)
-
-Sets the iterator position to the specified iterindex,
-which matches the iteration order of the iterator.
-
-Returns NPY_SUCCEED on success, NPY_FAIL on failure.
-
-::
-
- npy_bool
- NpyIter_HasMultiIndex(NpyIter *iter)
-
-Whether the iterator is tracking a multi-index
-
-::
-
- int
- NpyIter_GetShape(NpyIter *iter, npy_intp *outshape)
-
-Gets the broadcast shape if a multi-index is being tracked by the iterator,
-otherwise gets the shape of the iteration as Fortran-order
-(fastest-changing index first).
-
-The reason Fortran-order is returned when a multi-index
-is not enabled is that this is providing a direct view into how
-the iterator traverses the n-dimensional space. The iterator organizes
-its memory from fastest index to slowest index, and when
-a multi-index is enabled, it uses a permutation to recover the original
-order.
-
-Returns NPY_SUCCEED or NPY_FAIL.
-
-::
-
- NpyIter_GetMultiIndexFunc *
- NpyIter_GetGetMultiIndex(NpyIter *iter, char **errmsg)
-
-Compute a specialized get_multi_index function for the iterator
-
-If errmsg is non-NULL, it should point to a variable which will
-receive the error message, and no Python exception will be set.
-This is so that the function can be called from code not holding
-the GIL.
-
-::
-
- int
- NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp *multi_index)
-
-Sets the iterator to the specified multi-index, which must have the
-correct number of entries for 'ndim'. It is only valid
-when NPY_ITER_MULTI_INDEX was passed to the constructor. This operation
-fails if the multi-index is out of bounds.
-
-Returns NPY_SUCCEED on success, NPY_FAIL on failure.
-
-::
-
- int
- NpyIter_RemoveMultiIndex(NpyIter *iter)
-
-Removes multi-index support from an iterator.
-
-Returns NPY_SUCCEED or NPY_FAIL.
-
-::
-
- npy_bool
- NpyIter_HasIndex(NpyIter *iter)
-
-Whether the iterator is tracking an index
-
-::
-
- npy_bool
- NpyIter_IsBuffered(NpyIter *iter)
-
-Whether the iterator is buffered
-
-::
-
- npy_bool
- NpyIter_IsGrowInner(NpyIter *iter)
-
-Whether the inner loop can grow if buffering is unneeded
-
-::
-
- npy_intp
- NpyIter_GetBufferSize(NpyIter *iter)
-
-Gets the size of the buffer, or 0 if buffering is not enabled
-
-::
-
- npy_intp *
- NpyIter_GetIndexPtr(NpyIter *iter)
-
-Get a pointer to the index, if it is being tracked
-
-::
-
- int
- NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index)
-
-If the iterator is tracking an index, sets the iterator
-to the specified index.
-
-Returns NPY_SUCCEED on success, NPY_FAIL on failure.
-
-::
-
- char **
- NpyIter_GetDataPtrArray(NpyIter *iter)
-
-Get the array of data pointers (1 per object being iterated)
-
-This function may be safely called without holding the Python GIL.
-
-::
-
- PyArray_Descr **
- NpyIter_GetDescrArray(NpyIter *iter)
-
-Get the array of data type pointers (1 per object being iterated)
-
-::
-
- PyArrayObject **
- NpyIter_GetOperandArray(NpyIter *iter)
-
-Get the array of objects being iterated
-
-::
-
- PyArrayObject *
- NpyIter_GetIterView(NpyIter *iter, npy_intp i)
-
-Returns a view to the i-th object with the iterator's internal axes
-
-::
-
- void
- NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags)
-
-Gets an array of read flags (1 per object being iterated)
-
-::
-
- void
- NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags)
-
-Gets an array of write flags (1 per object being iterated)
-
-::
-
- void
- NpyIter_DebugPrint(NpyIter *iter)
-
-For debugging
-
-::
-
- npy_bool
- NpyIter_IterationNeedsAPI(NpyIter *iter)
-
-Whether the iteration loop, and in particular the iternext()
-function, needs API access. If this is true, the GIL must
-be retained while iterating.
-
-::
-
- void
- NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides)
-
-Get an array of strides which are fixed. Any strides which may
-change during iteration receive the value NPY_MAX_INTP. Once
-the iterator is ready to iterate, call this to get the strides
-which will always be fixed in the inner loop, then choose optimized
-inner loop functions which take advantage of those fixed strides.
-
-This function may be safely called without holding the Python GIL.
-
-::
-
- int
- NpyIter_RemoveAxis(NpyIter *iter, int axis)
-
-Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX
-was set for iterator creation, and does not work if buffering is
-enabled. This function also resets the iterator to its initial state.
-
-Returns NPY_SUCCEED or NPY_FAIL.
-
-::
-
- npy_intp *
- NpyIter_GetAxisStrideArray(NpyIter *iter, int axis)
-
-Gets the array of strides for the specified axis.
-If the iterator is tracking a multi-index, gets the strides
-for the axis specified, otherwise gets the strides for
-the iteration axis as Fortran order (fastest-changing axis first).
-
-Returns NULL if an error occurs.
-
-::
-
- npy_bool
- NpyIter_RequiresBuffering(NpyIter *iter)
-
-Whether the iteration could be done with no buffering.
-
-::
-
- char **
- NpyIter_GetInitialDataPtrArray(NpyIter *iter)
-
-Get the array of data pointers (1 per object being iterated),
-directly into the arrays (never pointing to a buffer), for starting
-unbuffered iteration. This always returns the addresses for the
-iterator position as reset to iterator index 0.
-
-These pointers are different from the pointers accepted by
-NpyIter_ResetBasePointers, because the direction along some
-axes may have been reversed, requiring base offsets.
-
-This function may be safely called without holding the Python GIL.
-
-::
-
- int
- NpyIter_CreateCompatibleStrides(NpyIter *iter, npy_intp
- itemsize, npy_intp *outstrides)
-
-Builds a set of strides which are the same as the strides of an
-output array created using the NPY_ITER_ALLOCATE flag, where NULL
-was passed for op_axes. This is for data packed contiguously,
-but not necessarily in C or Fortran order. This should be used
-together with NpyIter_GetShape and NpyIter_GetNDim.
-
-A use case for this function is to match the shape and layout of
-the iterator and tack on one or more dimensions. For example,
-in order to generate a vector per input value for a numerical gradient,
-you pass in ndim*itemsize for itemsize, then add another dimension to
-the end with size ndim and stride itemsize. To do the Hessian matrix,
-you do the same thing but add two dimensions, or take advantage of
-the symmetry and pack it into 1 dimension with a particular encoding.
-
-This function may only be called if the iterator is tracking a multi-index
-and if NPY_ITER_DONT_NEGATE_STRIDES was used to prevent an axis from
-being iterated in reverse order.
-
-If an array is created with this method, simply adding 'itemsize'
-for each iteration will traverse the new array matching the
-iterator.
-
-Returns NPY_SUCCEED or NPY_FAIL.
-
-::
-
- int
- PyArray_CastingConverter(PyObject *obj, NPY_CASTING *casting)
-
-Convert any Python object, *obj*, to an NPY_CASTING enum.
-
-::
-
- npy_intp
- PyArray_CountNonzero(PyArrayObject *self)
-
-Counts the number of non-zero elements in the array.
-
-Returns -1 on error.
-
-::
-
- PyArray_Descr *
- PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
-
-Produces the smallest size and lowest kind type to which both
-input types can be cast.
-
-::
-
- PyArray_Descr *
- PyArray_MinScalarType(PyArrayObject *arr)
-
-If arr is a scalar (has 0 dimensions) with a built-in number data type,
-finds the smallest type size/kind which can still represent its data.
-Otherwise, returns the array's data type.
-
-
-::
-
- PyArray_Descr *
- PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, npy_intp
- ndtypes, PyArray_Descr **dtypes)
-
-Produces the result type of a bunch of inputs, using the UFunc
-type promotion rules. Use this function when you have a set of
-input arrays, and need to determine an output array dtype.
-
-If all the inputs are scalars (have 0 dimensions) or the maximum "kind"
-of the scalars is greater than the maximum "kind" of the arrays, does
-a regular type promotion.
-
-Otherwise, does a type promotion on the MinScalarType
-of all the inputs. Data types passed directly are treated as array
-types.
-
-
-::
-
- npy_bool
- PyArray_CanCastArrayTo(PyArrayObject *arr, PyArray_Descr
- *to, NPY_CASTING casting)
-
-Returns 1 if the array object may be cast to the given data type using
-the casting rule, 0 otherwise. This differs from PyArray_CanCastTo in
-that it handles scalar arrays (0 dimensions) specially, by checking
-their value.
-
-::
-
- npy_bool
- PyArray_CanCastTypeTo(PyArray_Descr *from, PyArray_Descr
- *to, NPY_CASTING casting)
-
-Returns true if data of type 'from' may be cast to data of type
-'to' according to the rule 'casting'.
-
-::
-
- PyArrayObject *
- PyArray_EinsteinSum(char *subscripts, npy_intp nop, PyArrayObject
- **op_in, PyArray_Descr *dtype, NPY_ORDER
- order, NPY_CASTING casting, PyArrayObject *out)
-
-This function provides summation of array elements according to
-the Einstein summation convention. For example:
-- trace(a) -> einsum("ii", a)
-- transpose(a) -> einsum("ji", a)
-- multiply(a,b) -> einsum(",", a, b)
-- inner(a,b) -> einsum("i,i", a, b)
-- outer(a,b) -> einsum("i,j", a, b)
-- matvec(a,b) -> einsum("ij,j", a, b)
-- matmat(a,b) -> einsum("ij,jk", a, b)
-
-subscripts: The string of subscripts for einstein summation.
-nop: The number of operands
-op_in: The array of operands
-dtype: Either NULL, or the data type to force the calculation as.
-order: The order for the calculation/the output axes.
-casting: What kind of casts should be permitted.
-out: Either NULL, or an array into which the output should be placed.
-
-By default, the labels get placed in alphabetical order
-at the end of the output. So, if c = einsum("i,j", a, b)
-then c[i,j] == a[i]*b[j], but if c = einsum("j,i", a, b)
-then c[i,j] = a[j]*b[i].
-
-Alternatively, you can control the output order or prevent
-an axis from being summed/force an axis to be summed by providing
-indices for the output. This allows us to turn 'trace' into
-'diag', for example.
-- diag(a) -> einsum("ii->i", a)
-- sum(a, axis=0) -> einsum("i...->", a)
-
-Subscripts at the beginning and end may be specified by
-putting an ellipsis "..." in the middle. For example,
-the function einsum("i...i", a) takes the diagonal of
-the first and last dimensions of the operand, and
-einsum("ij...,jk...->ik...") takes the matrix product using
-the first two indices of each operand instead of the last two.
-
-When there is only one operand, no axes being summed, and
-no output parameter, this function returns a view
-into the operand instead of making a copy.
-
-::
-
- PyObject *
- PyArray_NewLikeArray(PyArrayObject *prototype, NPY_ORDER
- order, PyArray_Descr *dtype, int subok)
-
-Creates a new array with the same shape as the provided one,
-with possible memory layout order and data type changes.
-
-prototype - The array the new one should be like.
-order - NPY_CORDER - C-contiguous result.
-NPY_FORTRANORDER - Fortran-contiguous result.
-NPY_ANYORDER - Fortran if prototype is Fortran, C otherwise.
-NPY_KEEPORDER - Keeps the axis ordering of prototype.
-dtype - If not NULL, overrides the data type of the result.
-subok - If 1, use the prototype's array subtype, otherwise
-always create a base-class array.
-
-NOTE: If dtype is not NULL, steals the dtype reference.
-
-::
-
- int
- PyArray_GetArrayParamsFromObject(PyObject *op, PyArray_Descr
- *requested_dtype, npy_bool
- writeable, PyArray_Descr
- **out_dtype, int *out_ndim, npy_intp
- *out_dims, PyArrayObject
- **out_arr, PyObject *context)
-
-Retrieves the array parameters for viewing/converting an arbitrary
-PyObject* to a NumPy array. This allows the "innate type and shape"
-of Python list-of-lists to be discovered without
-actually converting to an array.
-
-In some cases, such as structured arrays and the __array__ interface,
-a data type needs to be used to make sense of the object. When
-this is needed, provide a Descr for 'requested_dtype', otherwise
-provide NULL. This reference is not stolen. Also, if the requested
-dtype doesn't modify the interpretation of the input, out_dtype will
-still get the "innate" dtype of the object, not the dtype passed
-in 'requested_dtype'.
-
-If writing to the value in 'op' is desired, set the boolean
-'writeable' to 1. This raises an error when 'op' is a scalar, list
-of lists, or other non-writeable 'op'.
-
-Result: When success (0 return value) is returned, either out_arr
-is filled with a non-NULL PyArrayObject and
-the rest of the parameters are untouched, or out_arr is
-filled with NULL, and the rest of the parameters are
-filled.
-
-Typical usage:
-
-PyArrayObject *arr = NULL;
-PyArray_Descr *dtype = NULL;
-int ndim = 0;
-npy_intp dims[NPY_MAXDIMS];
-
-if (PyArray_GetArrayParamsFromObject(op, NULL, 1, &dtype,
-&ndim, &dims, &arr, NULL) < 0) {
-return NULL;
-}
-if (arr == NULL) {
-... validate/change dtype, validate flags, ndim, etc ...
-// Could make custom strides here too
-arr = PyArray_NewFromDescr(&PyArray_Type, dtype, ndim,
-dims, NULL,
-is_f_order ? NPY_ARRAY_F_CONTIGUOUS : 0,
-NULL);
-if (arr == NULL) {
-return NULL;
-}
-if (PyArray_CopyObject(arr, op) < 0) {
-Py_DECREF(arr);
-return NULL;
-}
-}
-else {
-... in this case the other parameters weren't filled, just
-validate and possibly copy arr itself ...
-}
-... use arr ...
-
-::
-
- int
- PyArray_ConvertClipmodeSequence(PyObject *object, NPY_CLIPMODE
- *modes, int n)
-
-Convert an object to an array of n NPY_CLIPMODE values.
-This is intended to be used in functions where a different mode
-could be applied to each axis, like in ravel_multi_index.
-
-::
-
- PyObject *
- PyArray_MatrixProduct2(PyObject *op1, PyObject
- *op2, PyArrayObject*out)
-
-Numeric.matrixproduct(a,v,out)
-just like inner product but does the swapaxes stuff on the fly
-
-::
-
- npy_bool
- NpyIter_IsFirstVisit(NpyIter *iter, int iop)
-
-Checks to see whether this is the first time the elements
-of the specified reduction operand which the iterator points at are
-being seen for the first time. The function returns
-a reasonable answer for reduction operands and when buffering is
-disabled. The answer may be incorrect for buffered non-reduction
-operands.
-
-This function is intended to be used in EXTERNAL_LOOP mode only,
-and will produce some wrong answers when that mode is not enabled.
-
-If this function returns true, the caller should also
-check the inner loop stride of the operand, because if
-that stride is 0, then only the first element of the innermost
-external loop is being visited for the first time.
-
-WARNING: For performance reasons, 'iop' is not bounds-checked,
-it is not confirmed that 'iop' is actually a reduction
-operand, and it is not confirmed that EXTERNAL_LOOP
-mode is enabled. These checks are the responsibility of
-the caller, and should be done outside of any inner loops.
-
-::
-
- int
- PyArray_SetBaseObject(PyArrayObject *arr, PyObject *obj)
-
-Sets the 'base' attribute of the array. This steals a reference
-to 'obj'.
-
-Returns 0 on success, -1 on failure.
-
-::
-
- void
- PyArray_CreateSortedStridePerm(int ndim, npy_intp
- *strides, npy_stride_sort_item
- *out_strideperm)
-
-
-This function populates the first ndim elements
-of strideperm with sorted descending by their absolute values.
-For example, the stride array (4, -2, 12) becomes
-[(2, 12), (0, 4), (1, -2)].
-
-::
-
- void
- PyArray_RemoveAxesInPlace(PyArrayObject *arr, npy_bool *flags)
-
-
-Removes the axes flagged as True from the array,
-modifying it in place. If an axis flagged for removal
-has a shape entry bigger than one, this effectively selects
-index zero for that axis.
-
-WARNING: If an axis flagged for removal has a shape equal to zero,
-the array will point to invalid memory. The caller must
-validate this!
-
-For example, this can be used to remove the reduction axes
-from a reduction result once its computation is complete.
-
-::
-
- void
- PyArray_DebugPrint(PyArrayObject *obj)
-
-Prints the raw data of the ndarray in a form useful for debugging
-low-level C issues.
-
-::
-
- int
- PyArray_FailUnlessWriteable(PyArrayObject *obj, const char *name)
-
-
-This function does nothing if obj is writeable, and raises an exception
-(and returns -1) if obj is not writeable. It may also do other
-house-keeping, such as issuing warnings on arrays which are transitioning
-to become views. Always call this function at some point before writing to
-an array.
-
-'name' is a name for the array, used to give better error
-messages. Something like "assignment destination", "output array", or even
-just "array".
-
-::
-
- int
- PyArray_SetUpdateIfCopyBase(PyArrayObject *arr, PyArrayObject *base)
-
-
-Precondition: 'arr' is a copy of 'base' (though possibly with different
-strides, ordering, etc.). This function sets the UPDATEIFCOPY flag and the
-->base pointer on 'arr', so that when 'arr' is destructed, it will copy any
-changes back to 'base'.
-
-Steals a reference to 'base'.
-
-Returns 0 on success, -1 on failure.
-
-::
-
- void *
- PyDataMem_NEW(size_t size)
-
-Allocates memory for array data.
-
-::
-
- void
- PyDataMem_FREE(void *ptr)
-
-Free memory for array data.
-
-::
-
- void *
- PyDataMem_RENEW(void *ptr, size_t size)
-
-Reallocate/resize memory for array data.
-
-::
-
- PyDataMem_EventHookFunc *
- PyDataMem_SetEventHook(PyDataMem_EventHookFunc *newhook, void
- *user_data, void **old_data)
-
-Sets the allocation event hook for numpy array data.
-Takes a PyDataMem_EventHookFunc *, which has the signature:
-void hook(void *old, void *new, size_t size, void *user_data).
-Also takes a void *user_data, and void **old_data.
-
-Returns a pointer to the previous hook or NULL. If old_data is
-non-NULL, the previous user_data pointer will be copied to it.
-
-If not NULL, hook will be called at the end of each PyDataMem_NEW/FREE/RENEW:
-result = PyDataMem_NEW(size) -> (*hook)(NULL, result, size, user_data)
-PyDataMem_FREE(ptr) -> (*hook)(ptr, NULL, 0, user_data)
-result = PyDataMem_RENEW(ptr, size) -> (*hook)(ptr, result, size, user_data)
-
-When the hook is called, the GIL will be held by the calling
-thread. The hook should be written to be reentrant, if it performs
-operations that might cause new allocation events (such as the
-creation/descruction numpy objects, or creating/destroying Python
-objects which might cause a gc)
-
diff --git a/include/numpy/ndarrayobject.h b/include/numpy/ndarrayobject.h
deleted file mode 100644
index f00dd7744..000000000
--- a/include/numpy/ndarrayobject.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * DON'T INCLUDE THIS DIRECTLY.
- */
-
-#ifndef NPY_NDARRAYOBJECT_H
-#define NPY_NDARRAYOBJECT_H
-#ifdef __cplusplus
-#define CONFUSE_EMACS {
-#define CONFUSE_EMACS2 }
-extern "C" CONFUSE_EMACS
-#undef CONFUSE_EMACS
-#undef CONFUSE_EMACS2
-/* ... otherwise a semi-smart identer (like emacs) tries to indent
- everything when you're typing */
-#endif
-
-#include "ndarraytypes.h"
-
-/* Includes the "function" C-API -- these are all stored in a
- list of pointers --- one for each file
- The two lists are concatenated into one in multiarray.
-
- They are available as import_array()
-*/
-
-#include "__multiarray_api.h"
-
-
-/* C-API that requries previous API to be defined */
-
-#define PyArray_DescrCheck(op) (((PyObject*)(op))->ob_type==&PyArrayDescr_Type)
-
-#define PyArray_Check(op) PyObject_TypeCheck(op, &PyArray_Type)
-#define PyArray_CheckExact(op) (((PyObject*)(op))->ob_type == &PyArray_Type)
-
-#define PyArray_HasArrayInterfaceType(op, type, context, out) \
- ((((out)=PyArray_FromStructInterface(op)) != Py_NotImplemented) || \
- (((out)=PyArray_FromInterface(op)) != Py_NotImplemented) || \
- (((out)=PyArray_FromArrayAttr(op, type, context)) != \
- Py_NotImplemented))
-
-#define PyArray_HasArrayInterface(op, out) \
- PyArray_HasArrayInterfaceType(op, NULL, NULL, out)
-
-#define PyArray_IsZeroDim(op) (PyArray_Check(op) && \
- (PyArray_NDIM((PyArrayObject *)op) == 0))
-
-#define PyArray_IsScalar(obj, cls) \
- (PyObject_TypeCheck(obj, &Py##cls##ArrType_Type))
-
-#define PyArray_CheckScalar(m) (PyArray_IsScalar(m, Generic) || \
- PyArray_IsZeroDim(m))
-
-#define PyArray_IsPythonNumber(obj) \
- (PyInt_Check(obj) || PyFloat_Check(obj) || PyComplex_Check(obj) || \
- PyLong_Check(obj) || PyBool_Check(obj))
-
-#define PyArray_IsPythonScalar(obj) \
- (PyArray_IsPythonNumber(obj) || PyString_Check(obj) || \
- PyUnicode_Check(obj))
-
-#define PyArray_IsAnyScalar(obj) \
- (PyArray_IsScalar(obj, Generic) || PyArray_IsPythonScalar(obj))
-
-#define PyArray_CheckAnyScalar(obj) (PyArray_IsPythonScalar(obj) || \
- PyArray_CheckScalar(obj))
-
-#define PyArray_IsIntegerScalar(obj) (PyInt_Check(obj) \
- || PyLong_Check(obj) \
- || PyArray_IsScalar((obj), Integer))
-
-
-#define PyArray_GETCONTIGUOUS(m) (PyArray_ISCONTIGUOUS(m) ? \
- Py_INCREF(m), (m) : \
- (PyArrayObject *)(PyArray_Copy(m)))
-
-#define PyArray_SAMESHAPE(a1,a2) ((PyArray_NDIM(a1) == PyArray_NDIM(a2)) && \
- PyArray_CompareLists(PyArray_DIMS(a1), \
- PyArray_DIMS(a2), \
- PyArray_NDIM(a1)))
-
-#define PyArray_SIZE(m) PyArray_MultiplyList(PyArray_DIMS(m), PyArray_NDIM(m))
-#define PyArray_NBYTES(m) (PyArray_ITEMSIZE(m) * PyArray_SIZE(m))
-#define PyArray_FROM_O(m) PyArray_FromAny(m, NULL, 0, 0, 0, NULL)
-
-#define PyArray_FROM_OF(m,flags) PyArray_CheckFromAny(m, NULL, 0, 0, flags, \
- NULL)
-
-#define PyArray_FROM_OT(m,type) PyArray_FromAny(m, \
- PyArray_DescrFromType(type), 0, 0, 0, NULL);
-
-#define PyArray_FROM_OTF(m, type, flags) \
- PyArray_FromAny(m, PyArray_DescrFromType(type), 0, 0, \
- (((flags) & NPY_ARRAY_ENSURECOPY) ? \
- ((flags) | NPY_ARRAY_DEFAULT) : (flags)), NULL)
-
-#define PyArray_FROMANY(m, type, min, max, flags) \
- PyArray_FromAny(m, PyArray_DescrFromType(type), min, max, \
- (((flags) & NPY_ARRAY_ENSURECOPY) ? \
- (flags) | NPY_ARRAY_DEFAULT : (flags)), NULL)
-
-#define PyArray_ZEROS(m, dims, type, is_f_order) \
- PyArray_Zeros(m, dims, PyArray_DescrFromType(type), is_f_order)
-
-#define PyArray_EMPTY(m, dims, type, is_f_order) \
- PyArray_Empty(m, dims, PyArray_DescrFromType(type), is_f_order)
-
-#define PyArray_FILLWBYTE(obj, val) memset(PyArray_DATA(obj), val, \
- PyArray_NBYTES(obj))
-
-#define PyArray_REFCOUNT(obj) (((PyObject *)(obj))->ob_refcnt)
-#define NPY_REFCOUNT PyArray_REFCOUNT
-#define NPY_MAX_ELSIZE (2 * NPY_SIZEOF_LONGDOUBLE)
-
-#define PyArray_ContiguousFromAny(op, type, min_depth, max_depth) \
- PyArray_FromAny(op, PyArray_DescrFromType(type), min_depth, \
- max_depth, NPY_ARRAY_DEFAULT, NULL)
-
-#define PyArray_EquivArrTypes(a1, a2) \
- PyArray_EquivTypes(PyArray_DESCR(a1), PyArray_DESCR(a2))
-
-#define PyArray_EquivByteorders(b1, b2) \
- (((b1) == (b2)) || (PyArray_ISNBO(b1) == PyArray_ISNBO(b2)))
-
-#define PyArray_SimpleNew(nd, dims, typenum) \
- PyArray_New(&PyArray_Type, nd, dims, typenum, NULL, NULL, 0, 0, NULL)
-
-#define PyArray_SimpleNewFromData(nd, dims, typenum, data) \
- PyArray_New(&PyArray_Type, nd, dims, typenum, NULL, \
- data, 0, NPY_ARRAY_CARRAY, NULL)
-
-#define PyArray_SimpleNewFromDescr(nd, dims, descr) \
- PyArray_NewFromDescr(&PyArray_Type, descr, nd, dims, \
- NULL, NULL, 0, NULL)
-
-#define PyArray_ToScalar(data, arr) \
- PyArray_Scalar(data, PyArray_DESCR(arr), (PyObject *)arr)
-
-
-/* These might be faster without the dereferencing of obj
- going on inside -- of course an optimizing compiler should
- inline the constants inside a for loop making it a moot point
-*/
-
-#define PyArray_GETPTR1(obj, i) ((void *)(PyArray_BYTES(obj) + \
- (i)*PyArray_STRIDES(obj)[0]))
-
-#define PyArray_GETPTR2(obj, i, j) ((void *)(PyArray_BYTES(obj) + \
- (i)*PyArray_STRIDES(obj)[0] + \
- (j)*PyArray_STRIDES(obj)[1]))
-
-#define PyArray_GETPTR3(obj, i, j, k) ((void *)(PyArray_BYTES(obj) + \
- (i)*PyArray_STRIDES(obj)[0] + \
- (j)*PyArray_STRIDES(obj)[1] + \
- (k)*PyArray_STRIDES(obj)[2]))
-
-#define PyArray_GETPTR4(obj, i, j, k, l) ((void *)(PyArray_BYTES(obj) + \
- (i)*PyArray_STRIDES(obj)[0] + \
- (j)*PyArray_STRIDES(obj)[1] + \
- (k)*PyArray_STRIDES(obj)[2] + \
- (l)*PyArray_STRIDES(obj)[3]))
-
-static NPY_INLINE void
-PyArray_XDECREF_ERR(PyArrayObject *arr)
-{
- if (arr != NULL) {
- if (PyArray_FLAGS(arr) & NPY_ARRAY_UPDATEIFCOPY) {
- PyArrayObject *base = (PyArrayObject *)PyArray_BASE(arr);
- PyArray_ENABLEFLAGS(base, NPY_ARRAY_WRITEABLE);
- PyArray_CLEARFLAGS(arr, NPY_ARRAY_UPDATEIFCOPY);
- }
- Py_DECREF(arr);
- }
-}
-
-#define PyArray_DESCR_REPLACE(descr) do { \
- PyArray_Descr *_new_; \
- _new_ = PyArray_DescrNew(descr); \
- Py_XDECREF(descr); \
- descr = _new_; \
- } while(0)
-
-/* Copy should always return contiguous array */
-#define PyArray_Copy(obj) PyArray_NewCopy(obj, NPY_CORDER)
-
-#define PyArray_FromObject(op, type, min_depth, max_depth) \
- PyArray_FromAny(op, PyArray_DescrFromType(type), min_depth, \
- max_depth, NPY_ARRAY_BEHAVED | \
- NPY_ARRAY_ENSUREARRAY, NULL)
-
-#define PyArray_ContiguousFromObject(op, type, min_depth, max_depth) \
- PyArray_FromAny(op, PyArray_DescrFromType(type), min_depth, \
- max_depth, NPY_ARRAY_DEFAULT | \
- NPY_ARRAY_ENSUREARRAY, NULL)
-
-#define PyArray_CopyFromObject(op, type, min_depth, max_depth) \
- PyArray_FromAny(op, PyArray_DescrFromType(type), min_depth, \
- max_depth, NPY_ARRAY_ENSURECOPY | \
- NPY_ARRAY_DEFAULT | \
- NPY_ARRAY_ENSUREARRAY, NULL)
-
-#define PyArray_Cast(mp, type_num) \
- PyArray_CastToType(mp, PyArray_DescrFromType(type_num), 0)
-
-#define PyArray_Take(ap, items, axis) \
- PyArray_TakeFrom(ap, items, axis, NULL, NPY_RAISE)
-
-#define PyArray_Put(ap, items, values) \
- PyArray_PutTo(ap, items, values, NPY_RAISE)
-
-/* Compatibility with old Numeric stuff -- don't use in new code */
-
-#define PyArray_FromDimsAndData(nd, d, type, data) \
- PyArray_FromDimsAndDataAndDescr(nd, d, PyArray_DescrFromType(type), \
- data)
-
-
-/*
- Check to see if this key in the dictionary is the "title"
- entry of the tuple (i.e. a duplicate dictionary entry in the fields
- dict.
-*/
-
-#define NPY_TITLE_KEY(key, value) ((PyTuple_GET_SIZE((value))==3) && \
- (PyTuple_GET_ITEM((value), 2) == (key)))
-
-
-/* Define python version independent deprecation macro */
-
-#if PY_VERSION_HEX >= 0x02050000
-#define DEPRECATE(msg) PyErr_WarnEx(PyExc_DeprecationWarning,msg,1)
-#define DEPRECATE_FUTUREWARNING(msg) PyErr_WarnEx(PyExc_FutureWarning,msg,1)
-#else
-#define DEPRECATE(msg) PyErr_Warn(PyExc_DeprecationWarning,msg)
-#define DEPRECATE_FUTUREWARNING(msg) PyErr_Warn(PyExc_FutureWarning,msg)
-#endif
-
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* NPY_NDARRAYOBJECT_H */
diff --git a/include/numpy/ndarraytypes.h b/include/numpy/ndarraytypes.h
deleted file mode 100644
index 04d037ec8..000000000
--- a/include/numpy/ndarraytypes.h
+++ /dev/null
@@ -1,1731 +0,0 @@
-#ifndef NDARRAYTYPES_H
-#define NDARRAYTYPES_H
-
-/* numpyconfig.h is auto-generated by the installer */
-#include "numpyconfig.h"
-
-#include "npy_common.h"
-#include "npy_endian.h"
-#include "npy_cpu.h"
-#include "utils.h"
-
-#ifdef NPY_ENABLE_SEPARATE_COMPILATION
- #define NPY_NO_EXPORT NPY_VISIBILITY_HIDDEN
-#else
- #define NPY_NO_EXPORT static
-#endif
-
-/* Only use thread if configured in config and python supports it */
-#if defined WITH_THREAD && !NPY_NO_SMP
- #define NPY_ALLOW_THREADS 1
-#else
- #define NPY_ALLOW_THREADS 0
-#endif
-
-
-
-/*
- * There are several places in the code where an array of dimensions
- * is allocated statically. This is the size of that static
- * allocation.
- *
- * The array creation itself could have arbitrary dimensions but all
- * the places where static allocation is used would need to be changed
- * to dynamic (including inside of several structures)
- */
-
-#define NPY_MAXDIMS 32
-#define NPY_MAXARGS 32
-
-/* Used for Converter Functions "O&" code in ParseTuple */
-#define NPY_FAIL 0
-#define NPY_SUCCEED 1
-
-/*
- * Binary compatibility version number. This number is increased
- * whenever the C-API is changed such that binary compatibility is
- * broken, i.e. whenever a recompile of extension modules is needed.
- */
-#define NPY_VERSION NPY_ABI_VERSION
-
-/*
- * Minor API version. This number is increased whenever a change is
- * made to the C-API -- whether it breaks binary compatibility or not.
- * Some changes, such as adding a function pointer to the end of the
- * function table, can be made without breaking binary compatibility.
- * In this case, only the NPY_FEATURE_VERSION (*not* NPY_VERSION)
- * would be increased. Whenever binary compatibility is broken, both
- * NPY_VERSION and NPY_FEATURE_VERSION should be increased.
- */
-#define NPY_FEATURE_VERSION NPY_API_VERSION
-
-enum NPY_TYPES { NPY_BOOL=0,
- NPY_BYTE, NPY_UBYTE,
- NPY_SHORT, NPY_USHORT,
- NPY_INT, NPY_UINT,
- NPY_LONG, NPY_ULONG,
- NPY_LONGLONG, NPY_ULONGLONG,
- NPY_FLOAT, NPY_DOUBLE, NPY_LONGDOUBLE,
- NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE,
- NPY_OBJECT=17,
- NPY_STRING, NPY_UNICODE,
- NPY_VOID,
- /*
- * New 1.6 types appended, may be integrated
- * into the above in 2.0.
- */
- NPY_DATETIME, NPY_TIMEDELTA, NPY_HALF,
-
- NPY_NTYPES,
- NPY_NOTYPE,
- NPY_CHAR, /* special flag */
- NPY_USERDEF=256, /* leave room for characters */
-
- /* The number of types not including the new 1.6 types */
- NPY_NTYPES_ABI_COMPATIBLE=21
-};
-
-/* basetype array priority */
-#define NPY_PRIORITY 0.0
-
-/* default subtype priority */
-#define NPY_SUBTYPE_PRIORITY 1.0
-
-/* default scalar priority */
-#define NPY_SCALAR_PRIORITY -1000000.0
-
-/* How many floating point types are there (excluding half) */
-#define NPY_NUM_FLOATTYPE 3
-
-/*
- * These characters correspond to the array type and the struct
- * module
- */
-
-enum NPY_TYPECHAR {
- NPY_BOOLLTR = '?',
- NPY_BYTELTR = 'b',
- NPY_UBYTELTR = 'B',
- NPY_SHORTLTR = 'h',
- NPY_USHORTLTR = 'H',
- NPY_INTLTR = 'i',
- NPY_UINTLTR = 'I',
- NPY_LONGLTR = 'l',
- NPY_ULONGLTR = 'L',
- NPY_LONGLONGLTR = 'q',
- NPY_ULONGLONGLTR = 'Q',
- NPY_HALFLTR = 'e',
- NPY_FLOATLTR = 'f',
- NPY_DOUBLELTR = 'd',
- NPY_LONGDOUBLELTR = 'g',
- NPY_CFLOATLTR = 'F',
- NPY_CDOUBLELTR = 'D',
- NPY_CLONGDOUBLELTR = 'G',
- NPY_OBJECTLTR = 'O',
- NPY_STRINGLTR = 'S',
- NPY_STRINGLTR2 = 'a',
- NPY_UNICODELTR = 'U',
- NPY_VOIDLTR = 'V',
- NPY_DATETIMELTR = 'M',
- NPY_TIMEDELTALTR = 'm',
- NPY_CHARLTR = 'c',
-
- /*
- * No Descriptor, just a define -- this let's
- * Python users specify an array of integers
- * large enough to hold a pointer on the
- * platform
- */
- NPY_INTPLTR = 'p',
- NPY_UINTPLTR = 'P',
-
- /*
- * These are for dtype 'kinds', not dtype 'typecodes'
- * as the above are for.
- */
- NPY_GENBOOLLTR ='b',
- NPY_SIGNEDLTR = 'i',
- NPY_UNSIGNEDLTR = 'u',
- NPY_FLOATINGLTR = 'f',
- NPY_COMPLEXLTR = 'c'
-};
-
-typedef enum {
- NPY_QUICKSORT=0,
- NPY_HEAPSORT=1,
- NPY_MERGESORT=2
-} NPY_SORTKIND;
-#define NPY_NSORTS (NPY_MERGESORT + 1)
-
-
-typedef enum {
- NPY_SEARCHLEFT=0,
- NPY_SEARCHRIGHT=1
-} NPY_SEARCHSIDE;
-#define NPY_NSEARCHSIDES (NPY_SEARCHRIGHT + 1)
-
-
-typedef enum {
- NPY_NOSCALAR=-1,
- NPY_BOOL_SCALAR,
- NPY_INTPOS_SCALAR,
- NPY_INTNEG_SCALAR,
- NPY_FLOAT_SCALAR,
- NPY_COMPLEX_SCALAR,
- NPY_OBJECT_SCALAR
-} NPY_SCALARKIND;
-#define NPY_NSCALARKINDS (NPY_OBJECT_SCALAR + 1)
-
-/* For specifying array memory layout or iteration order */
-typedef enum {
- /* Fortran order if inputs are all Fortran, C otherwise */
- NPY_ANYORDER=-1,
- /* C order */
- NPY_CORDER=0,
- /* Fortran order */
- NPY_FORTRANORDER=1,
- /* An order as close to the inputs as possible */
- NPY_KEEPORDER=2
-} NPY_ORDER;
-
-/* For specifying allowed casting in operations which support it */
-typedef enum {
- /* Only allow identical types */
- NPY_NO_CASTING=0,
- /* Allow identical and byte swapped types */
- NPY_EQUIV_CASTING=1,
- /* Only allow safe casts */
- NPY_SAFE_CASTING=2,
- /* Allow safe casts or casts within the same kind */
- NPY_SAME_KIND_CASTING=3,
- /* Allow any casts */
- NPY_UNSAFE_CASTING=4,
-
- /*
- * Temporary internal definition only, will be removed in upcoming
- * release, see below
- * */
- NPY_INTERNAL_UNSAFE_CASTING_BUT_WARN_UNLESS_SAME_KIND = 100,
-} NPY_CASTING;
-
-typedef enum {
- NPY_CLIP=0,
- NPY_WRAP=1,
- NPY_RAISE=2
-} NPY_CLIPMODE;
-
-/* The special not-a-time (NaT) value */
-#define NPY_DATETIME_NAT NPY_MIN_INT64
-
-/*
- * Upper bound on the length of a DATETIME ISO 8601 string
- * YEAR: 21 (64-bit year)
- * MONTH: 3
- * DAY: 3
- * HOURS: 3
- * MINUTES: 3
- * SECONDS: 3
- * ATTOSECONDS: 1 + 3*6
- * TIMEZONE: 5
- * NULL TERMINATOR: 1
- */
-#define NPY_DATETIME_MAX_ISO8601_STRLEN (21+3*5+1+3*6+6+1)
-
-typedef enum {
- NPY_FR_Y = 0, /* Years */
- NPY_FR_M = 1, /* Months */
- NPY_FR_W = 2, /* Weeks */
- /* Gap where 1.6 NPY_FR_B (value 3) was */
- NPY_FR_D = 4, /* Days */
- NPY_FR_h = 5, /* hours */
- NPY_FR_m = 6, /* minutes */
- NPY_FR_s = 7, /* seconds */
- NPY_FR_ms = 8, /* milliseconds */
- NPY_FR_us = 9, /* microseconds */
- NPY_FR_ns = 10,/* nanoseconds */
- NPY_FR_ps = 11,/* picoseconds */
- NPY_FR_fs = 12,/* femtoseconds */
- NPY_FR_as = 13,/* attoseconds */
- NPY_FR_GENERIC = 14 /* Generic, unbound units, can convert to anything */
-} NPY_DATETIMEUNIT;
-
-/*
- * NOTE: With the NPY_FR_B gap for 1.6 ABI compatibility, NPY_DATETIME_NUMUNITS
- * is technically one more than the actual number of units.
- */
-#define NPY_DATETIME_NUMUNITS (NPY_FR_GENERIC + 1)
-#define NPY_DATETIME_DEFAULTUNIT NPY_FR_GENERIC
-
-/*
- * Business day conventions for mapping invalid business
- * days to valid business days.
- */
-typedef enum {
- /* Go forward in time to the following business day. */
- NPY_BUSDAY_FORWARD,
- NPY_BUSDAY_FOLLOWING = NPY_BUSDAY_FORWARD,
- /* Go backward in time to the preceding business day. */
- NPY_BUSDAY_BACKWARD,
- NPY_BUSDAY_PRECEDING = NPY_BUSDAY_BACKWARD,
- /*
- * Go forward in time to the following business day, unless it
- * crosses a month boundary, in which case go backward
- */
- NPY_BUSDAY_MODIFIEDFOLLOWING,
- /*
- * Go backward in time to the preceding business day, unless it
- * crosses a month boundary, in which case go forward.
- */
- NPY_BUSDAY_MODIFIEDPRECEDING,
- /* Produce a NaT for non-business days. */
- NPY_BUSDAY_NAT,
- /* Raise an exception for non-business days. */
- NPY_BUSDAY_RAISE
-} NPY_BUSDAY_ROLL;
-
-/************************************************************
- * NumPy Auxiliary Data for inner loops, sort functions, etc.
- ************************************************************/
-
-/*
- * When creating an auxiliary data struct, this should always appear
- * as the first member, like this:
- *
- * typedef struct {
- * NpyAuxData base;
- * double constant;
- * } constant_multiplier_aux_data;
- */
-typedef struct NpyAuxData_tag NpyAuxData;
-
-/* Function pointers for freeing or cloning auxiliary data */
-typedef void (NpyAuxData_FreeFunc) (NpyAuxData *);
-typedef NpyAuxData *(NpyAuxData_CloneFunc) (NpyAuxData *);
-
-struct NpyAuxData_tag {
- NpyAuxData_FreeFunc *free;
- NpyAuxData_CloneFunc *clone;
- /* To allow for a bit of expansion without breaking the ABI */
- void *reserved[2];
-};
-
-/* Macros to use for freeing and cloning auxiliary data */
-#define NPY_AUXDATA_FREE(auxdata) \
- do { \
- if ((auxdata) != NULL) { \
- (auxdata)->free(auxdata); \
- } \
- } while(0)
-#define NPY_AUXDATA_CLONE(auxdata) \
- ((auxdata)->clone(auxdata))
-
-#define NPY_ERR(str) fprintf(stderr, #str); fflush(stderr);
-#define NPY_ERR2(str) fprintf(stderr, str); fflush(stderr);
-
-#define NPY_STRINGIFY(x) #x
-#define NPY_TOSTRING(x) NPY_STRINGIFY(x)
-
- /*
- * Macros to define how array, and dimension/strides data is
- * allocated.
- */
-
- /* Data buffer - PyDataMem_NEW/FREE/RENEW are in multiarraymodule.c */
-
-#define NPY_USE_PYMEM 1
-
-#if NPY_USE_PYMEM == 1
-#define PyArray_malloc PyMem_Malloc
-#define PyArray_free PyMem_Free
-#define PyArray_realloc PyMem_Realloc
-#else
-#define PyArray_malloc malloc
-#define PyArray_free free
-#define PyArray_realloc realloc
-#endif
-
-/* Dimensions and strides */
-#define PyDimMem_NEW(size) \
- ((npy_intp *)PyArray_malloc(size*sizeof(npy_intp)))
-
-#define PyDimMem_FREE(ptr) PyArray_free(ptr)
-
-#define PyDimMem_RENEW(ptr,size) \
- ((npy_intp *)PyArray_realloc(ptr,size*sizeof(npy_intp)))
-
-/* forward declaration */
-struct _PyArray_Descr;
-
-/* These must deal with unaligned and swapped data if necessary */
-typedef PyObject * (PyArray_GetItemFunc) (void *, void *);
-typedef int (PyArray_SetItemFunc)(PyObject *, void *, void *);
-
-typedef void (PyArray_CopySwapNFunc)(void *, npy_intp, void *, npy_intp,
- npy_intp, int, void *);
-
-typedef void (PyArray_CopySwapFunc)(void *, void *, int, void *);
-typedef npy_bool (PyArray_NonzeroFunc)(void *, void *);
-
-
-/*
- * These assume aligned and notswapped data -- a buffer will be used
- * before or contiguous data will be obtained
- */
-
-typedef int (PyArray_CompareFunc)(const void *, const void *, void *);
-typedef int (PyArray_ArgFunc)(void*, npy_intp, npy_intp*, void *);
-
-typedef void (PyArray_DotFunc)(void *, npy_intp, void *, npy_intp, void *,
- npy_intp, void *);
-
-typedef void (PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *,
- void *);
-
-/*
- * XXX the ignore argument should be removed next time the API version
- * is bumped. It used to be the separator.
- */
-typedef int (PyArray_ScanFunc)(FILE *fp, void *dptr,
- char *ignore, struct _PyArray_Descr *);
-typedef int (PyArray_FromStrFunc)(char *s, void *dptr, char **endptr,
- struct _PyArray_Descr *);
-
-typedef int (PyArray_FillFunc)(void *, npy_intp, void *);
-
-typedef int (PyArray_SortFunc)(void *, npy_intp, void *);
-typedef int (PyArray_ArgSortFunc)(void *, npy_intp *, npy_intp, void *);
-
-typedef int (PyArray_FillWithScalarFunc)(void *, npy_intp, void *, void *);
-
-typedef int (PyArray_ScalarKindFunc)(void *);
-
-typedef void (PyArray_FastClipFunc)(void *in, npy_intp n_in, void *min,
- void *max, void *out);
-typedef void (PyArray_FastPutmaskFunc)(void *in, void *mask, npy_intp n_in,
- void *values, npy_intp nv);
-typedef int (PyArray_FastTakeFunc)(void *dest, void *src, npy_intp *indarray,
- npy_intp nindarray, npy_intp n_outer,
- npy_intp m_middle, npy_intp nelem,
- NPY_CLIPMODE clipmode);
-
-typedef struct {
- npy_intp *ptr;
- int len;
-} PyArray_Dims;
-
-typedef struct {
- /*
- * Functions to cast to most other standard types
- * Can have some NULL entries. The types
- * DATETIME, TIMEDELTA, and HALF go into the castdict
- * even though they are built-in.
- */
- PyArray_VectorUnaryFunc *cast[NPY_NTYPES_ABI_COMPATIBLE];
-
- /* The next four functions *cannot* be NULL */
-
- /*
- * Functions to get and set items with standard Python types
- * -- not array scalars
- */
- PyArray_GetItemFunc *getitem;
- PyArray_SetItemFunc *setitem;
-
- /*
- * Copy and/or swap data. Memory areas may not overlap
- * Use memmove first if they might
- */
- PyArray_CopySwapNFunc *copyswapn;
- PyArray_CopySwapFunc *copyswap;
-
- /*
- * Function to compare items
- * Can be NULL
- */
- PyArray_CompareFunc *compare;
-
- /*
- * Function to select largest
- * Can be NULL
- */
- PyArray_ArgFunc *argmax;
-
- /*
- * Function to compute dot product
- * Can be NULL
- */
- PyArray_DotFunc *dotfunc;
-
- /*
- * Function to scan an ASCII file and
- * place a single value plus possible separator
- * Can be NULL
- */
- PyArray_ScanFunc *scanfunc;
-
- /*
- * Function to read a single value from a string
- * and adjust the pointer; Can be NULL
- */
- PyArray_FromStrFunc *fromstr;
-
- /*
- * Function to determine if data is zero or not
- * If NULL a default version is
- * used at Registration time.
- */
- PyArray_NonzeroFunc *nonzero;
-
- /*
- * Used for arange.
- * Can be NULL.
- */
- PyArray_FillFunc *fill;
-
- /*
- * Function to fill arrays with scalar values
- * Can be NULL
- */
- PyArray_FillWithScalarFunc *fillwithscalar;
-
- /*
- * Sorting functions
- * Can be NULL
- */
- PyArray_SortFunc *sort[NPY_NSORTS];
- PyArray_ArgSortFunc *argsort[NPY_NSORTS];
-
- /*
- * Dictionary of additional casting functions
- * PyArray_VectorUnaryFuncs
- * which can be populated to support casting
- * to other registered types. Can be NULL
- */
- PyObject *castdict;
-
- /*
- * Functions useful for generalizing
- * the casting rules.
- * Can be NULL;
- */
- PyArray_ScalarKindFunc *scalarkind;
- int **cancastscalarkindto;
- int *cancastto;
-
- PyArray_FastClipFunc *fastclip;
- PyArray_FastPutmaskFunc *fastputmask;
- PyArray_FastTakeFunc *fasttake;
-
- /*
- * Function to select smallest
- * Can be NULL
- */
- PyArray_ArgFunc *argmin;
-
-} PyArray_ArrFuncs;
-
-/* The item must be reference counted when it is inserted or extracted. */
-#define NPY_ITEM_REFCOUNT 0x01
-/* Same as needing REFCOUNT */
-#define NPY_ITEM_HASOBJECT 0x01
-/* Convert to list for pickling */
-#define NPY_LIST_PICKLE 0x02
-/* The item is a POINTER */
-#define NPY_ITEM_IS_POINTER 0x04
-/* memory needs to be initialized for this data-type */
-#define NPY_NEEDS_INIT 0x08
-/* operations need Python C-API so don't give-up thread. */
-#define NPY_NEEDS_PYAPI 0x10
-/* Use f.getitem when extracting elements of this data-type */
-#define NPY_USE_GETITEM 0x20
-/* Use f.setitem when setting creating 0-d array from this data-type.*/
-#define NPY_USE_SETITEM 0x40
-/* A sticky flag specifically for structured arrays */
-#define NPY_ALIGNED_STRUCT 0x80
-
-/*
- *These are inherited for global data-type if any data-types in the
- * field have them
- */
-#define NPY_FROM_FIELDS (NPY_NEEDS_INIT | NPY_LIST_PICKLE | \
- NPY_ITEM_REFCOUNT | NPY_NEEDS_PYAPI)
-
-#define NPY_OBJECT_DTYPE_FLAGS (NPY_LIST_PICKLE | NPY_USE_GETITEM | \
- NPY_ITEM_IS_POINTER | NPY_ITEM_REFCOUNT | \
- NPY_NEEDS_INIT | NPY_NEEDS_PYAPI)
-
-#define PyDataType_FLAGCHK(dtype, flag) \
- (((dtype)->flags & (flag)) == (flag))
-
-#define PyDataType_REFCHK(dtype) \
- PyDataType_FLAGCHK(dtype, NPY_ITEM_REFCOUNT)
-
-typedef struct _PyArray_Descr {
- PyObject_HEAD
- /*
- * the type object representing an
- * instance of this type -- should not
- * be two type_numbers with the same type
- * object.
- */
- PyTypeObject *typeobj;
- /* kind for this type */
- char kind;
- /* unique-character representing this type */
- char type;
- /*
- * '>' (big), '<' (little), '|'
- * (not-applicable), or '=' (native).
- */
- char byteorder;
- /* flags describing data type */
- char flags;
- /* number representing this type */
- int type_num;
- /* element size (itemsize) for this type */
- int elsize;
- /* alignment needed for this type */
- int alignment;
- /*
- * Non-NULL if this type is
- * is an array (C-contiguous)
- * of some other type
- */
- struct _arr_descr *subarray;
- /*
- * The fields dictionary for this type
- * For statically defined descr this
- * is always Py_None
- */
- PyObject *fields;
- /*
- * An ordered tuple of field names or NULL
- * if no fields are defined
- */
- PyObject *names;
- /*
- * a table of functions specific for each
- * basic data descriptor
- */
- PyArray_ArrFuncs *f;
- /* Metadata about this dtype */
- PyObject *metadata;
- /*
- * Metadata specific to the C implementation
- * of the particular dtype. This was added
- * for NumPy 1.7.0.
- */
- NpyAuxData *c_metadata;
-} PyArray_Descr;
-
-typedef struct _arr_descr {
- PyArray_Descr *base;
- PyObject *shape; /* a tuple */
-} PyArray_ArrayDescr;
-
-/*
- * The main array object structure.
- *
- * It has been recommended to use the inline functions defined below
- * (PyArray_DATA and friends) to access fields here for a number of
- * releases. Direct access to the members themselves is deprecated.
- * To ensure that your code does not use deprecated access,
- * #define NPY_NO_DEPRECATED_API NPY_1_7_VERSION
- * (or NPY_1_8_VERSION or higher as required).
- */
-/* This struct will be moved to a private header in a future release */
-typedef struct tagPyArrayObject_fields {
- PyObject_HEAD
- /* Pointer to the raw data buffer */
- char *data;
- /* The number of dimensions, also called 'ndim' */
- int nd;
- /* The size in each dimension, also called 'shape' */
- npy_intp *dimensions;
- /*
- * Number of bytes to jump to get to the
- * next element in each dimension
- */
- npy_intp *strides;
- /*
- * This object is decref'd upon
- * deletion of array. Except in the
- * case of UPDATEIFCOPY which has
- * special handling.
- *
- * For views it points to the original
- * array, collapsed so no chains of
- * views occur.
- *
- * For creation from buffer object it
- * points to an object that shold be
- * decref'd on deletion
- *
- * For UPDATEIFCOPY flag this is an
- * array to-be-updated upon deletion
- * of this one
- */
- PyObject *base;
- /* Pointer to type structure */
- PyArray_Descr *descr;
- /* Flags describing array -- see below */
- int flags;
- /* For weak references */
- PyObject *weakreflist;
-} PyArrayObject_fields;
-
-/*
- * To hide the implementation details, we only expose
- * the Python struct HEAD.
- */
-#if !(defined(NPY_NO_DEPRECATED_API) && (NPY_API_VERSION <= NPY_NO_DEPRECATED_API))
-/*
- * Can't put this in npy_deprecated_api.h like the others.
- * PyArrayObject field access is deprecated as of NumPy 1.7.
- */
-typedef PyArrayObject_fields PyArrayObject;
-#else
-typedef struct tagPyArrayObject {
- PyObject_HEAD
-} PyArrayObject;
-#endif
-
-#define NPY_SIZEOF_PYARRAYOBJECT (sizeof(PyArrayObject_fields))
-
-/* Array Flags Object */
-typedef struct PyArrayFlagsObject {
- PyObject_HEAD
- PyObject *arr;
- int flags;
-} PyArrayFlagsObject;
-
-/* Mirrors buffer object to ptr */
-
-typedef struct {
- PyObject_HEAD
- PyObject *base;
- void *ptr;
- npy_intp len;
- int flags;
-} PyArray_Chunk;
-
-typedef struct {
- NPY_DATETIMEUNIT base;
- int num;
-} PyArray_DatetimeMetaData;
-
-typedef struct {
- NpyAuxData base;
- PyArray_DatetimeMetaData meta;
-} PyArray_DatetimeDTypeMetaData;
-
-/*
- * This structure contains an exploded view of a date-time value.
- * NaT is represented by year == NPY_DATETIME_NAT.
- */
-typedef struct {
- npy_int64 year;
- npy_int32 month, day, hour, min, sec, us, ps, as;
-} npy_datetimestruct;
-
-/* This is not used internally. */
-typedef struct {
- npy_int64 day;
- npy_int32 sec, us, ps, as;
-} npy_timedeltastruct;
-
-typedef int (PyArray_FinalizeFunc)(PyArrayObject *, PyObject *);
-
-/*
- * Means c-style contiguous (last index varies the fastest). The data
- * elements right after each other.
- *
- * This flag may be requested in constructor functions.
- * This flag may be tested for in PyArray_FLAGS(arr).
- */
-#define NPY_ARRAY_C_CONTIGUOUS 0x0001
-
-/*
- * Set if array is a contiguous Fortran array: the first index varies
- * the fastest in memory (strides array is reverse of C-contiguous
- * array)
- *
- * This flag may be requested in constructor functions.
- * This flag may be tested for in PyArray_FLAGS(arr).
- */
-#define NPY_ARRAY_F_CONTIGUOUS 0x0002
-
-/*
- * Note: all 0-d arrays are C_CONTIGUOUS and F_CONTIGUOUS. If a
- * 1-d array is C_CONTIGUOUS it is also F_CONTIGUOUS
- */
-
-/*
- * If set, the array owns the data: it will be free'd when the array
- * is deleted.
- *
- * This flag may be tested for in PyArray_FLAGS(arr).
- */
-#define NPY_ARRAY_OWNDATA 0x0004
-
-/*
- * An array never has the next four set; they're only used as parameter
- * flags to the the various FromAny functions
- *
- * This flag may be requested in constructor functions.
- */
-
-/* Cause a cast to occur regardless of whether or not it is safe. */
-#define NPY_ARRAY_FORCECAST 0x0010
-
-/*
- * Always copy the array. Returned arrays are always CONTIGUOUS,
- * ALIGNED, and WRITEABLE.
- *
- * This flag may be requested in constructor functions.
- */
-#define NPY_ARRAY_ENSURECOPY 0x0020
-
-/*
- * Make sure the returned array is a base-class ndarray
- *
- * This flag may be requested in constructor functions.
- */
-#define NPY_ARRAY_ENSUREARRAY 0x0040
-
-/*
- * Make sure that the strides are in units of the element size Needed
- * for some operations with record-arrays.
- *
- * This flag may be requested in constructor functions.
- */
-#define NPY_ARRAY_ELEMENTSTRIDES 0x0080
-
-/*
- * Array data is aligned on the appropiate memory address for the type
- * stored according to how the compiler would align things (e.g., an
- * array of integers (4 bytes each) starts on a memory address that's
- * a multiple of 4)
- *
- * This flag may be requested in constructor functions.
- * This flag may be tested for in PyArray_FLAGS(arr).
- */
-#define NPY_ARRAY_ALIGNED 0x0100
-
-/*
- * Array data has the native endianness
- *
- * This flag may be requested in constructor functions.
- */
-#define NPY_ARRAY_NOTSWAPPED 0x0200
-
-/*
- * Array data is writeable
- *
- * This flag may be requested in constructor functions.
- * This flag may be tested for in PyArray_FLAGS(arr).
- */
-#define NPY_ARRAY_WRITEABLE 0x0400
-
-/*
- * If this flag is set, then base contains a pointer to an array of
- * the same size that should be updated with the current contents of
- * this array when this array is deallocated
- *
- * This flag may be requested in constructor functions.
- * This flag may be tested for in PyArray_FLAGS(arr).
- */
-#define NPY_ARRAY_UPDATEIFCOPY 0x1000
-
-/*
- * NOTE: there are also internal flags defined in multiarray/arrayobject.h,
- * which start at bit 31 and work down.
- */
-
-#define NPY_ARRAY_BEHAVED (NPY_ARRAY_ALIGNED | \
- NPY_ARRAY_WRITEABLE)
-#define NPY_ARRAY_BEHAVED_NS (NPY_ARRAY_ALIGNED | \
- NPY_ARRAY_WRITEABLE | \
- NPY_ARRAY_NOTSWAPPED)
-#define NPY_ARRAY_CARRAY (NPY_ARRAY_C_CONTIGUOUS | \
- NPY_ARRAY_BEHAVED)
-#define NPY_ARRAY_CARRAY_RO (NPY_ARRAY_C_CONTIGUOUS | \
- NPY_ARRAY_ALIGNED)
-#define NPY_ARRAY_FARRAY (NPY_ARRAY_F_CONTIGUOUS | \
- NPY_ARRAY_BEHAVED)
-#define NPY_ARRAY_FARRAY_RO (NPY_ARRAY_F_CONTIGUOUS | \
- NPY_ARRAY_ALIGNED)
-#define NPY_ARRAY_DEFAULT (NPY_ARRAY_CARRAY)
-#define NPY_ARRAY_IN_ARRAY (NPY_ARRAY_CARRAY_RO)
-#define NPY_ARRAY_OUT_ARRAY (NPY_ARRAY_CARRAY)
-#define NPY_ARRAY_INOUT_ARRAY (NPY_ARRAY_CARRAY | \
- NPY_ARRAY_UPDATEIFCOPY)
-#define NPY_ARRAY_IN_FARRAY (NPY_ARRAY_FARRAY_RO)
-#define NPY_ARRAY_OUT_FARRAY (NPY_ARRAY_FARRAY)
-#define NPY_ARRAY_INOUT_FARRAY (NPY_ARRAY_FARRAY | \
- NPY_ARRAY_UPDATEIFCOPY)
-
-#define NPY_ARRAY_UPDATE_ALL (NPY_ARRAY_C_CONTIGUOUS | \
- NPY_ARRAY_F_CONTIGUOUS | \
- NPY_ARRAY_ALIGNED)
-
-/* This flag is for the array interface, not PyArrayObject */
-#define NPY_ARR_HAS_DESCR 0x0800
-
-
-
-
-/*
- * Size of internal buffers used for alignment Make BUFSIZE a multiple
- * of sizeof(npy_cdouble) -- usually 16 so that ufunc buffers are aligned
- */
-#define NPY_MIN_BUFSIZE ((int)sizeof(npy_cdouble))
-#define NPY_MAX_BUFSIZE (((int)sizeof(npy_cdouble))*1000000)
-#define NPY_BUFSIZE 8192
-/* buffer stress test size: */
-/*#define NPY_BUFSIZE 17*/
-
-#define PyArray_MAX(a,b) (((a)>(b))?(a):(b))
-#define PyArray_MIN(a,b) (((a)<(b))?(a):(b))
-#define PyArray_CLT(p,q) ((((p).real==(q).real) ? ((p).imag < (q).imag) : \
- ((p).real < (q).real)))
-#define PyArray_CGT(p,q) ((((p).real==(q).real) ? ((p).imag > (q).imag) : \
- ((p).real > (q).real)))
-#define PyArray_CLE(p,q) ((((p).real==(q).real) ? ((p).imag <= (q).imag) : \
- ((p).real <= (q).real)))
-#define PyArray_CGE(p,q) ((((p).real==(q).real) ? ((p).imag >= (q).imag) : \
- ((p).real >= (q).real)))
-#define PyArray_CEQ(p,q) (((p).real==(q).real) && ((p).imag == (q).imag))
-#define PyArray_CNE(p,q) (((p).real!=(q).real) || ((p).imag != (q).imag))
-
-/*
- * C API: consists of Macros and functions. The MACROS are defined
- * here.
- */
-
-
-#define PyArray_ISCONTIGUOUS(m) PyArray_CHKFLAGS(m, NPY_ARRAY_C_CONTIGUOUS)
-#define PyArray_ISWRITEABLE(m) PyArray_CHKFLAGS(m, NPY_ARRAY_WRITEABLE)
-#define PyArray_ISALIGNED(m) PyArray_CHKFLAGS(m, NPY_ARRAY_ALIGNED)
-
-#define PyArray_IS_C_CONTIGUOUS(m) PyArray_CHKFLAGS(m, NPY_ARRAY_C_CONTIGUOUS)
-#define PyArray_IS_F_CONTIGUOUS(m) PyArray_CHKFLAGS(m, NPY_ARRAY_F_CONTIGUOUS)
-
-#if NPY_ALLOW_THREADS
-#define NPY_BEGIN_ALLOW_THREADS Py_BEGIN_ALLOW_THREADS
-#define NPY_END_ALLOW_THREADS Py_END_ALLOW_THREADS
-#define NPY_BEGIN_THREADS_DEF PyThreadState *_save=NULL;
-#define NPY_BEGIN_THREADS do {_save = PyEval_SaveThread();} while (0);
-#define NPY_END_THREADS do {if (_save) PyEval_RestoreThread(_save);} while (0);
-
-#define NPY_BEGIN_THREADS_DESCR(dtype) \
- do {if (!(PyDataType_FLAGCHK(dtype, NPY_NEEDS_PYAPI))) \
- NPY_BEGIN_THREADS;} while (0);
-
-#define NPY_END_THREADS_DESCR(dtype) \
- do {if (!(PyDataType_FLAGCHK(dtype, NPY_NEEDS_PYAPI))) \
- NPY_END_THREADS; } while (0);
-
-#define NPY_ALLOW_C_API_DEF PyGILState_STATE __save__;
-#define NPY_ALLOW_C_API do {__save__ = PyGILState_Ensure();} while (0);
-#define NPY_DISABLE_C_API do {PyGILState_Release(__save__);} while (0);
-#else
-#define NPY_BEGIN_ALLOW_THREADS
-#define NPY_END_ALLOW_THREADS
-#define NPY_BEGIN_THREADS_DEF
-#define NPY_BEGIN_THREADS
-#define NPY_END_THREADS
-#define NPY_BEGIN_THREADS_DESCR(dtype)
-#define NPY_END_THREADS_DESCR(dtype)
-#define NPY_ALLOW_C_API_DEF
-#define NPY_ALLOW_C_API
-#define NPY_DISABLE_C_API
-#endif
-
-/**********************************
- * The nditer object, added in 1.6
- **********************************/
-
-/* The actual structure of the iterator is an internal detail */
-typedef struct NpyIter_InternalOnly NpyIter;
-
-/* Iterator function pointers that may be specialized */
-typedef int (NpyIter_IterNextFunc)(NpyIter *iter);
-typedef void (NpyIter_GetMultiIndexFunc)(NpyIter *iter,
- npy_intp *outcoords);
-
-/*** Global flags that may be passed to the iterator constructors ***/
-
-/* Track an index representing C order */
-#define NPY_ITER_C_INDEX 0x00000001
-/* Track an index representing Fortran order */
-#define NPY_ITER_F_INDEX 0x00000002
-/* Track a multi-index */
-#define NPY_ITER_MULTI_INDEX 0x00000004
-/* User code external to the iterator does the 1-dimensional innermost loop */
-#define NPY_ITER_EXTERNAL_LOOP 0x00000008
-/* Convert all the operands to a common data type */
-#define NPY_ITER_COMMON_DTYPE 0x00000010
-/* Operands may hold references, requiring API access during iteration */
-#define NPY_ITER_REFS_OK 0x00000020
-/* Zero-sized operands should be permitted, iteration checks IterSize for 0 */
-#define NPY_ITER_ZEROSIZE_OK 0x00000040
-/* Permits reductions (size-0 stride with dimension size > 1) */
-#define NPY_ITER_REDUCE_OK 0x00000080
-/* Enables sub-range iteration */
-#define NPY_ITER_RANGED 0x00000100
-/* Enables buffering */
-#define NPY_ITER_BUFFERED 0x00000200
-/* When buffering is enabled, grows the inner loop if possible */
-#define NPY_ITER_GROWINNER 0x00000400
-/* Delay allocation of buffers until first Reset* call */
-#define NPY_ITER_DELAY_BUFALLOC 0x00000800
-/* When NPY_KEEPORDER is specified, disable reversing negative-stride axes */
-#define NPY_ITER_DONT_NEGATE_STRIDES 0x00001000
-
-/*** Per-operand flags that may be passed to the iterator constructors ***/
-
-/* The operand will be read from and written to */
-#define NPY_ITER_READWRITE 0x00010000
-/* The operand will only be read from */
-#define NPY_ITER_READONLY 0x00020000
-/* The operand will only be written to */
-#define NPY_ITER_WRITEONLY 0x00040000
-/* The operand's data must be in native byte order */
-#define NPY_ITER_NBO 0x00080000
-/* The operand's data must be aligned */
-#define NPY_ITER_ALIGNED 0x00100000
-/* The operand's data must be contiguous (within the inner loop) */
-#define NPY_ITER_CONTIG 0x00200000
-/* The operand may be copied to satisfy requirements */
-#define NPY_ITER_COPY 0x00400000
-/* The operand may be copied with UPDATEIFCOPY to satisfy requirements */
-#define NPY_ITER_UPDATEIFCOPY 0x00800000
-/* Allocate the operand if it is NULL */
-#define NPY_ITER_ALLOCATE 0x01000000
-/* If an operand is allocated, don't use any subtype */
-#define NPY_ITER_NO_SUBTYPE 0x02000000
-/* This is a virtual array slot, operand is NULL but temporary data is there */
-#define NPY_ITER_VIRTUAL 0x04000000
-/* Require that the dimension match the iterator dimensions exactly */
-#define NPY_ITER_NO_BROADCAST 0x08000000
-/* A mask is being used on this array, affects buffer -> array copy */
-#define NPY_ITER_WRITEMASKED 0x10000000
-/* This array is the mask for all WRITEMASKED operands */
-#define NPY_ITER_ARRAYMASK 0x20000000
-
-#define NPY_ITER_GLOBAL_FLAGS 0x0000ffff
-#define NPY_ITER_PER_OP_FLAGS 0xffff0000
-
-
-/*****************************
- * Basic iterator object
- *****************************/
-
-/* FWD declaration */
-typedef struct PyArrayIterObject_tag PyArrayIterObject;
-
-/*
- * type of the function which translates a set of coordinates to a
- * pointer to the data
- */
-typedef char* (*npy_iter_get_dataptr_t)(PyArrayIterObject* iter, npy_intp*);
-
-struct PyArrayIterObject_tag {
- PyObject_HEAD
- int nd_m1; /* number of dimensions - 1 */
- npy_intp index, size;
- npy_intp coordinates[NPY_MAXDIMS];/* N-dimensional loop */
- npy_intp dims_m1[NPY_MAXDIMS]; /* ao->dimensions - 1 */
- npy_intp strides[NPY_MAXDIMS]; /* ao->strides or fake */
- npy_intp backstrides[NPY_MAXDIMS];/* how far to jump back */
- npy_intp factors[NPY_MAXDIMS]; /* shape factors */
- PyArrayObject *ao;
- char *dataptr; /* pointer to current item*/
- npy_bool contiguous;
-
- npy_intp bounds[NPY_MAXDIMS][2];
- npy_intp limits[NPY_MAXDIMS][2];
- npy_intp limits_sizes[NPY_MAXDIMS];
- npy_iter_get_dataptr_t translate;
-} ;
-
-
-/* Iterator API */
-#define PyArrayIter_Check(op) PyObject_TypeCheck(op, &PyArrayIter_Type)
-
-#define _PyAIT(it) ((PyArrayIterObject *)(it))
-#define PyArray_ITER_RESET(it) do { \
- _PyAIT(it)->index = 0; \
- _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao); \
- memset(_PyAIT(it)->coordinates, 0, \
- (_PyAIT(it)->nd_m1+1)*sizeof(npy_intp)); \
-} while (0)
-
-#define _PyArray_ITER_NEXT1(it) do { \
- (it)->dataptr += _PyAIT(it)->strides[0]; \
- (it)->coordinates[0]++; \
-} while (0)
-
-#define _PyArray_ITER_NEXT2(it) do { \
- if ((it)->coordinates[1] < (it)->dims_m1[1]) { \
- (it)->coordinates[1]++; \
- (it)->dataptr += (it)->strides[1]; \
- } \
- else { \
- (it)->coordinates[1] = 0; \
- (it)->coordinates[0]++; \
- (it)->dataptr += (it)->strides[0] - \
- (it)->backstrides[1]; \
- } \
-} while (0)
-
-#define _PyArray_ITER_NEXT3(it) do { \
- if ((it)->coordinates[2] < (it)->dims_m1[2]) { \
- (it)->coordinates[2]++; \
- (it)->dataptr += (it)->strides[2]; \
- } \
- else { \
- (it)->coordinates[2] = 0; \
- (it)->dataptr -= (it)->backstrides[2]; \
- if ((it)->coordinates[1] < (it)->dims_m1[1]) { \
- (it)->coordinates[1]++; \
- (it)->dataptr += (it)->strides[1]; \
- } \
- else { \
- (it)->coordinates[1] = 0; \
- (it)->coordinates[0]++; \
- (it)->dataptr += (it)->strides[0] \
- (it)->backstrides[1]; \
- } \
- } \
-} while (0)
-
-#define PyArray_ITER_NEXT(it) do { \
- _PyAIT(it)->index++; \
- if (_PyAIT(it)->nd_m1 == 0) { \
- _PyArray_ITER_NEXT1(_PyAIT(it)); \
- } \
- else if (_PyAIT(it)->contiguous) \
- _PyAIT(it)->dataptr += PyArray_DESCR(_PyAIT(it)->ao)->elsize; \
- else if (_PyAIT(it)->nd_m1 == 1) { \
- _PyArray_ITER_NEXT2(_PyAIT(it)); \
- } \
- else { \
- int __npy_i; \
- for (__npy_i=_PyAIT(it)->nd_m1; __npy_i >= 0; __npy_i--) { \
- if (_PyAIT(it)->coordinates[__npy_i] < \
- _PyAIT(it)->dims_m1[__npy_i]) { \
- _PyAIT(it)->coordinates[__npy_i]++; \
- _PyAIT(it)->dataptr += \
- _PyAIT(it)->strides[__npy_i]; \
- break; \
- } \
- else { \
- _PyAIT(it)->coordinates[__npy_i] = 0; \
- _PyAIT(it)->dataptr -= \
- _PyAIT(it)->backstrides[__npy_i]; \
- } \
- } \
- } \
-} while (0)
-
-#define PyArray_ITER_GOTO(it, destination) do { \
- int __npy_i; \
- _PyAIT(it)->index = 0; \
- _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao); \
- for (__npy_i = _PyAIT(it)->nd_m1; __npy_i>=0; __npy_i--) { \
- if (destination[__npy_i] < 0) { \
- destination[__npy_i] += \
- _PyAIT(it)->dims_m1[__npy_i]+1; \
- } \
- _PyAIT(it)->dataptr += destination[__npy_i] * \
- _PyAIT(it)->strides[__npy_i]; \
- _PyAIT(it)->coordinates[__npy_i] = \
- destination[__npy_i]; \
- _PyAIT(it)->index += destination[__npy_i] * \
- ( __npy_i==_PyAIT(it)->nd_m1 ? 1 : \
- _PyAIT(it)->dims_m1[__npy_i+1]+1) ; \
- } \
-} while (0)
-
-#define PyArray_ITER_GOTO1D(it, ind) do { \
- int __npy_i; \
- npy_intp __npy_ind = (npy_intp) (ind); \
- if (__npy_ind < 0) __npy_ind += _PyAIT(it)->size; \
- _PyAIT(it)->index = __npy_ind; \
- if (_PyAIT(it)->nd_m1 == 0) { \
- _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao) + \
- __npy_ind * _PyAIT(it)->strides[0]; \
- } \
- else if (_PyAIT(it)->contiguous) \
- _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao) + \
- __npy_ind * PyArray_DESCR(_PyAIT(it)->ao)->elsize; \
- else { \
- _PyAIT(it)->dataptr = PyArray_BYTES(_PyAIT(it)->ao); \
- for (__npy_i = 0; __npy_i<=_PyAIT(it)->nd_m1; \
- __npy_i++) { \
- _PyAIT(it)->dataptr += \
- (__npy_ind / _PyAIT(it)->factors[__npy_i]) \
- * _PyAIT(it)->strides[__npy_i]; \
- __npy_ind %= _PyAIT(it)->factors[__npy_i]; \
- } \
- } \
-} while (0)
-
-#define PyArray_ITER_DATA(it) ((void *)(_PyAIT(it)->dataptr))
-
-#define PyArray_ITER_NOTDONE(it) (_PyAIT(it)->index < _PyAIT(it)->size)
-
-
-/*
- * Any object passed to PyArray_Broadcast must be binary compatible
- * with this structure.
- */
-
-typedef struct {
- PyObject_HEAD
- int numiter; /* number of iters */
- npy_intp size; /* broadcasted size */
- npy_intp index; /* current index */
- int nd; /* number of dims */
- npy_intp dimensions[NPY_MAXDIMS]; /* dimensions */
- PyArrayIterObject *iters[NPY_MAXARGS]; /* iterators */
-} PyArrayMultiIterObject;
-
-#define _PyMIT(m) ((PyArrayMultiIterObject *)(m))
-#define PyArray_MultiIter_RESET(multi) do { \
- int __npy_mi; \
- _PyMIT(multi)->index = 0; \
- for (__npy_mi=0; __npy_mi < _PyMIT(multi)->numiter; __npy_mi++) { \
- PyArray_ITER_RESET(_PyMIT(multi)->iters[__npy_mi]); \
- } \
-} while (0)
-
-#define PyArray_MultiIter_NEXT(multi) do { \
- int __npy_mi; \
- _PyMIT(multi)->index++; \
- for (__npy_mi=0; __npy_mi < _PyMIT(multi)->numiter; __npy_mi++) { \
- PyArray_ITER_NEXT(_PyMIT(multi)->iters[__npy_mi]); \
- } \
-} while (0)
-
-#define PyArray_MultiIter_GOTO(multi, dest) do { \
- int __npy_mi; \
- for (__npy_mi=0; __npy_mi < _PyMIT(multi)->numiter; __npy_mi++) { \
- PyArray_ITER_GOTO(_PyMIT(multi)->iters[__npy_mi], dest); \
- } \
- _PyMIT(multi)->index = _PyMIT(multi)->iters[0]->index; \
-} while (0)
-
-#define PyArray_MultiIter_GOTO1D(multi, ind) do { \
- int __npy_mi; \
- for (__npy_mi=0; __npy_mi < _PyMIT(multi)->numiter; __npy_mi++) { \
- PyArray_ITER_GOTO1D(_PyMIT(multi)->iters[__npy_mi], ind); \
- } \
- _PyMIT(multi)->index = _PyMIT(multi)->iters[0]->index; \
-} while (0)
-
-#define PyArray_MultiIter_DATA(multi, i) \
- ((void *)(_PyMIT(multi)->iters[i]->dataptr))
-
-#define PyArray_MultiIter_NEXTi(multi, i) \
- PyArray_ITER_NEXT(_PyMIT(multi)->iters[i])
-
-#define PyArray_MultiIter_NOTDONE(multi) \
- (_PyMIT(multi)->index < _PyMIT(multi)->size)
-
-/* Store the information needed for fancy-indexing over an array */
-
-typedef struct {
- PyObject_HEAD
- /*
- * Multi-iterator portion --- needs to be present in this
- * order to work with PyArray_Broadcast
- */
-
- int numiter; /* number of index-array
- iterators */
- npy_intp size; /* size of broadcasted
- result */
- npy_intp index; /* current index */
- int nd; /* number of dims */
- npy_intp dimensions[NPY_MAXDIMS]; /* dimensions */
- PyArrayIterObject *iters[NPY_MAXDIMS]; /* index object
- iterators */
- PyArrayIterObject *ait; /* flat Iterator for
- underlying array */
-
- /* flat iterator for subspace (when numiter < nd) */
- PyArrayIterObject *subspace;
-
- /*
- * if subspace iteration, then this is the array of axes in
- * the underlying array represented by the index objects
- */
- int iteraxes[NPY_MAXDIMS];
- /*
- * if subspace iteration, the these are the coordinates to the
- * start of the subspace.
- */
- npy_intp bscoord[NPY_MAXDIMS];
-
- PyObject *indexobj; /* creating obj */
- int consec;
- char *dataptr;
-
-} PyArrayMapIterObject;
-
-enum {
- NPY_NEIGHBORHOOD_ITER_ZERO_PADDING,
- NPY_NEIGHBORHOOD_ITER_ONE_PADDING,
- NPY_NEIGHBORHOOD_ITER_CONSTANT_PADDING,
- NPY_NEIGHBORHOOD_ITER_CIRCULAR_PADDING,
- NPY_NEIGHBORHOOD_ITER_MIRROR_PADDING
-};
-
-typedef struct {
- PyObject_HEAD
-
- /*
- * PyArrayIterObject part: keep this in this exact order
- */
- int nd_m1; /* number of dimensions - 1 */
- npy_intp index, size;
- npy_intp coordinates[NPY_MAXDIMS];/* N-dimensional loop */
- npy_intp dims_m1[NPY_MAXDIMS]; /* ao->dimensions - 1 */
- npy_intp strides[NPY_MAXDIMS]; /* ao->strides or fake */
- npy_intp backstrides[NPY_MAXDIMS];/* how far to jump back */
- npy_intp factors[NPY_MAXDIMS]; /* shape factors */
- PyArrayObject *ao;
- char *dataptr; /* pointer to current item*/
- npy_bool contiguous;
-
- npy_intp bounds[NPY_MAXDIMS][2];
- npy_intp limits[NPY_MAXDIMS][2];
- npy_intp limits_sizes[NPY_MAXDIMS];
- npy_iter_get_dataptr_t translate;
-
- /*
- * New members
- */
- npy_intp nd;
-
- /* Dimensions is the dimension of the array */
- npy_intp dimensions[NPY_MAXDIMS];
-
- /*
- * Neighborhood points coordinates are computed relatively to the
- * point pointed by _internal_iter
- */
- PyArrayIterObject* _internal_iter;
- /*
- * To keep a reference to the representation of the constant value
- * for constant padding
- */
- char* constant;
-
- int mode;
-} PyArrayNeighborhoodIterObject;
-
-/*
- * Neighborhood iterator API
- */
-
-/* General: those work for any mode */
-static NPY_INLINE int
-PyArrayNeighborhoodIter_Reset(PyArrayNeighborhoodIterObject* iter);
-static NPY_INLINE int
-PyArrayNeighborhoodIter_Next(PyArrayNeighborhoodIterObject* iter);
-#if 0
-static NPY_INLINE int
-PyArrayNeighborhoodIter_Next2D(PyArrayNeighborhoodIterObject* iter);
-#endif
-
-/*
- * Include inline implementations - functions defined there are not
- * considered public API
- */
-#define _NPY_INCLUDE_NEIGHBORHOOD_IMP
-#include "_neighborhood_iterator_imp.h"
-#undef _NPY_INCLUDE_NEIGHBORHOOD_IMP
-
-/* The default array type */
-#define NPY_DEFAULT_TYPE NPY_DOUBLE
-
-/*
- * All sorts of useful ways to look into a PyArrayObject. It is recommended
- * to use PyArrayObject * objects instead of always casting from PyObject *,
- * for improved type checking.
- *
- * In many cases here the macro versions of the accessors are deprecated,
- * but can't be immediately changed to inline functions because the
- * preexisting macros accept PyObject * and do automatic casts. Inline
- * functions accepting PyArrayObject * provides for some compile-time
- * checking of correctness when working with these objects in C.
- */
-
-#define PyArray_ISONESEGMENT(m) (PyArray_NDIM(m) == 0 || \
- PyArray_CHKFLAGS(m, NPY_ARRAY_C_CONTIGUOUS) || \
- PyArray_CHKFLAGS(m, NPY_ARRAY_F_CONTIGUOUS))
-
-#define PyArray_ISFORTRAN(m) (PyArray_CHKFLAGS(m, NPY_ARRAY_F_CONTIGUOUS) && \
- (PyArray_NDIM(m) > 1))
-
-#define PyArray_FORTRAN_IF(m) ((PyArray_CHKFLAGS(m, NPY_ARRAY_F_CONTIGUOUS) ? \
- NPY_ARRAY_F_CONTIGUOUS : 0))
-
-#if (defined(NPY_NO_DEPRECATED_API) && (NPY_API_VERSION <= NPY_NO_DEPRECATED_API))
-/*
- * Changing access macros into functions, to allow for future hiding
- * of the internal memory layout. This later hiding will allow the 2.x series
- * to change the internal representation of arrays without affecting
- * ABI compatibility.
- */
-
-static NPY_INLINE int
-PyArray_NDIM(const PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->nd;
-}
-
-static NPY_INLINE void *
-PyArray_DATA(PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->data;
-}
-
-static NPY_INLINE char *
-PyArray_BYTES(PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->data;
-}
-
-static NPY_INLINE npy_intp *
-PyArray_DIMS(PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->dimensions;
-}
-
-static NPY_INLINE npy_intp *
-PyArray_STRIDES(PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->strides;
-}
-
-static NPY_INLINE npy_intp
-PyArray_DIM(const PyArrayObject *arr, int idim)
-{
- return ((PyArrayObject_fields *)arr)->dimensions[idim];
-}
-
-static NPY_INLINE npy_intp
-PyArray_STRIDE(const PyArrayObject *arr, int istride)
-{
- return ((PyArrayObject_fields *)arr)->strides[istride];
-}
-
-static NPY_INLINE PyObject *
-PyArray_BASE(PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->base;
-}
-
-static NPY_INLINE PyArray_Descr *
-PyArray_DESCR(PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->descr;
-}
-
-static NPY_INLINE int
-PyArray_FLAGS(const PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->flags;
-}
-
-static NPY_INLINE npy_intp
-PyArray_ITEMSIZE(const PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->descr->elsize;
-}
-
-static NPY_INLINE int
-PyArray_TYPE(const PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->descr->type_num;
-}
-
-static NPY_INLINE int
-PyArray_CHKFLAGS(const PyArrayObject *arr, int flags)
-{
- return (PyArray_FLAGS(arr) & flags) == flags;
-}
-
-static NPY_INLINE PyObject *
-PyArray_GETITEM(const PyArrayObject *arr, const char *itemptr)
-{
- return ((PyArrayObject_fields *)arr)->descr->f->getitem(
- (void *)itemptr, (PyArrayObject *)arr);
-}
-
-static NPY_INLINE int
-PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v)
-{
- return ((PyArrayObject_fields *)arr)->descr->f->setitem(
- v, itemptr, arr);
-}
-
-#else
-
-/* These macros are deprecated as of NumPy 1.7. */
-#define PyArray_NDIM(obj) (((PyArrayObject_fields *)(obj))->nd)
-#define PyArray_BYTES(obj) (((PyArrayObject_fields *)(obj))->data)
-#define PyArray_DATA(obj) ((void *)((PyArrayObject_fields *)(obj))->data)
-#define PyArray_DIMS(obj) (((PyArrayObject_fields *)(obj))->dimensions)
-#define PyArray_STRIDES(obj) (((PyArrayObject_fields *)(obj))->strides)
-#define PyArray_DIM(obj,n) (PyArray_DIMS(obj)[n])
-#define PyArray_STRIDE(obj,n) (PyArray_STRIDES(obj)[n])
-#define PyArray_BASE(obj) (((PyArrayObject_fields *)(obj))->base)
-#define PyArray_DESCR(obj) (((PyArrayObject_fields *)(obj))->descr)
-#define PyArray_FLAGS(obj) (((PyArrayObject_fields *)(obj))->flags)
-#define PyArray_CHKFLAGS(m, FLAGS) \
- ((((PyArrayObject_fields *)(m))->flags & (FLAGS)) == (FLAGS))
-#define PyArray_ITEMSIZE(obj) \
- (((PyArrayObject_fields *)(obj))->descr->elsize)
-#define PyArray_TYPE(obj) \
- (((PyArrayObject_fields *)(obj))->descr->type_num)
-#define PyArray_GETITEM(obj,itemptr) \
- PyArray_DESCR(obj)->f->getitem((char *)(itemptr), \
- (PyArrayObject *)(obj))
-
-#define PyArray_SETITEM(obj,itemptr,v) \
- PyArray_DESCR(obj)->f->setitem((PyObject *)(v), \
- (char *)(itemptr), \
- (PyArrayObject *)(obj))
-#endif
-
-static NPY_INLINE PyArray_Descr *
-PyArray_DTYPE(PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->descr;
-}
-
-static NPY_INLINE npy_intp *
-PyArray_SHAPE(PyArrayObject *arr)
-{
- return ((PyArrayObject_fields *)arr)->dimensions;
-}
-
-/*
- * Enables the specified array flags. Does no checking,
- * assumes you know what you're doing.
- */
-static NPY_INLINE void
-PyArray_ENABLEFLAGS(PyArrayObject *arr, int flags)
-{
- ((PyArrayObject_fields *)arr)->flags |= flags;
-}
-
-/*
- * Clears the specified array flags. Does no checking,
- * assumes you know what you're doing.
- */
-static NPY_INLINE void
-PyArray_CLEARFLAGS(PyArrayObject *arr, int flags)
-{
- ((PyArrayObject_fields *)arr)->flags &= ~flags;
-}
-
-#define PyTypeNum_ISBOOL(type) ((type) == NPY_BOOL)
-
-#define PyTypeNum_ISUNSIGNED(type) (((type) == NPY_UBYTE) || \
- ((type) == NPY_USHORT) || \
- ((type) == NPY_UINT) || \
- ((type) == NPY_ULONG) || \
- ((type) == NPY_ULONGLONG))
-
-#define PyTypeNum_ISSIGNED(type) (((type) == NPY_BYTE) || \
- ((type) == NPY_SHORT) || \
- ((type) == NPY_INT) || \
- ((type) == NPY_LONG) || \
- ((type) == NPY_LONGLONG))
-
-#define PyTypeNum_ISINTEGER(type) (((type) >= NPY_BYTE) && \
- ((type) <= NPY_ULONGLONG))
-
-#define PyTypeNum_ISFLOAT(type) ((((type) >= NPY_FLOAT) && \
- ((type) <= NPY_LONGDOUBLE)) || \
- ((type) == NPY_HALF))
-
-#define PyTypeNum_ISNUMBER(type) (((type) <= NPY_CLONGDOUBLE) || \
- ((type) == NPY_HALF))
-
-#define PyTypeNum_ISSTRING(type) (((type) == NPY_STRING) || \
- ((type) == NPY_UNICODE))
-
-#define PyTypeNum_ISCOMPLEX(type) (((type) >= NPY_CFLOAT) && \
- ((type) <= NPY_CLONGDOUBLE))
-
-#define PyTypeNum_ISPYTHON(type) (((type) == NPY_LONG) || \
- ((type) == NPY_DOUBLE) || \
- ((type) == NPY_CDOUBLE) || \
- ((type) == NPY_BOOL) || \
- ((type) == NPY_OBJECT ))
-
-#define PyTypeNum_ISFLEXIBLE(type) (((type) >=NPY_STRING) && \
- ((type) <=NPY_VOID))
-
-#define PyTypeNum_ISDATETIME(type) (((type) >=NPY_DATETIME) && \
- ((type) <=NPY_TIMEDELTA))
-
-#define PyTypeNum_ISUSERDEF(type) (((type) >= NPY_USERDEF) && \
- ((type) < NPY_USERDEF+ \
- NPY_NUMUSERTYPES))
-
-#define PyTypeNum_ISEXTENDED(type) (PyTypeNum_ISFLEXIBLE(type) || \
- PyTypeNum_ISUSERDEF(type))
-
-#define PyTypeNum_ISOBJECT(type) ((type) == NPY_OBJECT)
-
-
-#define PyDataType_ISBOOL(obj) PyTypeNum_ISBOOL(_PyADt(obj))
-#define PyDataType_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISSIGNED(obj) PyTypeNum_ISSIGNED(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISINTEGER(obj) PyTypeNum_ISINTEGER(((PyArray_Descr*)(obj))->type_num )
-#define PyDataType_ISFLOAT(obj) PyTypeNum_ISFLOAT(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISNUMBER(obj) PyTypeNum_ISNUMBER(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISSTRING(obj) PyTypeNum_ISSTRING(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISCOMPLEX(obj) PyTypeNum_ISCOMPLEX(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISPYTHON(obj) PyTypeNum_ISPYTHON(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISFLEXIBLE(obj) PyTypeNum_ISFLEXIBLE(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISDATETIME(obj) PyTypeNum_ISDATETIME(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISUSERDEF(obj) PyTypeNum_ISUSERDEF(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISEXTENDED(obj) PyTypeNum_ISEXTENDED(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_ISOBJECT(obj) PyTypeNum_ISOBJECT(((PyArray_Descr*)(obj))->type_num)
-#define PyDataType_HASFIELDS(obj) (((PyArray_Descr *)(obj))->names != NULL)
-#define PyDataType_HASSUBARRAY(dtype) ((dtype)->subarray != NULL)
-
-#define PyArray_ISBOOL(obj) PyTypeNum_ISBOOL(PyArray_TYPE(obj))
-#define PyArray_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(PyArray_TYPE(obj))
-#define PyArray_ISSIGNED(obj) PyTypeNum_ISSIGNED(PyArray_TYPE(obj))
-#define PyArray_ISINTEGER(obj) PyTypeNum_ISINTEGER(PyArray_TYPE(obj))
-#define PyArray_ISFLOAT(obj) PyTypeNum_ISFLOAT(PyArray_TYPE(obj))
-#define PyArray_ISNUMBER(obj) PyTypeNum_ISNUMBER(PyArray_TYPE(obj))
-#define PyArray_ISSTRING(obj) PyTypeNum_ISSTRING(PyArray_TYPE(obj))
-#define PyArray_ISCOMPLEX(obj) PyTypeNum_ISCOMPLEX(PyArray_TYPE(obj))
-#define PyArray_ISPYTHON(obj) PyTypeNum_ISPYTHON(PyArray_TYPE(obj))
-#define PyArray_ISFLEXIBLE(obj) PyTypeNum_ISFLEXIBLE(PyArray_TYPE(obj))
-#define PyArray_ISDATETIME(obj) PyTypeNum_ISDATETIME(PyArray_TYPE(obj))
-#define PyArray_ISUSERDEF(obj) PyTypeNum_ISUSERDEF(PyArray_TYPE(obj))
-#define PyArray_ISEXTENDED(obj) PyTypeNum_ISEXTENDED(PyArray_TYPE(obj))
-#define PyArray_ISOBJECT(obj) PyTypeNum_ISOBJECT(PyArray_TYPE(obj))
-#define PyArray_HASFIELDS(obj) PyDataType_HASFIELDS(PyArray_DESCR(obj))
-
- /*
- * FIXME: This should check for a flag on the data-type that
- * states whether or not it is variable length. Because the
- * ISFLEXIBLE check is hard-coded to the built-in data-types.
- */
-#define PyArray_ISVARIABLE(obj) PyTypeNum_ISFLEXIBLE(PyArray_TYPE(obj))
-
-#define PyArray_SAFEALIGNEDCOPY(obj) (PyArray_ISALIGNED(obj) && !PyArray_ISVARIABLE(obj))
-
-
-#define NPY_LITTLE '<'
-#define NPY_BIG '>'
-#define NPY_NATIVE '='
-#define NPY_SWAP 's'
-#define NPY_IGNORE '|'
-
-#if NPY_BYTE_ORDER == NPY_BIG_ENDIAN
-#define NPY_NATBYTE NPY_BIG
-#define NPY_OPPBYTE NPY_LITTLE
-#else
-#define NPY_NATBYTE NPY_LITTLE
-#define NPY_OPPBYTE NPY_BIG
-#endif
-
-#define PyArray_ISNBO(arg) ((arg) != NPY_OPPBYTE)
-#define PyArray_IsNativeByteOrder PyArray_ISNBO
-#define PyArray_ISNOTSWAPPED(m) PyArray_ISNBO(PyArray_DESCR(m)->byteorder)
-#define PyArray_ISBYTESWAPPED(m) (!PyArray_ISNOTSWAPPED(m))
-
-#define PyArray_FLAGSWAP(m, flags) (PyArray_CHKFLAGS(m, flags) && \
- PyArray_ISNOTSWAPPED(m))
-
-#define PyArray_ISCARRAY(m) PyArray_FLAGSWAP(m, NPY_ARRAY_CARRAY)
-#define PyArray_ISCARRAY_RO(m) PyArray_FLAGSWAP(m, NPY_ARRAY_CARRAY_RO)
-#define PyArray_ISFARRAY(m) PyArray_FLAGSWAP(m, NPY_ARRAY_FARRAY)
-#define PyArray_ISFARRAY_RO(m) PyArray_FLAGSWAP(m, NPY_ARRAY_FARRAY_RO)
-#define PyArray_ISBEHAVED(m) PyArray_FLAGSWAP(m, NPY_ARRAY_BEHAVED)
-#define PyArray_ISBEHAVED_RO(m) PyArray_FLAGSWAP(m, NPY_ARRAY_ALIGNED)
-
-
-#define PyDataType_ISNOTSWAPPED(d) PyArray_ISNBO(((PyArray_Descr *)(d))->byteorder)
-#define PyDataType_ISBYTESWAPPED(d) (!PyDataType_ISNOTSWAPPED(d))
-
-/************************************************************
- * A struct used by PyArray_CreateSortedStridePerm, new in 1.7.
- ************************************************************/
-
-typedef struct {
- npy_intp perm, stride;
-} npy_stride_sort_item;
-
-/************************************************************
- * This is the form of the struct that's returned pointed by the
- * PyCObject attribute of an array __array_struct__. See
- * http://docs.scipy.org/doc/numpy/reference/arrays.interface.html for the full
- * documentation.
- ************************************************************/
-typedef struct {
- int two; /*
- * contains the integer 2 as a sanity
- * check
- */
-
- int nd; /* number of dimensions */
-
- char typekind; /*
- * kind in array --- character code of
- * typestr
- */
-
- int itemsize; /* size of each element */
-
- int flags; /*
- * how should be data interpreted. Valid
- * flags are CONTIGUOUS (1), F_CONTIGUOUS (2),
- * ALIGNED (0x100), NOTSWAPPED (0x200), and
- * WRITEABLE (0x400). ARR_HAS_DESCR (0x800)
- * states that arrdescr field is present in
- * structure
- */
-
- npy_intp *shape; /*
- * A length-nd array of shape
- * information
- */
-
- npy_intp *strides; /* A length-nd array of stride information */
-
- void *data; /* A pointer to the first element of the array */
-
- PyObject *descr; /*
- * A list of fields or NULL (ignored if flags
- * does not have ARR_HAS_DESCR flag set)
- */
-} PyArrayInterface;
-
-/*
- * This is a function for hooking into the PyDataMem_NEW/FREE/RENEW functions.
- * See the documentation for PyDataMem_SetEventHook.
- */
-typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size,
- void *user_data);
-
-#if !(defined(NPY_NO_DEPRECATED_API) && (NPY_API_VERSION <= NPY_NO_DEPRECATED_API))
-#include "npy_deprecated_api.h"
-#endif
-
-#endif /* NPY_ARRAYTYPES_H */
diff --git a/include/numpy/noprefix.h b/include/numpy/noprefix.h
deleted file mode 100644
index b3e57480e..000000000
--- a/include/numpy/noprefix.h
+++ /dev/null
@@ -1,209 +0,0 @@
-#ifndef NPY_NOPREFIX_H
-#define NPY_NOPREFIX_H
-
-/*
- * You can directly include noprefix.h as a backward
- * compatibility measure
- */
-#ifndef NPY_NO_PREFIX
-#include "ndarrayobject.h"
-#include "npy_interrupt.h"
-#endif
-
-#define SIGSETJMP NPY_SIGSETJMP
-#define SIGLONGJMP NPY_SIGLONGJMP
-#define SIGJMP_BUF NPY_SIGJMP_BUF
-
-#define MAX_DIMS NPY_MAXDIMS
-
-#define longlong npy_longlong
-#define ulonglong npy_ulonglong
-#define Bool npy_bool
-#define longdouble npy_longdouble
-#define byte npy_byte
-
-#ifndef _BSD_SOURCE
-#define ushort npy_ushort
-#define uint npy_uint
-#define ulong npy_ulong
-#endif
-
-#define ubyte npy_ubyte
-#define ushort npy_ushort
-#define uint npy_uint
-#define ulong npy_ulong
-#define cfloat npy_cfloat
-#define cdouble npy_cdouble
-#define clongdouble npy_clongdouble
-#define Int8 npy_int8
-#define UInt8 npy_uint8
-#define Int16 npy_int16
-#define UInt16 npy_uint16
-#define Int32 npy_int32
-#define UInt32 npy_uint32
-#define Int64 npy_int64
-#define UInt64 npy_uint64
-#define Int128 npy_int128
-#define UInt128 npy_uint128
-#define Int256 npy_int256
-#define UInt256 npy_uint256
-#define Float16 npy_float16
-#define Complex32 npy_complex32
-#define Float32 npy_float32
-#define Complex64 npy_complex64
-#define Float64 npy_float64
-#define Complex128 npy_complex128
-#define Float80 npy_float80
-#define Complex160 npy_complex160
-#define Float96 npy_float96
-#define Complex192 npy_complex192
-#define Float128 npy_float128
-#define Complex256 npy_complex256
-#define intp npy_intp
-#define uintp npy_uintp
-#define datetime npy_datetime
-#define timedelta npy_timedelta
-
-#define SIZEOF_INTP NPY_SIZEOF_INTP
-#define SIZEOF_UINTP NPY_SIZEOF_UINTP
-#define SIZEOF_DATETIME NPY_SIZEOF_DATETIME
-#define SIZEOF_TIMEDELTA NPY_SIZEOF_TIMEDELTA
-
-#define LONGLONG_FMT NPY_LONGLONG_FMT
-#define ULONGLONG_FMT NPY_ULONGLONG_FMT
-#define LONGLONG_SUFFIX NPY_LONGLONG_SUFFIX
-#define ULONGLONG_SUFFIX NPY_ULONGLONG_SUFFIX
-
-#define MAX_INT8 127
-#define MIN_INT8 -128
-#define MAX_UINT8 255
-#define MAX_INT16 32767
-#define MIN_INT16 -32768
-#define MAX_UINT16 65535
-#define MAX_INT32 2147483647
-#define MIN_INT32 (-MAX_INT32 - 1)
-#define MAX_UINT32 4294967295U
-#define MAX_INT64 LONGLONG_SUFFIX(9223372036854775807)
-#define MIN_INT64 (-MAX_INT64 - LONGLONG_SUFFIX(1))
-#define MAX_UINT64 ULONGLONG_SUFFIX(18446744073709551615)
-#define MAX_INT128 LONGLONG_SUFFIX(85070591730234615865843651857942052864)
-#define MIN_INT128 (-MAX_INT128 - LONGLONG_SUFFIX(1))
-#define MAX_UINT128 ULONGLONG_SUFFIX(170141183460469231731687303715884105728)
-#define MAX_INT256 LONGLONG_SUFFIX(57896044618658097711785492504343953926634992332820282019728792003956564819967)
-#define MIN_INT256 (-MAX_INT256 - LONGLONG_SUFFIX(1))
-#define MAX_UINT256 ULONGLONG_SUFFIX(115792089237316195423570985008687907853269984665640564039457584007913129639935)
-
-#define MAX_BYTE NPY_MAX_BYTE
-#define MIN_BYTE NPY_MIN_BYTE
-#define MAX_UBYTE NPY_MAX_UBYTE
-#define MAX_SHORT NPY_MAX_SHORT
-#define MIN_SHORT NPY_MIN_SHORT
-#define MAX_USHORT NPY_MAX_USHORT
-#define MAX_INT NPY_MAX_INT
-#define MIN_INT NPY_MIN_INT
-#define MAX_UINT NPY_MAX_UINT
-#define MAX_LONG NPY_MAX_LONG
-#define MIN_LONG NPY_MIN_LONG
-#define MAX_ULONG NPY_MAX_ULONG
-#define MAX_LONGLONG NPY_MAX_LONGLONG
-#define MIN_LONGLONG NPY_MIN_LONGLONG
-#define MAX_ULONGLONG NPY_MAX_ULONGLONG
-#define MIN_DATETIME NPY_MIN_DATETIME
-#define MAX_DATETIME NPY_MAX_DATETIME
-#define MIN_TIMEDELTA NPY_MIN_TIMEDELTA
-#define MAX_TIMEDELTA NPY_MAX_TIMEDELTA
-
-#define SIZEOF_LONGDOUBLE NPY_SIZEOF_LONGDOUBLE
-#define SIZEOF_LONGLONG NPY_SIZEOF_LONGLONG
-#define SIZEOF_HALF NPY_SIZEOF_HALF
-#define BITSOF_BOOL NPY_BITSOF_BOOL
-#define BITSOF_CHAR NPY_BITSOF_CHAR
-#define BITSOF_SHORT NPY_BITSOF_SHORT
-#define BITSOF_INT NPY_BITSOF_INT
-#define BITSOF_LONG NPY_BITSOF_LONG
-#define BITSOF_LONGLONG NPY_BITSOF_LONGLONG
-#define BITSOF_HALF NPY_BITSOF_HALF
-#define BITSOF_FLOAT NPY_BITSOF_FLOAT
-#define BITSOF_DOUBLE NPY_BITSOF_DOUBLE
-#define BITSOF_LONGDOUBLE NPY_BITSOF_LONGDOUBLE
-#define BITSOF_DATETIME NPY_BITSOF_DATETIME
-#define BITSOF_TIMEDELTA NPY_BITSOF_TIMEDELTA
-
-#define _pya_malloc PyArray_malloc
-#define _pya_free PyArray_free
-#define _pya_realloc PyArray_realloc
-
-#define BEGIN_THREADS_DEF NPY_BEGIN_THREADS_DEF
-#define BEGIN_THREADS NPY_BEGIN_THREADS
-#define END_THREADS NPY_END_THREADS
-#define ALLOW_C_API_DEF NPY_ALLOW_C_API_DEF
-#define ALLOW_C_API NPY_ALLOW_C_API
-#define DISABLE_C_API NPY_DISABLE_C_API
-
-#define PY_FAIL NPY_FAIL
-#define PY_SUCCEED NPY_SUCCEED
-
-#ifndef TRUE
-#define TRUE NPY_TRUE
-#endif
-
-#ifndef FALSE
-#define FALSE NPY_FALSE
-#endif
-
-#define LONGDOUBLE_FMT NPY_LONGDOUBLE_FMT
-
-#define CONTIGUOUS NPY_CONTIGUOUS
-#define C_CONTIGUOUS NPY_C_CONTIGUOUS
-#define FORTRAN NPY_FORTRAN
-#define F_CONTIGUOUS NPY_F_CONTIGUOUS
-#define OWNDATA NPY_OWNDATA
-#define FORCECAST NPY_FORCECAST
-#define ENSURECOPY NPY_ENSURECOPY
-#define ENSUREARRAY NPY_ENSUREARRAY
-#define ELEMENTSTRIDES NPY_ELEMENTSTRIDES
-#define ALIGNED NPY_ALIGNED
-#define NOTSWAPPED NPY_NOTSWAPPED
-#define WRITEABLE NPY_WRITEABLE
-#define UPDATEIFCOPY NPY_UPDATEIFCOPY
-#define ARR_HAS_DESCR NPY_ARR_HAS_DESCR
-#define BEHAVED NPY_BEHAVED
-#define BEHAVED_NS NPY_BEHAVED_NS
-#define CARRAY NPY_CARRAY
-#define CARRAY_RO NPY_CARRAY_RO
-#define FARRAY NPY_FARRAY
-#define FARRAY_RO NPY_FARRAY_RO
-#define DEFAULT NPY_DEFAULT
-#define IN_ARRAY NPY_IN_ARRAY
-#define OUT_ARRAY NPY_OUT_ARRAY
-#define INOUT_ARRAY NPY_INOUT_ARRAY
-#define IN_FARRAY NPY_IN_FARRAY
-#define OUT_FARRAY NPY_OUT_FARRAY
-#define INOUT_FARRAY NPY_INOUT_FARRAY
-#define UPDATE_ALL NPY_UPDATE_ALL
-
-#define OWN_DATA NPY_OWNDATA
-#define BEHAVED_FLAGS NPY_BEHAVED
-#define BEHAVED_FLAGS_NS NPY_BEHAVED_NS
-#define CARRAY_FLAGS_RO NPY_CARRAY_RO
-#define CARRAY_FLAGS NPY_CARRAY
-#define FARRAY_FLAGS NPY_FARRAY
-#define FARRAY_FLAGS_RO NPY_FARRAY_RO
-#define DEFAULT_FLAGS NPY_DEFAULT
-#define UPDATE_ALL_FLAGS NPY_UPDATE_ALL_FLAGS
-
-#ifndef MIN
-#define MIN PyArray_MIN
-#endif
-#ifndef MAX
-#define MAX PyArray_MAX
-#endif
-#define MAX_INTP NPY_MAX_INTP
-#define MIN_INTP NPY_MIN_INTP
-#define MAX_UINTP NPY_MAX_UINTP
-#define INTP_FMT NPY_INTP_FMT
-
-#define REFCOUNT PyArray_REFCOUNT
-#define MAX_ELSIZE NPY_MAX_ELSIZE
-
-#endif
diff --git a/include/numpy/npy_3kcompat.h b/include/numpy/npy_3kcompat.h
deleted file mode 100644
index d0cd9ac1a..000000000
--- a/include/numpy/npy_3kcompat.h
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
- * This is a convenience header file providing compatibility utilities
- * for supporting Python 2 and Python 3 in the same code base.
- *
- * If you want to use this for your own projects, it's recommended to make a
- * copy of it. Although the stuff below is unlikely to change, we don't provide
- * strong backwards compatibility guarantees at the moment.
- */
-
-#ifndef _NPY_3KCOMPAT_H_
-#define _NPY_3KCOMPAT_H_
-
-#include
-#include
-
-#if PY_VERSION_HEX >= 0x03000000
-#ifndef NPY_PY3K
-#define NPY_PY3K 1
-#endif
-#endif
-
-#include "numpy/npy_common.h"
-#include "numpy/ndarrayobject.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * PyInt -> PyLong
- */
-
-#if defined(NPY_PY3K)
-/* Return True only if the long fits in a C long */
-static NPY_INLINE int PyInt_Check(PyObject *op) {
- int overflow = 0;
- if (!PyLong_Check(op)) {
- return 0;
- }
- PyLong_AsLongAndOverflow(op, &overflow);
- return (overflow == 0);
-}
-
-#define PyInt_FromLong PyLong_FromLong
-#define PyInt_AsLong PyLong_AsLong
-#define PyInt_AS_LONG PyLong_AsLong
-#define PyInt_AsSsize_t PyLong_AsSsize_t
-
-/* NOTE:
- *
- * Since the PyLong type is very different from the fixed-range PyInt,
- * we don't define PyInt_Type -> PyLong_Type.
- */
-#endif /* NPY_PY3K */
-
-/*
- * PyString -> PyBytes
- */
-
-#if defined(NPY_PY3K)
-
-#define PyString_Type PyBytes_Type
-#define PyString_Check PyBytes_Check
-#define PyStringObject PyBytesObject
-#define PyString_FromString PyBytes_FromString
-#define PyString_FromStringAndSize PyBytes_FromStringAndSize
-#define PyString_AS_STRING PyBytes_AS_STRING
-#define PyString_AsStringAndSize PyBytes_AsStringAndSize
-#define PyString_FromFormat PyBytes_FromFormat
-#define PyString_Concat PyBytes_Concat
-#define PyString_ConcatAndDel PyBytes_ConcatAndDel
-#define PyString_AsString PyBytes_AsString
-#define PyString_GET_SIZE PyBytes_GET_SIZE
-#define PyString_Size PyBytes_Size
-
-#define PyUString_Type PyUnicode_Type
-#define PyUString_Check PyUnicode_Check
-#define PyUStringObject PyUnicodeObject
-#define PyUString_FromString PyUnicode_FromString
-#define PyUString_FromStringAndSize PyUnicode_FromStringAndSize
-#define PyUString_FromFormat PyUnicode_FromFormat
-#define PyUString_Concat PyUnicode_Concat2
-#define PyUString_ConcatAndDel PyUnicode_ConcatAndDel
-#define PyUString_GET_SIZE PyUnicode_GET_SIZE
-#define PyUString_Size PyUnicode_Size
-#define PyUString_InternFromString PyUnicode_InternFromString
-#define PyUString_Format PyUnicode_Format
-
-#else
-
-#define PyBytes_Type PyString_Type
-#define PyBytes_Check PyString_Check
-#define PyBytesObject PyStringObject
-#define PyBytes_FromString PyString_FromString
-#define PyBytes_FromStringAndSize PyString_FromStringAndSize
-#define PyBytes_AS_STRING PyString_AS_STRING
-#define PyBytes_AsStringAndSize PyString_AsStringAndSize
-#define PyBytes_FromFormat PyString_FromFormat
-#define PyBytes_Concat PyString_Concat
-#define PyBytes_ConcatAndDel PyString_ConcatAndDel
-#define PyBytes_AsString PyString_AsString
-#define PyBytes_GET_SIZE PyString_GET_SIZE
-#define PyBytes_Size PyString_Size
-
-#define PyUString_Type PyString_Type
-#define PyUString_Check PyString_Check
-#define PyUStringObject PyStringObject
-#define PyUString_FromString PyString_FromString
-#define PyUString_FromStringAndSize PyString_FromStringAndSize
-#define PyUString_FromFormat PyString_FromFormat
-#define PyUString_Concat PyString_Concat
-#define PyUString_ConcatAndDel PyString_ConcatAndDel
-#define PyUString_GET_SIZE PyString_GET_SIZE
-#define PyUString_Size PyString_Size
-#define PyUString_InternFromString PyString_InternFromString
-#define PyUString_Format PyString_Format
-
-#endif /* NPY_PY3K */
-
-
-static NPY_INLINE void
-PyUnicode_ConcatAndDel(PyObject **left, PyObject *right)
-{
- PyObject *newobj;
- newobj = PyUnicode_Concat(*left, right);
- Py_DECREF(*left);
- Py_DECREF(right);
- *left = newobj;
-}
-
-static NPY_INLINE void
-PyUnicode_Concat2(PyObject **left, PyObject *right)
-{
- PyObject *newobj;
- newobj = PyUnicode_Concat(*left, right);
- Py_DECREF(*left);
- *left = newobj;
-}
-
-/*
- * PyFile_* compatibility
- */
-#if defined(NPY_PY3K)
-
-/*
- * Get a FILE* handle to the file represented by the Python object
- */
-static NPY_INLINE FILE*
-npy_PyFile_Dup(PyObject *file, char *mode)
-{
- int fd, fd2;
- PyObject *ret, *os;
- Py_ssize_t pos;
- FILE *handle;
- /* Flush first to ensure things end up in the file in the correct order */
- ret = PyObject_CallMethod(file, "flush", "");
- if (ret == NULL) {
- return NULL;
- }
- Py_DECREF(ret);
- fd = PyObject_AsFileDescriptor(file);
- if (fd == -1) {
- return NULL;
- }
- os = PyImport_ImportModule("os");
- if (os == NULL) {
- return NULL;
- }
- ret = PyObject_CallMethod(os, "dup", "i", fd);
- Py_DECREF(os);
- if (ret == NULL) {
- return NULL;
- }
- fd2 = PyNumber_AsSsize_t(ret, NULL);
- Py_DECREF(ret);
-#ifdef _WIN32
- handle = _fdopen(fd2, mode);
-#else
- handle = fdopen(fd2, mode);
-#endif
- if (handle == NULL) {
- PyErr_SetString(PyExc_IOError,
- "Getting a FILE* from a Python file object failed");
- }
- ret = PyObject_CallMethod(file, "tell", "");
- if (ret == NULL) {
- fclose(handle);
- return NULL;
- }
- pos = PyNumber_AsSsize_t(ret, PyExc_OverflowError);
- Py_DECREF(ret);
- if (PyErr_Occurred()) {
- fclose(handle);
- return NULL;
- }
- npy_fseek(handle, pos, SEEK_SET);
- return handle;
-}
-
-/*
- * Close the dup-ed file handle, and seek the Python one to the current position
- */
-static NPY_INLINE int
-npy_PyFile_DupClose(PyObject *file, FILE* handle)
-{
- PyObject *ret;
- Py_ssize_t position;
- position = npy_ftell(handle);
- fclose(handle);
-
- ret = PyObject_CallMethod(file, "seek", NPY_SSIZE_T_PYFMT "i", position, 0);
- if (ret == NULL) {
- return -1;
- }
- Py_DECREF(ret);
- return 0;
-}
-
-static NPY_INLINE int
-npy_PyFile_Check(PyObject *file)
-{
- int fd;
- fd = PyObject_AsFileDescriptor(file);
- if (fd == -1) {
- PyErr_Clear();
- return 0;
- }
- return 1;
-}
-
-#else
-
-#define npy_PyFile_Dup(file, mode) PyFile_AsFile(file)
-#define npy_PyFile_DupClose(file, handle) (0)
-#define npy_PyFile_Check PyFile_Check
-
-#endif
-
-static NPY_INLINE PyObject*
-npy_PyFile_OpenFile(PyObject *filename, const char *mode)
-{
- PyObject *open;
- open = PyDict_GetItemString(PyEval_GetBuiltins(), "open");
- if (open == NULL) {
- return NULL;
- }
- return PyObject_CallFunction(open, "Os", filename, mode);
-}
-
-static NPY_INLINE int
-npy_PyFile_CloseFile(PyObject *file)
-{
- PyObject *ret;
-
- ret = PyObject_CallMethod(file, "close", NULL);
- if (ret == NULL) {
- return -1;
- }
- Py_DECREF(ret);
- return 0;
-}
-
-/*
- * PyObject_Cmp
- */
-#if defined(NPY_PY3K)
-static NPY_INLINE int
-PyObject_Cmp(PyObject *i1, PyObject *i2, int *cmp)
-{
- int v;
- v = PyObject_RichCompareBool(i1, i2, Py_LT);
- if (v == 0) {
- *cmp = -1;
- return 1;
- }
- else if (v == -1) {
- return -1;
- }
-
- v = PyObject_RichCompareBool(i1, i2, Py_GT);
- if (v == 0) {
- *cmp = 1;
- return 1;
- }
- else if (v == -1) {
- return -1;
- }
-
- v = PyObject_RichCompareBool(i1, i2, Py_EQ);
- if (v == 0) {
- *cmp = 0;
- return 1;
- }
- else {
- *cmp = 0;
- return -1;
- }
-}
-#endif
-
-/*
- * PyCObject functions adapted to PyCapsules.
- *
- * The main job here is to get rid of the improved error handling
- * of PyCapsules. It's a shame...
- */
-#if PY_VERSION_HEX >= 0x03000000
-
-static NPY_INLINE PyObject *
-NpyCapsule_FromVoidPtr(void *ptr, void (*dtor)(PyObject *))
-{
- PyObject *ret = PyCapsule_New(ptr, NULL, dtor);
- if (ret == NULL) {
- PyErr_Clear();
- }
- return ret;
-}
-
-static NPY_INLINE PyObject *
-NpyCapsule_FromVoidPtrAndDesc(void *ptr, void* context, void (*dtor)(PyObject *))
-{
- PyObject *ret = NpyCapsule_FromVoidPtr(ptr, dtor);
- if (ret != NULL && PyCapsule_SetContext(ret, context) != 0) {
- PyErr_Clear();
- Py_DECREF(ret);
- ret = NULL;
- }
- return ret;
-}
-
-static NPY_INLINE void *
-NpyCapsule_AsVoidPtr(PyObject *obj)
-{
- void *ret = PyCapsule_GetPointer(obj, NULL);
- if (ret == NULL) {
- PyErr_Clear();
- }
- return ret;
-}
-
-static NPY_INLINE void *
-NpyCapsule_GetDesc(PyObject *obj)
-{
- return PyCapsule_GetContext(obj);
-}
-
-static NPY_INLINE int
-NpyCapsule_Check(PyObject *ptr)
-{
- return PyCapsule_CheckExact(ptr);
-}
-
-static NPY_INLINE void
-simple_capsule_dtor(PyObject *cap)
-{
- PyArray_free(PyCapsule_GetPointer(cap, NULL));
-}
-
-#else
-
-static NPY_INLINE PyObject *
-NpyCapsule_FromVoidPtr(void *ptr, void (*dtor)(void *))
-{
- return PyCObject_FromVoidPtr(ptr, dtor);
-}
-
-static NPY_INLINE PyObject *
-NpyCapsule_FromVoidPtrAndDesc(void *ptr, void* context,
- void (*dtor)(void *, void *))
-{
- return PyCObject_FromVoidPtrAndDesc(ptr, context, dtor);
-}
-
-static NPY_INLINE void *
-NpyCapsule_AsVoidPtr(PyObject *ptr)
-{
- return PyCObject_AsVoidPtr(ptr);
-}
-
-static NPY_INLINE void *
-NpyCapsule_GetDesc(PyObject *obj)
-{
- return PyCObject_GetDesc(obj);
-}
-
-static NPY_INLINE int
-NpyCapsule_Check(PyObject *ptr)
-{
- return PyCObject_Check(ptr);
-}
-
-static NPY_INLINE void
-simple_capsule_dtor(void *ptr)
-{
- PyArray_free(ptr);
-}
-
-#endif
-
-/*
- * Hash value compatibility.
- * As of Python 3.2 hash values are of type Py_hash_t.
- * Previous versions use C long.
- */
-#if PY_VERSION_HEX < 0x03020000
-typedef long npy_hash_t;
-#define NPY_SIZEOF_HASH_T NPY_SIZEOF_LONG
-#else
-typedef Py_hash_t npy_hash_t;
-#define NPY_SIZEOF_HASH_T NPY_SIZEOF_INTP
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NPY_3KCOMPAT_H_ */
diff --git a/include/numpy/npy_common.h b/include/numpy/npy_common.h
deleted file mode 100644
index 7fca7e220..000000000
--- a/include/numpy/npy_common.h
+++ /dev/null
@@ -1,930 +0,0 @@
-#ifndef _NPY_COMMON_H_
-#define _NPY_COMMON_H_
-
-/* numpconfig.h is auto-generated */
-#include "numpyconfig.h"
-
-#if defined(_MSC_VER)
- #define NPY_INLINE __inline
-#elif defined(__GNUC__)
- #if defined(__STRICT_ANSI__)
- #define NPY_INLINE __inline__
- #else
- #define NPY_INLINE inline
- #endif
-#else
- #define NPY_INLINE
-#endif
-
-/* Enable 64 bit file position support on win-amd64. Ticket #1660 */
-#if defined(_MSC_VER) && defined(_WIN64) && (_MSC_VER > 1400)
- #define npy_fseek _fseeki64
- #define npy_ftell _ftelli64
-#else
- #define npy_fseek fseek
- #define npy_ftell ftell
-#endif
-
-/* enums for detected endianness */
-enum {
- NPY_CPU_UNKNOWN_ENDIAN,
- NPY_CPU_LITTLE,
- NPY_CPU_BIG
-};
-
-/*
- * This is to typedef npy_intp to the appropriate pointer size for
- * this platform. Py_intptr_t, Py_uintptr_t are defined in pyport.h.
- */
-typedef Py_intptr_t npy_intp;
-typedef Py_uintptr_t npy_uintp;
-#define NPY_SIZEOF_CHAR 1
-#define NPY_SIZEOF_BYTE 1
-#define NPY_SIZEOF_INTP NPY_SIZEOF_PY_INTPTR_T
-#define NPY_SIZEOF_UINTP NPY_SIZEOF_PY_INTPTR_T
-#define NPY_SIZEOF_CFLOAT NPY_SIZEOF_COMPLEX_FLOAT
-#define NPY_SIZEOF_CDOUBLE NPY_SIZEOF_COMPLEX_DOUBLE
-#define NPY_SIZEOF_CLONGDOUBLE NPY_SIZEOF_COMPLEX_LONGDOUBLE
-
-#ifdef constchar
-#undef constchar
-#endif
-
-#if (PY_VERSION_HEX < 0x02050000)
- #ifndef PY_SSIZE_T_MIN
- typedef int Py_ssize_t;
- #define PY_SSIZE_T_MAX INT_MAX
- #define PY_SSIZE_T_MIN INT_MIN
- #endif
-#define NPY_SSIZE_T_PYFMT "i"
-#define constchar const char
-#else
-#define NPY_SSIZE_T_PYFMT "n"
-#define constchar char
-#endif
-
-/* NPY_INTP_FMT Note:
- * Unlike the other NPY_*_FMT macros which are used with
- * PyOS_snprintf, NPY_INTP_FMT is used with PyErr_Format and
- * PyString_Format. These functions use different formatting
- * codes which are portably specified according to the Python
- * documentation. See ticket #1795.
- *
- * On Windows x64, the LONGLONG formatter should be used, but
- * in Python 2.6 the %lld formatter is not supported. In this
- * case we work around the problem by using the %zd formatter.
- */
-#if NPY_SIZEOF_PY_INTPTR_T == NPY_SIZEOF_INT
- #define NPY_INTP NPY_INT
- #define NPY_UINTP NPY_UINT
- #define PyIntpArrType_Type PyIntArrType_Type
- #define PyUIntpArrType_Type PyUIntArrType_Type
- #define NPY_MAX_INTP NPY_MAX_INT
- #define NPY_MIN_INTP NPY_MIN_INT
- #define NPY_MAX_UINTP NPY_MAX_UINT
- #define NPY_INTP_FMT "d"
-#elif NPY_SIZEOF_PY_INTPTR_T == NPY_SIZEOF_LONG
- #define NPY_INTP NPY_LONG
- #define NPY_UINTP NPY_ULONG
- #define PyIntpArrType_Type PyLongArrType_Type
- #define PyUIntpArrType_Type PyULongArrType_Type
- #define NPY_MAX_INTP NPY_MAX_LONG
- #define NPY_MIN_INTP NPY_MIN_LONG
- #define NPY_MAX_UINTP NPY_MAX_ULONG
- #define NPY_INTP_FMT "ld"
-#elif defined(PY_LONG_LONG) && (NPY_SIZEOF_PY_INTPTR_T == NPY_SIZEOF_LONGLONG)
- #define NPY_INTP NPY_LONGLONG
- #define NPY_UINTP NPY_ULONGLONG
- #define PyIntpArrType_Type PyLongLongArrType_Type
- #define PyUIntpArrType_Type PyULongLongArrType_Type
- #define NPY_MAX_INTP NPY_MAX_LONGLONG
- #define NPY_MIN_INTP NPY_MIN_LONGLONG
- #define NPY_MAX_UINTP NPY_MAX_ULONGLONG
- #if (PY_VERSION_HEX >= 0x02070000)
- #define NPY_INTP_FMT "lld"
- #else
- #define NPY_INTP_FMT "zd"
- #endif
-#endif
-
-/*
- * We can only use C99 formats for npy_int_p if it is the same as
- * intp_t, hence the condition on HAVE_UNITPTR_T
- */
-#if (NPY_USE_C99_FORMATS) == 1 \
- && (defined HAVE_UINTPTR_T) \
- && (defined HAVE_INTTYPES_H)
- #include
- #undef NPY_INTP_FMT
- #define NPY_INTP_FMT PRIdPTR
-#endif
-
-
-/*
- * Some platforms don't define bool, long long, or long double.
- * Handle that here.
- */
-#define NPY_BYTE_FMT "hhd"
-#define NPY_UBYTE_FMT "hhu"
-#define NPY_SHORT_FMT "hd"
-#define NPY_USHORT_FMT "hu"
-#define NPY_INT_FMT "d"
-#define NPY_UINT_FMT "u"
-#define NPY_LONG_FMT "ld"
-#define NPY_ULONG_FMT "lu"
-#define NPY_HALF_FMT "g"
-#define NPY_FLOAT_FMT "g"
-#define NPY_DOUBLE_FMT "g"
-
-
-#ifdef PY_LONG_LONG
-typedef PY_LONG_LONG npy_longlong;
-typedef unsigned PY_LONG_LONG npy_ulonglong;
-# ifdef _MSC_VER
-# define NPY_LONGLONG_FMT "I64d"
-# define NPY_ULONGLONG_FMT "I64u"
-# elif defined(__APPLE__) || defined(__FreeBSD__)
-/* "%Ld" only parses 4 bytes -- "L" is floating modifier on MacOS X/BSD */
-# define NPY_LONGLONG_FMT "lld"
-# define NPY_ULONGLONG_FMT "llu"
-/*
- another possible variant -- *quad_t works on *BSD, but is deprecated:
- #define LONGLONG_FMT "qd"
- #define ULONGLONG_FMT "qu"
-*/
-# else
-# define NPY_LONGLONG_FMT "Ld"
-# define NPY_ULONGLONG_FMT "Lu"
-# endif
-# ifdef _MSC_VER
-# define NPY_LONGLONG_SUFFIX(x) (x##i64)
-# define NPY_ULONGLONG_SUFFIX(x) (x##Ui64)
-# else
-# define NPY_LONGLONG_SUFFIX(x) (x##LL)
-# define NPY_ULONGLONG_SUFFIX(x) (x##ULL)
-# endif
-#else
-typedef long npy_longlong;
-typedef unsigned long npy_ulonglong;
-# define NPY_LONGLONG_SUFFIX(x) (x##L)
-# define NPY_ULONGLONG_SUFFIX(x) (x##UL)
-#endif
-
-
-typedef unsigned char npy_bool;
-#define NPY_FALSE 0
-#define NPY_TRUE 1
-
-
-#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE
- typedef double npy_longdouble;
- #define NPY_LONGDOUBLE_FMT "g"
-#else
- typedef long double npy_longdouble;
- #define NPY_LONGDOUBLE_FMT "Lg"
-#endif
-
-#ifndef Py_USING_UNICODE
-#error Must use Python with unicode enabled.
-#endif
-
-
-typedef signed char npy_byte;
-typedef unsigned char npy_ubyte;
-typedef unsigned short npy_ushort;
-typedef unsigned int npy_uint;
-typedef unsigned long npy_ulong;
-
-/* These are for completeness */
-typedef char npy_char;
-typedef short npy_short;
-typedef int npy_int;
-typedef long npy_long;
-typedef float npy_float;
-typedef double npy_double;
-
-/*
- * Disabling C99 complex usage: a lot of C code in numpy/scipy rely on being
- * able to do .real/.imag. Will have to convert code first.
- */
-#if 0
-#if defined(NPY_USE_C99_COMPLEX) && defined(NPY_HAVE_COMPLEX_DOUBLE)
-typedef complex npy_cdouble;
-#else
-typedef struct { double real, imag; } npy_cdouble;
-#endif
-
-#if defined(NPY_USE_C99_COMPLEX) && defined(NPY_HAVE_COMPLEX_FLOAT)
-typedef complex float npy_cfloat;
-#else
-typedef struct { float real, imag; } npy_cfloat;
-#endif
-
-#if defined(NPY_USE_C99_COMPLEX) && defined(NPY_HAVE_COMPLEX_LONG_DOUBLE)
-typedef complex long double npy_clongdouble;
-#else
-typedef struct {npy_longdouble real, imag;} npy_clongdouble;
-#endif
-#endif
-#if NPY_SIZEOF_COMPLEX_DOUBLE != 2 * NPY_SIZEOF_DOUBLE
-#error npy_cdouble definition is not compatible with C99 complex definition ! \
- Please contact Numpy maintainers and give detailed information about your \
- compiler and platform
-#endif
-typedef struct { double real, imag; } npy_cdouble;
-
-#if NPY_SIZEOF_COMPLEX_FLOAT != 2 * NPY_SIZEOF_FLOAT
-#error npy_cfloat definition is not compatible with C99 complex definition ! \
- Please contact Numpy maintainers and give detailed information about your \
- compiler and platform
-#endif
-typedef struct { float real, imag; } npy_cfloat;
-
-#if NPY_SIZEOF_COMPLEX_LONGDOUBLE != 2 * NPY_SIZEOF_LONGDOUBLE
-#error npy_clongdouble definition is not compatible with C99 complex definition ! \
- Please contact Numpy maintainers and give detailed information about your \
- compiler and platform
-#endif
-typedef struct { npy_longdouble real, imag; } npy_clongdouble;
-
-/*
- * numarray-style bit-width typedefs
- */
-#define NPY_MAX_INT8 127
-#define NPY_MIN_INT8 -128
-#define NPY_MAX_UINT8 255
-#define NPY_MAX_INT16 32767
-#define NPY_MIN_INT16 -32768
-#define NPY_MAX_UINT16 65535
-#define NPY_MAX_INT32 2147483647
-#define NPY_MIN_INT32 (-NPY_MAX_INT32 - 1)
-#define NPY_MAX_UINT32 4294967295U
-#define NPY_MAX_INT64 NPY_LONGLONG_SUFFIX(9223372036854775807)
-#define NPY_MIN_INT64 (-NPY_MAX_INT64 - NPY_LONGLONG_SUFFIX(1))
-#define NPY_MAX_UINT64 NPY_ULONGLONG_SUFFIX(18446744073709551615)
-#define NPY_MAX_INT128 NPY_LONGLONG_SUFFIX(85070591730234615865843651857942052864)
-#define NPY_MIN_INT128 (-NPY_MAX_INT128 - NPY_LONGLONG_SUFFIX(1))
-#define NPY_MAX_UINT128 NPY_ULONGLONG_SUFFIX(170141183460469231731687303715884105728)
-#define NPY_MAX_INT256 NPY_LONGLONG_SUFFIX(57896044618658097711785492504343953926634992332820282019728792003956564819967)
-#define NPY_MIN_INT256 (-NPY_MAX_INT256 - NPY_LONGLONG_SUFFIX(1))
-#define NPY_MAX_UINT256 NPY_ULONGLONG_SUFFIX(115792089237316195423570985008687907853269984665640564039457584007913129639935)
-#define NPY_MIN_DATETIME NPY_MIN_INT64
-#define NPY_MAX_DATETIME NPY_MAX_INT64
-#define NPY_MIN_TIMEDELTA NPY_MIN_INT64
-#define NPY_MAX_TIMEDELTA NPY_MAX_INT64
-
- /* Need to find the number of bits for each type and
- make definitions accordingly.
-
- C states that sizeof(char) == 1 by definition
-
- So, just using the sizeof keyword won't help.
-
- It also looks like Python itself uses sizeof(char) quite a
- bit, which by definition should be 1 all the time.
-
- Idea: Make Use of CHAR_BIT which should tell us how many
- BITS per CHARACTER
- */
-
- /* Include platform definitions -- These are in the C89/90 standard */
-#include
-#define NPY_MAX_BYTE SCHAR_MAX
-#define NPY_MIN_BYTE SCHAR_MIN
-#define NPY_MAX_UBYTE UCHAR_MAX
-#define NPY_MAX_SHORT SHRT_MAX
-#define NPY_MIN_SHORT SHRT_MIN
-#define NPY_MAX_USHORT USHRT_MAX
-#define NPY_MAX_INT INT_MAX
-#ifndef INT_MIN
-#define INT_MIN (-INT_MAX - 1)
-#endif
-#define NPY_MIN_INT INT_MIN
-#define NPY_MAX_UINT UINT_MAX
-#define NPY_MAX_LONG LONG_MAX
-#define NPY_MIN_LONG LONG_MIN
-#define NPY_MAX_ULONG ULONG_MAX
-
-#define NPY_SIZEOF_HALF 2
-#define NPY_SIZEOF_DATETIME 8
-#define NPY_SIZEOF_TIMEDELTA 8
-
-#define NPY_BITSOF_BOOL (sizeof(npy_bool) * CHAR_BIT)
-#define NPY_BITSOF_CHAR CHAR_BIT
-#define NPY_BITSOF_BYTE (NPY_SIZEOF_BYTE * CHAR_BIT)
-#define NPY_BITSOF_SHORT (NPY_SIZEOF_SHORT * CHAR_BIT)
-#define NPY_BITSOF_INT (NPY_SIZEOF_INT * CHAR_BIT)
-#define NPY_BITSOF_LONG (NPY_SIZEOF_LONG * CHAR_BIT)
-#define NPY_BITSOF_LONGLONG (NPY_SIZEOF_LONGLONG * CHAR_BIT)
-#define NPY_BITSOF_INTP (NPY_SIZEOF_INTP * CHAR_BIT)
-#define NPY_BITSOF_HALF (NPY_SIZEOF_HALF * CHAR_BIT)
-#define NPY_BITSOF_FLOAT (NPY_SIZEOF_FLOAT * CHAR_BIT)
-#define NPY_BITSOF_DOUBLE (NPY_SIZEOF_DOUBLE * CHAR_BIT)
-#define NPY_BITSOF_LONGDOUBLE (NPY_SIZEOF_LONGDOUBLE * CHAR_BIT)
-#define NPY_BITSOF_CFLOAT (NPY_SIZEOF_CFLOAT * CHAR_BIT)
-#define NPY_BITSOF_CDOUBLE (NPY_SIZEOF_CDOUBLE * CHAR_BIT)
-#define NPY_BITSOF_CLONGDOUBLE (NPY_SIZEOF_CLONGDOUBLE * CHAR_BIT)
-#define NPY_BITSOF_DATETIME (NPY_SIZEOF_DATETIME * CHAR_BIT)
-#define NPY_BITSOF_TIMEDELTA (NPY_SIZEOF_TIMEDELTA * CHAR_BIT)
-
-#if NPY_BITSOF_LONG == 8
-#define NPY_INT8 NPY_LONG
-#define NPY_UINT8 NPY_ULONG
- typedef long npy_int8;
- typedef unsigned long npy_uint8;
-#define PyInt8ScalarObject PyLongScalarObject
-#define PyInt8ArrType_Type PyLongArrType_Type
-#define PyUInt8ScalarObject PyULongScalarObject
-#define PyUInt8ArrType_Type PyULongArrType_Type
-#define NPY_INT8_FMT NPY_LONG_FMT
-#define NPY_UINT8_FMT NPY_ULONG_FMT
-#elif NPY_BITSOF_LONG == 16
-#define NPY_INT16 NPY_LONG
-#define NPY_UINT16 NPY_ULONG
- typedef long npy_int16;
- typedef unsigned long npy_uint16;
-#define PyInt16ScalarObject PyLongScalarObject
-#define PyInt16ArrType_Type PyLongArrType_Type
-#define PyUInt16ScalarObject PyULongScalarObject
-#define PyUInt16ArrType_Type PyULongArrType_Type
-#define NPY_INT16_FMT NPY_LONG_FMT
-#define NPY_UINT16_FMT NPY_ULONG_FMT
-#elif NPY_BITSOF_LONG == 32
-#define NPY_INT32 NPY_LONG
-#define NPY_UINT32 NPY_ULONG
- typedef long npy_int32;
- typedef unsigned long npy_uint32;
- typedef unsigned long npy_ucs4;
-#define PyInt32ScalarObject PyLongScalarObject
-#define PyInt32ArrType_Type PyLongArrType_Type
-#define PyUInt32ScalarObject PyULongScalarObject
-#define PyUInt32ArrType_Type PyULongArrType_Type
-#define NPY_INT32_FMT NPY_LONG_FMT
-#define NPY_UINT32_FMT NPY_ULONG_FMT
-#elif NPY_BITSOF_LONG == 64
-#define NPY_INT64 NPY_LONG
-#define NPY_UINT64 NPY_ULONG
- typedef long npy_int64;
- typedef unsigned long npy_uint64;
-#define PyInt64ScalarObject PyLongScalarObject
-#define PyInt64ArrType_Type PyLongArrType_Type
-#define PyUInt64ScalarObject PyULongScalarObject
-#define PyUInt64ArrType_Type PyULongArrType_Type
-#define NPY_INT64_FMT NPY_LONG_FMT
-#define NPY_UINT64_FMT NPY_ULONG_FMT
-#define MyPyLong_FromInt64 PyLong_FromLong
-#define MyPyLong_AsInt64 PyLong_AsLong
-#elif NPY_BITSOF_LONG == 128
-#define NPY_INT128 NPY_LONG
-#define NPY_UINT128 NPY_ULONG
- typedef long npy_int128;
- typedef unsigned long npy_uint128;
-#define PyInt128ScalarObject PyLongScalarObject
-#define PyInt128ArrType_Type PyLongArrType_Type
-#define PyUInt128ScalarObject PyULongScalarObject
-#define PyUInt128ArrType_Type PyULongArrType_Type
-#define NPY_INT128_FMT NPY_LONG_FMT
-#define NPY_UINT128_FMT NPY_ULONG_FMT
-#endif
-
-#if NPY_BITSOF_LONGLONG == 8
-# ifndef NPY_INT8
-# define NPY_INT8 NPY_LONGLONG
-# define NPY_UINT8 NPY_ULONGLONG
- typedef npy_longlong npy_int8;
- typedef npy_ulonglong npy_uint8;
-# define PyInt8ScalarObject PyLongLongScalarObject
-# define PyInt8ArrType_Type PyLongLongArrType_Type
-# define PyUInt8ScalarObject PyULongLongScalarObject
-# define PyUInt8ArrType_Type PyULongLongArrType_Type
-#define NPY_INT8_FMT NPY_LONGLONG_FMT
-#define NPY_UINT8_FMT NPY_ULONGLONG_FMT
-# endif
-# define NPY_MAX_LONGLONG NPY_MAX_INT8
-# define NPY_MIN_LONGLONG NPY_MIN_INT8
-# define NPY_MAX_ULONGLONG NPY_MAX_UINT8
-#elif NPY_BITSOF_LONGLONG == 16
-# ifndef NPY_INT16
-# define NPY_INT16 NPY_LONGLONG
-# define NPY_UINT16 NPY_ULONGLONG
- typedef npy_longlong npy_int16;
- typedef npy_ulonglong npy_uint16;
-# define PyInt16ScalarObject PyLongLongScalarObject
-# define PyInt16ArrType_Type PyLongLongArrType_Type
-# define PyUInt16ScalarObject PyULongLongScalarObject
-# define PyUInt16ArrType_Type PyULongLongArrType_Type
-#define NPY_INT16_FMT NPY_LONGLONG_FMT
-#define NPY_UINT16_FMT NPY_ULONGLONG_FMT
-# endif
-# define NPY_MAX_LONGLONG NPY_MAX_INT16
-# define NPY_MIN_LONGLONG NPY_MIN_INT16
-# define NPY_MAX_ULONGLONG NPY_MAX_UINT16
-#elif NPY_BITSOF_LONGLONG == 32
-# ifndef NPY_INT32
-# define NPY_INT32 NPY_LONGLONG
-# define NPY_UINT32 NPY_ULONGLONG
- typedef npy_longlong npy_int32;
- typedef npy_ulonglong npy_uint32;
- typedef npy_ulonglong npy_ucs4;
-# define PyInt32ScalarObject PyLongLongScalarObject
-# define PyInt32ArrType_Type PyLongLongArrType_Type
-# define PyUInt32ScalarObject PyULongLongScalarObject
-# define PyUInt32ArrType_Type PyULongLongArrType_Type
-#define NPY_INT32_FMT NPY_LONGLONG_FMT
-#define NPY_UINT32_FMT NPY_ULONGLONG_FMT
-# endif
-# define NPY_MAX_LONGLONG NPY_MAX_INT32
-# define NPY_MIN_LONGLONG NPY_MIN_INT32
-# define NPY_MAX_ULONGLONG NPY_MAX_UINT32
-#elif NPY_BITSOF_LONGLONG == 64
-# ifndef NPY_INT64
-# define NPY_INT64 NPY_LONGLONG
-# define NPY_UINT64 NPY_ULONGLONG
- typedef npy_longlong npy_int64;
- typedef npy_ulonglong npy_uint64;
-# define PyInt64ScalarObject PyLongLongScalarObject
-# define PyInt64ArrType_Type PyLongLongArrType_Type
-# define PyUInt64ScalarObject PyULongLongScalarObject
-# define PyUInt64ArrType_Type PyULongLongArrType_Type
-#define NPY_INT64_FMT NPY_LONGLONG_FMT
-#define NPY_UINT64_FMT NPY_ULONGLONG_FMT
-# define MyPyLong_FromInt64 PyLong_FromLongLong
-# define MyPyLong_AsInt64 PyLong_AsLongLong
-# endif
-# define NPY_MAX_LONGLONG NPY_MAX_INT64
-# define NPY_MIN_LONGLONG NPY_MIN_INT64
-# define NPY_MAX_ULONGLONG NPY_MAX_UINT64
-#elif NPY_BITSOF_LONGLONG == 128
-# ifndef NPY_INT128
-# define NPY_INT128 NPY_LONGLONG
-# define NPY_UINT128 NPY_ULONGLONG
- typedef npy_longlong npy_int128;
- typedef npy_ulonglong npy_uint128;
-# define PyInt128ScalarObject PyLongLongScalarObject
-# define PyInt128ArrType_Type PyLongLongArrType_Type
-# define PyUInt128ScalarObject PyULongLongScalarObject
-# define PyUInt128ArrType_Type PyULongLongArrType_Type
-#define NPY_INT128_FMT NPY_LONGLONG_FMT
-#define NPY_UINT128_FMT NPY_ULONGLONG_FMT
-# endif
-# define NPY_MAX_LONGLONG NPY_MAX_INT128
-# define NPY_MIN_LONGLONG NPY_MIN_INT128
-# define NPY_MAX_ULONGLONG NPY_MAX_UINT128
-#elif NPY_BITSOF_LONGLONG == 256
-# define NPY_INT256 NPY_LONGLONG
-# define NPY_UINT256 NPY_ULONGLONG
- typedef npy_longlong npy_int256;
- typedef npy_ulonglong npy_uint256;
-# define PyInt256ScalarObject PyLongLongScalarObject
-# define PyInt256ArrType_Type PyLongLongArrType_Type
-# define PyUInt256ScalarObject PyULongLongScalarObject
-# define PyUInt256ArrType_Type PyULongLongArrType_Type
-#define NPY_INT256_FMT NPY_LONGLONG_FMT
-#define NPY_UINT256_FMT NPY_ULONGLONG_FMT
-# define NPY_MAX_LONGLONG NPY_MAX_INT256
-# define NPY_MIN_LONGLONG NPY_MIN_INT256
-# define NPY_MAX_ULONGLONG NPY_MAX_UINT256
-#endif
-
-#if NPY_BITSOF_INT == 8
-#ifndef NPY_INT8
-#define NPY_INT8 NPY_INT
-#define NPY_UINT8 NPY_UINT
- typedef int npy_int8;
- typedef unsigned int npy_uint8;
-# define PyInt8ScalarObject PyIntScalarObject
-# define PyInt8ArrType_Type PyIntArrType_Type
-# define PyUInt8ScalarObject PyUIntScalarObject
-# define PyUInt8ArrType_Type PyUIntArrType_Type
-#define NPY_INT8_FMT NPY_INT_FMT
-#define NPY_UINT8_FMT NPY_UINT_FMT
-#endif
-#elif NPY_BITSOF_INT == 16
-#ifndef NPY_INT16
-#define NPY_INT16 NPY_INT
-#define NPY_UINT16 NPY_UINT
- typedef int npy_int16;
- typedef unsigned int npy_uint16;
-# define PyInt16ScalarObject PyIntScalarObject
-# define PyInt16ArrType_Type PyIntArrType_Type
-# define PyUInt16ScalarObject PyIntUScalarObject
-# define PyUInt16ArrType_Type PyIntUArrType_Type
-#define NPY_INT16_FMT NPY_INT_FMT
-#define NPY_UINT16_FMT NPY_UINT_FMT
-#endif
-#elif NPY_BITSOF_INT == 32
-#ifndef NPY_INT32
-#define NPY_INT32 NPY_INT
-#define NPY_UINT32 NPY_UINT
- typedef int npy_int32;
- typedef unsigned int npy_uint32;
- typedef unsigned int npy_ucs4;
-# define PyInt32ScalarObject PyIntScalarObject
-# define PyInt32ArrType_Type PyIntArrType_Type
-# define PyUInt32ScalarObject PyUIntScalarObject
-# define PyUInt32ArrType_Type PyUIntArrType_Type
-#define NPY_INT32_FMT NPY_INT_FMT
-#define NPY_UINT32_FMT NPY_UINT_FMT
-#endif
-#elif NPY_BITSOF_INT == 64
-#ifndef NPY_INT64
-#define NPY_INT64 NPY_INT
-#define NPY_UINT64 NPY_UINT
- typedef int npy_int64;
- typedef unsigned int npy_uint64;
-# define PyInt64ScalarObject PyIntScalarObject
-# define PyInt64ArrType_Type PyIntArrType_Type
-# define PyUInt64ScalarObject PyUIntScalarObject
-# define PyUInt64ArrType_Type PyUIntArrType_Type
-#define NPY_INT64_FMT NPY_INT_FMT
-#define NPY_UINT64_FMT NPY_UINT_FMT
-# define MyPyLong_FromInt64 PyLong_FromLong
-# define MyPyLong_AsInt64 PyLong_AsLong
-#endif
-#elif NPY_BITSOF_INT == 128
-#ifndef NPY_INT128
-#define NPY_INT128 NPY_INT
-#define NPY_UINT128 NPY_UINT
- typedef int npy_int128;
- typedef unsigned int npy_uint128;
-# define PyInt128ScalarObject PyIntScalarObject
-# define PyInt128ArrType_Type PyIntArrType_Type
-# define PyUInt128ScalarObject PyUIntScalarObject
-# define PyUInt128ArrType_Type PyUIntArrType_Type
-#define NPY_INT128_FMT NPY_INT_FMT
-#define NPY_UINT128_FMT NPY_UINT_FMT
-#endif
-#endif
-
-#if NPY_BITSOF_SHORT == 8
-#ifndef NPY_INT8
-#define NPY_INT8 NPY_SHORT
-#define NPY_UINT8 NPY_USHORT
- typedef short npy_int8;
- typedef unsigned short npy_uint8;
-# define PyInt8ScalarObject PyShortScalarObject
-# define PyInt8ArrType_Type PyShortArrType_Type
-# define PyUInt8ScalarObject PyUShortScalarObject
-# define PyUInt8ArrType_Type PyUShortArrType_Type
-#define NPY_INT8_FMT NPY_SHORT_FMT
-#define NPY_UINT8_FMT NPY_USHORT_FMT
-#endif
-#elif NPY_BITSOF_SHORT == 16
-#ifndef NPY_INT16
-#define NPY_INT16 NPY_SHORT
-#define NPY_UINT16 NPY_USHORT
- typedef short npy_int16;
- typedef unsigned short npy_uint16;
-# define PyInt16ScalarObject PyShortScalarObject
-# define PyInt16ArrType_Type PyShortArrType_Type
-# define PyUInt16ScalarObject PyUShortScalarObject
-# define PyUInt16ArrType_Type PyUShortArrType_Type
-#define NPY_INT16_FMT NPY_SHORT_FMT
-#define NPY_UINT16_FMT NPY_USHORT_FMT
-#endif
-#elif NPY_BITSOF_SHORT == 32
-#ifndef NPY_INT32
-#define NPY_INT32 NPY_SHORT
-#define NPY_UINT32 NPY_USHORT
- typedef short npy_int32;
- typedef unsigned short npy_uint32;
- typedef unsigned short npy_ucs4;
-# define PyInt32ScalarObject PyShortScalarObject
-# define PyInt32ArrType_Type PyShortArrType_Type
-# define PyUInt32ScalarObject PyUShortScalarObject
-# define PyUInt32ArrType_Type PyUShortArrType_Type
-#define NPY_INT32_FMT NPY_SHORT_FMT
-#define NPY_UINT32_FMT NPY_USHORT_FMT
-#endif
-#elif NPY_BITSOF_SHORT == 64
-#ifndef NPY_INT64
-#define NPY_INT64 NPY_SHORT
-#define NPY_UINT64 NPY_USHORT
- typedef short npy_int64;
- typedef unsigned short npy_uint64;
-# define PyInt64ScalarObject PyShortScalarObject
-# define PyInt64ArrType_Type PyShortArrType_Type
-# define PyUInt64ScalarObject PyUShortScalarObject
-# define PyUInt64ArrType_Type PyUShortArrType_Type
-#define NPY_INT64_FMT NPY_SHORT_FMT
-#define NPY_UINT64_FMT NPY_USHORT_FMT
-# define MyPyLong_FromInt64 PyLong_FromLong
-# define MyPyLong_AsInt64 PyLong_AsLong
-#endif
-#elif NPY_BITSOF_SHORT == 128
-#ifndef NPY_INT128
-#define NPY_INT128 NPY_SHORT
-#define NPY_UINT128 NPY_USHORT
- typedef short npy_int128;
- typedef unsigned short npy_uint128;
-# define PyInt128ScalarObject PyShortScalarObject
-# define PyInt128ArrType_Type PyShortArrType_Type
-# define PyUInt128ScalarObject PyUShortScalarObject
-# define PyUInt128ArrType_Type PyUShortArrType_Type
-#define NPY_INT128_FMT NPY_SHORT_FMT
-#define NPY_UINT128_FMT NPY_USHORT_FMT
-#endif
-#endif
-
-
-#if NPY_BITSOF_CHAR == 8
-#ifndef NPY_INT8
-#define NPY_INT8 NPY_BYTE
-#define NPY_UINT8 NPY_UBYTE
- typedef signed char npy_int8;
- typedef unsigned char npy_uint8;
-# define PyInt8ScalarObject PyByteScalarObject
-# define PyInt8ArrType_Type PyByteArrType_Type
-# define PyUInt8ScalarObject PyUByteScalarObject
-# define PyUInt8ArrType_Type PyUByteArrType_Type
-#define NPY_INT8_FMT NPY_BYTE_FMT
-#define NPY_UINT8_FMT NPY_UBYTE_FMT
-#endif
-#elif NPY_BITSOF_CHAR == 16
-#ifndef NPY_INT16
-#define NPY_INT16 NPY_BYTE
-#define NPY_UINT16 NPY_UBYTE
- typedef signed char npy_int16;
- typedef unsigned char npy_uint16;
-# define PyInt16ScalarObject PyByteScalarObject
-# define PyInt16ArrType_Type PyByteArrType_Type
-# define PyUInt16ScalarObject PyUByteScalarObject
-# define PyUInt16ArrType_Type PyUByteArrType_Type
-#define NPY_INT16_FMT NPY_BYTE_FMT
-#define NPY_UINT16_FMT NPY_UBYTE_FMT
-#endif
-#elif NPY_BITSOF_CHAR == 32
-#ifndef NPY_INT32
-#define NPY_INT32 NPY_BYTE
-#define NPY_UINT32 NPY_UBYTE
- typedef signed char npy_int32;
- typedef unsigned char npy_uint32;
- typedef unsigned char npy_ucs4;
-# define PyInt32ScalarObject PyByteScalarObject
-# define PyInt32ArrType_Type PyByteArrType_Type
-# define PyUInt32ScalarObject PyUByteScalarObject
-# define PyUInt32ArrType_Type PyUByteArrType_Type
-#define NPY_INT32_FMT NPY_BYTE_FMT
-#define NPY_UINT32_FMT NPY_UBYTE_FMT
-#endif
-#elif NPY_BITSOF_CHAR == 64
-#ifndef NPY_INT64
-#define NPY_INT64 NPY_BYTE
-#define NPY_UINT64 NPY_UBYTE
- typedef signed char npy_int64;
- typedef unsigned char npy_uint64;
-# define PyInt64ScalarObject PyByteScalarObject
-# define PyInt64ArrType_Type PyByteArrType_Type
-# define PyUInt64ScalarObject PyUByteScalarObject
-# define PyUInt64ArrType_Type PyUByteArrType_Type
-#define NPY_INT64_FMT NPY_BYTE_FMT
-#define NPY_UINT64_FMT NPY_UBYTE_FMT
-# define MyPyLong_FromInt64 PyLong_FromLong
-# define MyPyLong_AsInt64 PyLong_AsLong
-#endif
-#elif NPY_BITSOF_CHAR == 128
-#ifndef NPY_INT128
-#define NPY_INT128 NPY_BYTE
-#define NPY_UINT128 NPY_UBYTE
- typedef signed char npy_int128;
- typedef unsigned char npy_uint128;
-# define PyInt128ScalarObject PyByteScalarObject
-# define PyInt128ArrType_Type PyByteArrType_Type
-# define PyUInt128ScalarObject PyUByteScalarObject
-# define PyUInt128ArrType_Type PyUByteArrType_Type
-#define NPY_INT128_FMT NPY_BYTE_FMT
-#define NPY_UINT128_FMT NPY_UBYTE_FMT
-#endif
-#endif
-
-
-
-#if NPY_BITSOF_DOUBLE == 32
-#ifndef NPY_FLOAT32
-#define NPY_FLOAT32 NPY_DOUBLE
-#define NPY_COMPLEX64 NPY_CDOUBLE
- typedef double npy_float32;
- typedef npy_cdouble npy_complex64;
-# define PyFloat32ScalarObject PyDoubleScalarObject
-# define PyComplex64ScalarObject PyCDoubleScalarObject
-# define PyFloat32ArrType_Type PyDoubleArrType_Type
-# define PyComplex64ArrType_Type PyCDoubleArrType_Type
-#define NPY_FLOAT32_FMT NPY_DOUBLE_FMT
-#define NPY_COMPLEX64_FMT NPY_CDOUBLE_FMT
-#endif
-#elif NPY_BITSOF_DOUBLE == 64
-#ifndef NPY_FLOAT64
-#define NPY_FLOAT64 NPY_DOUBLE
-#define NPY_COMPLEX128 NPY_CDOUBLE
- typedef double npy_float64;
- typedef npy_cdouble npy_complex128;
-# define PyFloat64ScalarObject PyDoubleScalarObject
-# define PyComplex128ScalarObject PyCDoubleScalarObject
-# define PyFloat64ArrType_Type PyDoubleArrType_Type
-# define PyComplex128ArrType_Type PyCDoubleArrType_Type
-#define NPY_FLOAT64_FMT NPY_DOUBLE_FMT
-#define NPY_COMPLEX128_FMT NPY_CDOUBLE_FMT
-#endif
-#elif NPY_BITSOF_DOUBLE == 80
-#ifndef NPY_FLOAT80
-#define NPY_FLOAT80 NPY_DOUBLE
-#define NPY_COMPLEX160 NPY_CDOUBLE
- typedef double npy_float80;
- typedef npy_cdouble npy_complex160;
-# define PyFloat80ScalarObject PyDoubleScalarObject
-# define PyComplex160ScalarObject PyCDoubleScalarObject
-# define PyFloat80ArrType_Type PyDoubleArrType_Type
-# define PyComplex160ArrType_Type PyCDoubleArrType_Type
-#define NPY_FLOAT80_FMT NPY_DOUBLE_FMT
-#define NPY_COMPLEX160_FMT NPY_CDOUBLE_FMT
-#endif
-#elif NPY_BITSOF_DOUBLE == 96
-#ifndef NPY_FLOAT96
-#define NPY_FLOAT96 NPY_DOUBLE
-#define NPY_COMPLEX192 NPY_CDOUBLE
- typedef double npy_float96;
- typedef npy_cdouble npy_complex192;
-# define PyFloat96ScalarObject PyDoubleScalarObject
-# define PyComplex192ScalarObject PyCDoubleScalarObject
-# define PyFloat96ArrType_Type PyDoubleArrType_Type
-# define PyComplex192ArrType_Type PyCDoubleArrType_Type
-#define NPY_FLOAT96_FMT NPY_DOUBLE_FMT
-#define NPY_COMPLEX192_FMT NPY_CDOUBLE_FMT
-#endif
-#elif NPY_BITSOF_DOUBLE == 128
-#ifndef NPY_FLOAT128
-#define NPY_FLOAT128 NPY_DOUBLE
-#define NPY_COMPLEX256 NPY_CDOUBLE
- typedef double npy_float128;
- typedef npy_cdouble npy_complex256;
-# define PyFloat128ScalarObject PyDoubleScalarObject
-# define PyComplex256ScalarObject PyCDoubleScalarObject
-# define PyFloat128ArrType_Type PyDoubleArrType_Type
-# define PyComplex256ArrType_Type PyCDoubleArrType_Type
-#define NPY_FLOAT128_FMT NPY_DOUBLE_FMT
-#define NPY_COMPLEX256_FMT NPY_CDOUBLE_FMT
-#endif
-#endif
-
-
-
-#if NPY_BITSOF_FLOAT == 32
-#ifndef NPY_FLOAT32
-#define NPY_FLOAT32 NPY_FLOAT
-#define NPY_COMPLEX64 NPY_CFLOAT
- typedef float npy_float32;
- typedef npy_cfloat npy_complex64;
-# define PyFloat32ScalarObject PyFloatScalarObject
-# define PyComplex64ScalarObject PyCFloatScalarObject
-# define PyFloat32ArrType_Type PyFloatArrType_Type
-# define PyComplex64ArrType_Type PyCFloatArrType_Type
-#define NPY_FLOAT32_FMT NPY_FLOAT_FMT
-#define NPY_COMPLEX64_FMT NPY_CFLOAT_FMT
-#endif
-#elif NPY_BITSOF_FLOAT == 64
-#ifndef NPY_FLOAT64
-#define NPY_FLOAT64 NPY_FLOAT
-#define NPY_COMPLEX128 NPY_CFLOAT
- typedef float npy_float64;
- typedef npy_cfloat npy_complex128;
-# define PyFloat64ScalarObject PyFloatScalarObject
-# define PyComplex128ScalarObject PyCFloatScalarObject
-# define PyFloat64ArrType_Type PyFloatArrType_Type
-# define PyComplex128ArrType_Type PyCFloatArrType_Type
-#define NPY_FLOAT64_FMT NPY_FLOAT_FMT
-#define NPY_COMPLEX128_FMT NPY_CFLOAT_FMT
-#endif
-#elif NPY_BITSOF_FLOAT == 80
-#ifndef NPY_FLOAT80
-#define NPY_FLOAT80 NPY_FLOAT
-#define NPY_COMPLEX160 NPY_CFLOAT
- typedef float npy_float80;
- typedef npy_cfloat npy_complex160;
-# define PyFloat80ScalarObject PyFloatScalarObject
-# define PyComplex160ScalarObject PyCFloatScalarObject
-# define PyFloat80ArrType_Type PyFloatArrType_Type
-# define PyComplex160ArrType_Type PyCFloatArrType_Type
-#define NPY_FLOAT80_FMT NPY_FLOAT_FMT
-#define NPY_COMPLEX160_FMT NPY_CFLOAT_FMT
-#endif
-#elif NPY_BITSOF_FLOAT == 96
-#ifndef NPY_FLOAT96
-#define NPY_FLOAT96 NPY_FLOAT
-#define NPY_COMPLEX192 NPY_CFLOAT
- typedef float npy_float96;
- typedef npy_cfloat npy_complex192;
-# define PyFloat96ScalarObject PyFloatScalarObject
-# define PyComplex192ScalarObject PyCFloatScalarObject
-# define PyFloat96ArrType_Type PyFloatArrType_Type
-# define PyComplex192ArrType_Type PyCFloatArrType_Type
-#define NPY_FLOAT96_FMT NPY_FLOAT_FMT
-#define NPY_COMPLEX192_FMT NPY_CFLOAT_FMT
-#endif
-#elif NPY_BITSOF_FLOAT == 128
-#ifndef NPY_FLOAT128
-#define NPY_FLOAT128 NPY_FLOAT
-#define NPY_COMPLEX256 NPY_CFLOAT
- typedef float npy_float128;
- typedef npy_cfloat npy_complex256;
-# define PyFloat128ScalarObject PyFloatScalarObject
-# define PyComplex256ScalarObject PyCFloatScalarObject
-# define PyFloat128ArrType_Type PyFloatArrType_Type
-# define PyComplex256ArrType_Type PyCFloatArrType_Type
-#define NPY_FLOAT128_FMT NPY_FLOAT_FMT
-#define NPY_COMPLEX256_FMT NPY_CFLOAT_FMT
-#endif
-#endif
-
-/* half/float16 isn't a floating-point type in C */
-#define NPY_FLOAT16 NPY_HALF
-typedef npy_uint16 npy_half;
-typedef npy_half npy_float16;
-
-#if NPY_BITSOF_LONGDOUBLE == 32
-#ifndef NPY_FLOAT32
-#define NPY_FLOAT32 NPY_LONGDOUBLE
-#define NPY_COMPLEX64 NPY_CLONGDOUBLE
- typedef npy_longdouble npy_float32;
- typedef npy_clongdouble npy_complex64;
-# define PyFloat32ScalarObject PyLongDoubleScalarObject
-# define PyComplex64ScalarObject PyCLongDoubleScalarObject
-# define PyFloat32ArrType_Type PyLongDoubleArrType_Type
-# define PyComplex64ArrType_Type PyCLongDoubleArrType_Type
-#define NPY_FLOAT32_FMT NPY_LONGDOUBLE_FMT
-#define NPY_COMPLEX64_FMT NPY_CLONGDOUBLE_FMT
-#endif
-#elif NPY_BITSOF_LONGDOUBLE == 64
-#ifndef NPY_FLOAT64
-#define NPY_FLOAT64 NPY_LONGDOUBLE
-#define NPY_COMPLEX128 NPY_CLONGDOUBLE
- typedef npy_longdouble npy_float64;
- typedef npy_clongdouble npy_complex128;
-# define PyFloat64ScalarObject PyLongDoubleScalarObject
-# define PyComplex128ScalarObject PyCLongDoubleScalarObject
-# define PyFloat64ArrType_Type PyLongDoubleArrType_Type
-# define PyComplex128ArrType_Type PyCLongDoubleArrType_Type
-#define NPY_FLOAT64_FMT NPY_LONGDOUBLE_FMT
-#define NPY_COMPLEX128_FMT NPY_CLONGDOUBLE_FMT
-#endif
-#elif NPY_BITSOF_LONGDOUBLE == 80
-#ifndef NPY_FLOAT80
-#define NPY_FLOAT80 NPY_LONGDOUBLE
-#define NPY_COMPLEX160 NPY_CLONGDOUBLE
- typedef npy_longdouble npy_float80;
- typedef npy_clongdouble npy_complex160;
-# define PyFloat80ScalarObject PyLongDoubleScalarObject
-# define PyComplex160ScalarObject PyCLongDoubleScalarObject
-# define PyFloat80ArrType_Type PyLongDoubleArrType_Type
-# define PyComplex160ArrType_Type PyCLongDoubleArrType_Type
-#define NPY_FLOAT80_FMT NPY_LONGDOUBLE_FMT
-#define NPY_COMPLEX160_FMT NPY_CLONGDOUBLE_FMT
-#endif
-#elif NPY_BITSOF_LONGDOUBLE == 96
-#ifndef NPY_FLOAT96
-#define NPY_FLOAT96 NPY_LONGDOUBLE
-#define NPY_COMPLEX192 NPY_CLONGDOUBLE
- typedef npy_longdouble npy_float96;
- typedef npy_clongdouble npy_complex192;
-# define PyFloat96ScalarObject PyLongDoubleScalarObject
-# define PyComplex192ScalarObject PyCLongDoubleScalarObject
-# define PyFloat96ArrType_Type PyLongDoubleArrType_Type
-# define PyComplex192ArrType_Type PyCLongDoubleArrType_Type
-#define NPY_FLOAT96_FMT NPY_LONGDOUBLE_FMT
-#define NPY_COMPLEX192_FMT NPY_CLONGDOUBLE_FMT
-#endif
-#elif NPY_BITSOF_LONGDOUBLE == 128
-#ifndef NPY_FLOAT128
-#define NPY_FLOAT128 NPY_LONGDOUBLE
-#define NPY_COMPLEX256 NPY_CLONGDOUBLE
- typedef npy_longdouble npy_float128;
- typedef npy_clongdouble npy_complex256;
-# define PyFloat128ScalarObject PyLongDoubleScalarObject
-# define PyComplex256ScalarObject PyCLongDoubleScalarObject
-# define PyFloat128ArrType_Type PyLongDoubleArrType_Type
-# define PyComplex256ArrType_Type PyCLongDoubleArrType_Type
-#define NPY_FLOAT128_FMT NPY_LONGDOUBLE_FMT
-#define NPY_COMPLEX256_FMT NPY_CLONGDOUBLE_FMT
-#endif
-#elif NPY_BITSOF_LONGDOUBLE == 256
-#define NPY_FLOAT256 NPY_LONGDOUBLE
-#define NPY_COMPLEX512 NPY_CLONGDOUBLE
- typedef npy_longdouble npy_float256;
- typedef npy_clongdouble npy_complex512;
-# define PyFloat256ScalarObject PyLongDoubleScalarObject
-# define PyComplex512ScalarObject PyCLongDoubleScalarObject
-# define PyFloat256ArrType_Type PyLongDoubleArrType_Type
-# define PyComplex512ArrType_Type PyCLongDoubleArrType_Type
-#define NPY_FLOAT256_FMT NPY_LONGDOUBLE_FMT
-#define NPY_COMPLEX512_FMT NPY_CLONGDOUBLE_FMT
-#endif
-
-/* datetime typedefs */
-typedef npy_int64 npy_timedelta;
-typedef npy_int64 npy_datetime;
-#define NPY_DATETIME_FMT NPY_INT64_FMT
-#define NPY_TIMEDELTA_FMT NPY_INT64_FMT
-
-/* End of typedefs for numarray style bit-width names */
-
-#endif
-
diff --git a/include/numpy/npy_cpu.h b/include/numpy/npy_cpu.h
deleted file mode 100644
index 9707a7adf..000000000
--- a/include/numpy/npy_cpu.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * This set (target) cpu specific macros:
- * - Possible values:
- * NPY_CPU_X86
- * NPY_CPU_AMD64
- * NPY_CPU_PPC
- * NPY_CPU_PPC64
- * NPY_CPU_SPARC
- * NPY_CPU_S390
- * NPY_CPU_IA64
- * NPY_CPU_HPPA
- * NPY_CPU_ALPHA
- * NPY_CPU_ARMEL
- * NPY_CPU_ARMEB
- * NPY_CPU_SH_LE
- * NPY_CPU_SH_BE
- */
-#ifndef _NPY_CPUARCH_H_
-#define _NPY_CPUARCH_H_
-
-#include "numpyconfig.h"
-
-#if defined( __i386__ ) || defined(i386) || defined(_M_IX86)
- /*
- * __i386__ is defined by gcc and Intel compiler on Linux,
- * _M_IX86 by VS compiler,
- * i386 by Sun compilers on opensolaris at least
- */
- #define NPY_CPU_X86
-#elif defined(__x86_64__) || defined(__amd64__) || defined(__x86_64) || defined(_M_AMD64)
- /*
- * both __x86_64__ and __amd64__ are defined by gcc
- * __x86_64 defined by sun compiler on opensolaris at least
- * _M_AMD64 defined by MS compiler
- */
- #define NPY_CPU_AMD64
-#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC)
- /*
- * __ppc__ is defined by gcc, I remember having seen __powerpc__ once,
- * but can't find it ATM
- * _ARCH_PPC is used by at least gcc on AIX
- */
- #define NPY_CPU_PPC
-#elif defined(__ppc64__)
- #define NPY_CPU_PPC64
-#elif defined(__sparc__) || defined(__sparc)
- /* __sparc__ is defined by gcc and Forte (e.g. Sun) compilers */
- #define NPY_CPU_SPARC
-#elif defined(__s390__)
- #define NPY_CPU_S390
-#elif defined(__ia64)
- #define NPY_CPU_IA64
-#elif defined(__hppa)
- #define NPY_CPU_HPPA
-#elif defined(__alpha__)
- #define NPY_CPU_ALPHA
-#elif defined(__arm__) && defined(__ARMEL__)
- #define NPY_CPU_ARMEL
-#elif defined(__arm__) && defined(__ARMEB__)
- #define NPY_CPU_ARMEB
-#elif defined(__sh__) && defined(__LITTLE_ENDIAN__)
- #define NPY_CPU_SH_LE
-#elif defined(__sh__) && defined(__BIG_ENDIAN__)
- #define NPY_CPU_SH_BE
-#elif defined(__MIPSEL__)
- #define NPY_CPU_MIPSEL
-#elif defined(__MIPSEB__)
- #define NPY_CPU_MIPSEB
-#elif defined(__aarch64__)
- #define NPY_CPU_AARCH64
-#else
- #error Unknown CPU, please report this to numpy maintainers with \
- information about your platform (OS, CPU and compiler)
-#endif
-
-/*
- This "white-lists" the architectures that we know don't require
- pointer alignment. We white-list, since the memcpy version will
- work everywhere, whereas assignment will only work where pointer
- dereferencing doesn't require alignment.
-
- TODO: There may be more architectures we can white list.
-*/
-#if defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64)
- #define NPY_COPY_PYOBJECT_PTR(dst, src) (*((PyObject **)(dst)) = *((PyObject **)(src)))
-#else
- #if NPY_SIZEOF_PY_INTPTR_T == 4
- #define NPY_COPY_PYOBJECT_PTR(dst, src) \
- ((char*)(dst))[0] = ((char*)(src))[0]; \
- ((char*)(dst))[1] = ((char*)(src))[1]; \
- ((char*)(dst))[2] = ((char*)(src))[2]; \
- ((char*)(dst))[3] = ((char*)(src))[3];
- #elif NPY_SIZEOF_PY_INTPTR_T == 8
- #define NPY_COPY_PYOBJECT_PTR(dst, src) \
- ((char*)(dst))[0] = ((char*)(src))[0]; \
- ((char*)(dst))[1] = ((char*)(src))[1]; \
- ((char*)(dst))[2] = ((char*)(src))[2]; \
- ((char*)(dst))[3] = ((char*)(src))[3]; \
- ((char*)(dst))[4] = ((char*)(src))[4]; \
- ((char*)(dst))[5] = ((char*)(src))[5]; \
- ((char*)(dst))[6] = ((char*)(src))[6]; \
- ((char*)(dst))[7] = ((char*)(src))[7];
- #else
- #error Unknown architecture, please report this to numpy maintainers with \
- information about your platform (OS, CPU and compiler)
- #endif
-#endif
-
-#endif
diff --git a/include/numpy/npy_deprecated_api.h b/include/numpy/npy_deprecated_api.h
deleted file mode 100644
index c27b4a4c9..000000000
--- a/include/numpy/npy_deprecated_api.h
+++ /dev/null
@@ -1,129 +0,0 @@
-#ifndef _NPY_DEPRECATED_API_H
-#define _NPY_DEPRECATED_API_H
-
-#if defined(_WIN32)
-#define _WARN___STR2__(x) #x
-#define _WARN___STR1__(x) _WARN___STR2__(x)
-#define _WARN___LOC__ __FILE__ "(" _WARN___STR1__(__LINE__) ") : Warning Msg: "
-#pragma message(_WARN___LOC__"Using deprecated NumPy API, disable it by " \
- "#defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION")
-#elif defined(__GNUC__)
-#warning "Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION"
-#endif
-/* TODO: How to do this warning message for other compilers? */
-
-/*
- * This header exists to collect all dangerous/deprecated NumPy API.
- *
- * This is an attempt to remove bad API, the proliferation of macros,
- * and namespace pollution currently produced by the NumPy headers.
- */
-
-#if defined(NPY_NO_DEPRECATED_API)
-#error Should never include npy_deprecated_api directly.
-#endif
-
-/* These array flags are deprecated as of NumPy 1.7 */
-#define NPY_CONTIGUOUS NPY_ARRAY_C_CONTIGUOUS
-#define NPY_FORTRAN NPY_ARRAY_F_CONTIGUOUS
-
-/*
- * The consistent NPY_ARRAY_* names which don't pollute the NPY_*
- * namespace were added in NumPy 1.7.
- *
- * These versions of the carray flags are deprecated, but
- * probably should only be removed after two releases instead of one.
- */
-#define NPY_C_CONTIGUOUS NPY_ARRAY_C_CONTIGUOUS
-#define NPY_F_CONTIGUOUS NPY_ARRAY_F_CONTIGUOUS
-#define NPY_OWNDATA NPY_ARRAY_OWNDATA
-#define NPY_FORCECAST NPY_ARRAY_FORCECAST
-#define NPY_ENSURECOPY NPY_ARRAY_ENSURECOPY
-#define NPY_ENSUREARRAY NPY_ARRAY_ENSUREARRAY
-#define NPY_ELEMENTSTRIDES NPY_ARRAY_ELEMENTSTRIDES
-#define NPY_ALIGNED NPY_ARRAY_ALIGNED
-#define NPY_NOTSWAPPED NPY_ARRAY_NOTSWAPPED
-#define NPY_WRITEABLE NPY_ARRAY_WRITEABLE
-#define NPY_UPDATEIFCOPY NPY_ARRAY_UPDATEIFCOPY
-#define NPY_BEHAVED NPY_ARRAY_BEHAVED
-#define NPY_BEHAVED_NS NPY_ARRAY_BEHAVED_NS
-#define NPY_CARRAY NPY_ARRAY_CARRAY
-#define NPY_CARRAY_RO NPY_ARRAY_CARRAY_RO
-#define NPY_FARRAY NPY_ARRAY_FARRAY
-#define NPY_FARRAY_RO NPY_ARRAY_FARRAY_RO
-#define NPY_DEFAULT NPY_ARRAY_DEFAULT
-#define NPY_IN_ARRAY NPY_ARRAY_IN_ARRAY
-#define NPY_OUT_ARRAY NPY_ARRAY_OUT_ARRAY
-#define NPY_INOUT_ARRAY NPY_ARRAY_INOUT_ARRAY
-#define NPY_IN_FARRAY NPY_ARRAY_IN_FARRAY
-#define NPY_OUT_FARRAY NPY_ARRAY_OUT_FARRAY
-#define NPY_INOUT_FARRAY NPY_ARRAY_INOUT_FARRAY
-#define NPY_UPDATE_ALL NPY_ARRAY_UPDATE_ALL
-
-/* This way of accessing the default type is deprecated as of NumPy 1.7 */
-#define PyArray_DEFAULT NPY_DEFAULT_TYPE
-
-/* These DATETIME bits aren't used internally */
-#if PY_VERSION_HEX >= 0x03000000
-#define PyDataType_GetDatetimeMetaData(descr) \
- ((descr->metadata == NULL) ? NULL : \
- ((PyArray_DatetimeMetaData *)(PyCapsule_GetPointer( \
- PyDict_GetItemString( \
- descr->metadata, NPY_METADATA_DTSTR), NULL))))
-#else
-#define PyDataType_GetDatetimeMetaData(descr) \
- ((descr->metadata == NULL) ? NULL : \
- ((PyArray_DatetimeMetaData *)(PyCObject_AsVoidPtr( \
- PyDict_GetItemString(descr->metadata, NPY_METADATA_DTSTR)))))
-#endif
-
-/*
- * Deprecated as of NumPy 1.7, this kind of shortcut doesn't
- * belong in the public API.
- */
-#define NPY_AO PyArrayObject
-
-/*
- * Deprecated as of NumPy 1.7, an all-lowercase macro doesn't
- * belong in the public API.
- */
-#define fortran fortran_
-
-/*
- * Deprecated as of NumPy 1.7, as it is a namespace-polluting
- * macro.
- */
-#define FORTRAN_IF PyArray_FORTRAN_IF
-
-/* Deprecated as of NumPy 1.7, datetime64 uses c_metadata instead */
-#define NPY_METADATA_DTSTR "__timeunit__"
-
-/*
- * Deprecated as of NumPy 1.7.
- * The reasoning:
- * - These are for datetime, but there's no datetime "namespace".
- * - They just turn NPY_STR_<x> into "<x>", which is just
- * making something simple be indirected.
- */
-#define NPY_STR_Y "Y"
-#define NPY_STR_M "M"
-#define NPY_STR_W "W"
-#define NPY_STR_D "D"
-#define NPY_STR_h "h"
-#define NPY_STR_m "m"
-#define NPY_STR_s "s"
-#define NPY_STR_ms "ms"
-#define NPY_STR_us "us"
-#define NPY_STR_ns "ns"
-#define NPY_STR_ps "ps"
-#define NPY_STR_fs "fs"
-#define NPY_STR_as "as"
-
-/*
- * The macros in old_defines.h are Deprecated as of NumPy 1.7 and will be
- * removed in the next major release.
- */
-#include "old_defines.h"
-
-
-#endif
diff --git a/include/numpy/npy_endian.h b/include/numpy/npy_endian.h
deleted file mode 100644
index 4e3349ffe..000000000
--- a/include/numpy/npy_endian.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef _NPY_ENDIAN_H_
-#define _NPY_ENDIAN_H_
-
-/*
- * NPY_BYTE_ORDER is set to the same value as BYTE_ORDER set by glibc in
- * endian.h
- */
-
-#ifdef NPY_HAVE_ENDIAN_H
- /* Use endian.h if available */
- #include <endian.h>
-
- #define NPY_BYTE_ORDER __BYTE_ORDER
- #define NPY_LITTLE_ENDIAN __LITTLE_ENDIAN
- #define NPY_BIG_ENDIAN __BIG_ENDIAN
-#else
- /* Set endianness info using target CPU */
- #include "npy_cpu.h"
-
- #define NPY_LITTLE_ENDIAN 1234
- #define NPY_BIG_ENDIAN 4321
-
- #if defined(NPY_CPU_X86) \
- || defined(NPY_CPU_AMD64) \
- || defined(NPY_CPU_IA64) \
- || defined(NPY_CPU_ALPHA) \
- || defined(NPY_CPU_ARMEL) \
- || defined(NPY_CPU_AARCH64) \
- || defined(NPY_CPU_SH_LE) \
- || defined(NPY_CPU_MIPSEL)
- #define NPY_BYTE_ORDER NPY_LITTLE_ENDIAN
- #elif defined(NPY_CPU_PPC) \
- || defined(NPY_CPU_SPARC) \
- || defined(NPY_CPU_S390) \
- || defined(NPY_CPU_HPPA) \
- || defined(NPY_CPU_PPC64) \
- || defined(NPY_CPU_ARMEB) \
- || defined(NPY_CPU_SH_BE) \
- || defined(NPY_CPU_MIPSEB)
- #define NPY_BYTE_ORDER NPY_BIG_ENDIAN
- #else
- #error Unknown CPU: can not set endianness
- #endif
-#endif
-
-#endif
diff --git a/include/numpy/npy_interrupt.h b/include/numpy/npy_interrupt.h
deleted file mode 100644
index f71fd689e..000000000
--- a/include/numpy/npy_interrupt.h
+++ /dev/null
@@ -1,117 +0,0 @@
-
-/* Signal handling:
-
-This header file defines macros that allow your code to handle
-interrupts received during processing. Interrupts that
-could reasonably be handled:
-
-SIGINT, SIGABRT, SIGALRM, SIGSEGV
-
-****Warning***************
-
-Do not allow code that creates temporary memory or increases reference
-counts of Python objects to be interrupted unless you handle it
-differently.
-
-**************************
-
-The mechanism for handling interrupts is conceptually simple:
-
- - replace the signal handler with our own home-grown version
- and store the old one.
- - run the code to be interrupted -- if an interrupt occurs
- the handler should basically just cause a return to the
- calling function for finish work.
- - restore the old signal handler
-
-Of course, every code that allows interrupts must account for
-returning via the interrupt and handle clean-up correctly. But,
-even still, the simple paradigm is complicated by at least three
-factors.
-
- 1) platform portability (i.e. Microsoft says not to use longjmp
- to return from signal handling. They have a __try and __except
- extension to C instead but what about mingw?).
-
- 2) how to handle threads: apparently whether signals are delivered to
- every thread of the process or the "invoking" thread is platform
- dependent. --- we don't handle threads for now.
-
- 3) do we need to worry about re-entrance. For now, assume the
- code will not call-back into itself.
-
-Ideas:
-
- 1) Start by implementing an approach that works on platforms that
- can use setjmp and longjmp functionality and does nothing
- on other platforms.
-
- 2) Ignore threads --- i.e. do not mix interrupt handling and threads
-
- 3) Add a default signal_handler function to the C-API but have the rest
- use macros.
-
-
-Simple Interface:
-
-
-In your C-extension: around a block of code you want to be interruptable
-with a SIGINT
-
-NPY_SIGINT_ON
-[code]
-NPY_SIGINT_OFF
-
-In order for this to work correctly, the
-[code] block must not allocate any memory or alter the reference count of any
-Python objects. In other words [code] must be interruptible so that continuation
-after NPY_SIGINT_OFF will only be "missing some computations"
-
-Interrupt handling does not work well with threads.
-
-*/
-
-/* Add signal handling macros
- Make the global variable and signal handler part of the C-API
-*/
-
-#ifndef NPY_INTERRUPT_H
-#define NPY_INTERRUPT_H
-
-#ifndef NPY_NO_SIGNAL
-
-#include
-#include
-
-#ifndef sigsetjmp
-
-#define NPY_SIGSETJMP(arg1, arg2) setjmp(arg1)
-#define NPY_SIGLONGJMP(arg1, arg2) longjmp(arg1, arg2)
-#define NPY_SIGJMP_BUF jmp_buf
-
-#else
-
-#define NPY_SIGSETJMP(arg1, arg2) sigsetjmp(arg1, arg2)
-#define NPY_SIGLONGJMP(arg1, arg2) siglongjmp(arg1, arg2)
-#define NPY_SIGJMP_BUF sigjmp_buf
-
-#endif
-
-# define NPY_SIGINT_ON { \
- PyOS_sighandler_t _npy_sig_save; \
- _npy_sig_save = PyOS_setsig(SIGINT, _PyArray_SigintHandler); \
- if (NPY_SIGSETJMP(*((NPY_SIGJMP_BUF *)_PyArray_GetSigintBuf()), \
- 1) == 0) { \
-
-# define NPY_SIGINT_OFF } \
- PyOS_setsig(SIGINT, _npy_sig_save); \
- }
-
-#else /* NPY_NO_SIGNAL */
-
-#define NPY_SIGINT_ON
-#define NPY_SIGINT_OFF
-
-#endif /* HAVE_SIGSETJMP */
-
-#endif /* NPY_INTERRUPT_H */
diff --git a/include/numpy/npy_math.h b/include/numpy/npy_math.h
deleted file mode 100644
index 7ae166e54..000000000
--- a/include/numpy/npy_math.h
+++ /dev/null
@@ -1,438 +0,0 @@
-#ifndef __NPY_MATH_C99_H_
-#define __NPY_MATH_C99_H_
-
-#include
-#ifdef __SUNPRO_CC
-#include
-#endif
-#include
-
-/*
- * NAN and INFINITY like macros (same behavior as glibc for NAN, same as C99
- * for INFINITY)
- *
- * XXX: I should test whether INFINITY and NAN are available on the platform
- */
-NPY_INLINE static float __npy_inff(void)
-{
- const union { npy_uint32 __i; float __f;} __bint = {0x7f800000UL};
- return __bint.__f;
-}
-
-NPY_INLINE static float __npy_nanf(void)
-{
- const union { npy_uint32 __i; float __f;} __bint = {0x7fc00000UL};
- return __bint.__f;
-}
-
-NPY_INLINE static float __npy_pzerof(void)
-{
- const union { npy_uint32 __i; float __f;} __bint = {0x00000000UL};
- return __bint.__f;
-}
-
-NPY_INLINE static float __npy_nzerof(void)
-{
- const union { npy_uint32 __i; float __f;} __bint = {0x80000000UL};
- return __bint.__f;
-}
-
-#define NPY_INFINITYF __npy_inff()
-#define NPY_NANF __npy_nanf()
-#define NPY_PZEROF __npy_pzerof()
-#define NPY_NZEROF __npy_nzerof()
-
-#define NPY_INFINITY ((npy_double)NPY_INFINITYF)
-#define NPY_NAN ((npy_double)NPY_NANF)
-#define NPY_PZERO ((npy_double)NPY_PZEROF)
-#define NPY_NZERO ((npy_double)NPY_NZEROF)
-
-#define NPY_INFINITYL ((npy_longdouble)NPY_INFINITYF)
-#define NPY_NANL ((npy_longdouble)NPY_NANF)
-#define NPY_PZEROL ((npy_longdouble)NPY_PZEROF)
-#define NPY_NZEROL ((npy_longdouble)NPY_NZEROF)
-
-/*
- * Useful constants
- */
-#define NPY_E 2.718281828459045235360287471352662498 /* e */
-#define NPY_LOG2E 1.442695040888963407359924681001892137 /* log_2 e */
-#define NPY_LOG10E 0.434294481903251827651128918916605082 /* log_10 e */
-#define NPY_LOGE2 0.693147180559945309417232121458176568 /* log_e 2 */
-#define NPY_LOGE10 2.302585092994045684017991454684364208 /* log_e 10 */
-#define NPY_PI 3.141592653589793238462643383279502884 /* pi */
-#define NPY_PI_2 1.570796326794896619231321691639751442 /* pi/2 */
-#define NPY_PI_4 0.785398163397448309615660845819875721 /* pi/4 */
-#define NPY_1_PI 0.318309886183790671537767526745028724 /* 1/pi */
-#define NPY_2_PI 0.636619772367581343075535053490057448 /* 2/pi */
-#define NPY_EULER 0.577215664901532860606512090082402431 /* Euler constant */
-#define NPY_SQRT2 1.414213562373095048801688724209698079 /* sqrt(2) */
-#define NPY_SQRT1_2 0.707106781186547524400844362104849039 /* 1/sqrt(2) */
-
-#define NPY_Ef 2.718281828459045235360287471352662498F /* e */
-#define NPY_LOG2Ef 1.442695040888963407359924681001892137F /* log_2 e */
-#define NPY_LOG10Ef 0.434294481903251827651128918916605082F /* log_10 e */
-#define NPY_LOGE2f 0.693147180559945309417232121458176568F /* log_e 2 */
-#define NPY_LOGE10f 2.302585092994045684017991454684364208F /* log_e 10 */
-#define NPY_PIf 3.141592653589793238462643383279502884F /* pi */
-#define NPY_PI_2f 1.570796326794896619231321691639751442F /* pi/2 */
-#define NPY_PI_4f 0.785398163397448309615660845819875721F /* pi/4 */
-#define NPY_1_PIf 0.318309886183790671537767526745028724F /* 1/pi */
-#define NPY_2_PIf 0.636619772367581343075535053490057448F /* 2/pi */
-#define NPY_EULERf 0.577215664901532860606512090082402431F /* Euler constan*/
-#define NPY_SQRT2f 1.414213562373095048801688724209698079F /* sqrt(2) */
-#define NPY_SQRT1_2f 0.707106781186547524400844362104849039F /* 1/sqrt(2) */
-
-#define NPY_El 2.718281828459045235360287471352662498L /* e */
-#define NPY_LOG2El 1.442695040888963407359924681001892137L /* log_2 e */
-#define NPY_LOG10El 0.434294481903251827651128918916605082L /* log_10 e */
-#define NPY_LOGE2l 0.693147180559945309417232121458176568L /* log_e 2 */
-#define NPY_LOGE10l 2.302585092994045684017991454684364208L /* log_e 10 */
-#define NPY_PIl 3.141592653589793238462643383279502884L /* pi */
-#define NPY_PI_2l 1.570796326794896619231321691639751442L /* pi/2 */
-#define NPY_PI_4l 0.785398163397448309615660845819875721L /* pi/4 */
-#define NPY_1_PIl 0.318309886183790671537767526745028724L /* 1/pi */
-#define NPY_2_PIl 0.636619772367581343075535053490057448L /* 2/pi */
-#define NPY_EULERl 0.577215664901532860606512090082402431L /* Euler constan*/
-#define NPY_SQRT2l 1.414213562373095048801688724209698079L /* sqrt(2) */
-#define NPY_SQRT1_2l 0.707106781186547524400844362104849039L /* 1/sqrt(2) */
-
-/*
- * C99 double math funcs
- */
-double npy_sin(double x);
-double npy_cos(double x);
-double npy_tan(double x);
-double npy_sinh(double x);
-double npy_cosh(double x);
-double npy_tanh(double x);
-
-double npy_asin(double x);
-double npy_acos(double x);
-double npy_atan(double x);
-double npy_aexp(double x);
-double npy_alog(double x);
-double npy_asqrt(double x);
-double npy_afabs(double x);
-
-double npy_log(double x);
-double npy_log10(double x);
-double npy_exp(double x);
-double npy_sqrt(double x);
-
-double npy_fabs(double x);
-double npy_ceil(double x);
-double npy_fmod(double x, double y);
-double npy_floor(double x);
-
-double npy_expm1(double x);
-double npy_log1p(double x);
-double npy_hypot(double x, double y);
-double npy_acosh(double x);
-double npy_asinh(double xx);
-double npy_atanh(double x);
-double npy_rint(double x);
-double npy_trunc(double x);
-double npy_exp2(double x);
-double npy_log2(double x);
-
-double npy_atan2(double x, double y);
-double npy_pow(double x, double y);
-double npy_modf(double x, double* y);
-
-double npy_copysign(double x, double y);
-double npy_nextafter(double x, double y);
-double npy_spacing(double x);
-
-/*
- * IEEE 754 fpu handling. Those are guaranteed to be macros
- */
-#ifndef NPY_HAVE_DECL_ISNAN
- #define npy_isnan(x) ((x) != (x))
-#else
- #ifdef _MSC_VER
- #define npy_isnan(x) _isnan((x))
- #else
- #define npy_isnan(x) isnan((x))
- #endif
-#endif
-
-#ifndef NPY_HAVE_DECL_ISFINITE
- #ifdef _MSC_VER
- #define npy_isfinite(x) _finite((x))
- #else
- #define npy_isfinite(x) !npy_isnan((x) + (-x))
- #endif
-#else
- #define npy_isfinite(x) isfinite((x))
-#endif
-
-#ifndef NPY_HAVE_DECL_ISINF
- #define npy_isinf(x) (!npy_isfinite(x) && !npy_isnan(x))
-#else
- #ifdef _MSC_VER
- #define npy_isinf(x) (!_finite((x)) && !_isnan((x)))
- #else
- #define npy_isinf(x) isinf((x))
- #endif
-#endif
-
-#ifndef NPY_HAVE_DECL_SIGNBIT
- int _npy_signbit_f(float x);
- int _npy_signbit_d(double x);
- int _npy_signbit_ld(long double x);
- #define npy_signbit(x) \
- (sizeof (x) == sizeof (long double) ? _npy_signbit_ld (x) \
- : sizeof (x) == sizeof (double) ? _npy_signbit_d (x) \
- : _npy_signbit_f (x))
-#else
- #define npy_signbit(x) signbit((x))
-#endif
-
-/*
- * float C99 math functions
- */
-
-float npy_sinf(float x);
-float npy_cosf(float x);
-float npy_tanf(float x);
-float npy_sinhf(float x);
-float npy_coshf(float x);
-float npy_tanhf(float x);
-float npy_fabsf(float x);
-float npy_floorf(float x);
-float npy_ceilf(float x);
-float npy_rintf(float x);
-float npy_truncf(float x);
-float npy_sqrtf(float x);
-float npy_log10f(float x);
-float npy_logf(float x);
-float npy_expf(float x);
-float npy_expm1f(float x);
-float npy_asinf(float x);
-float npy_acosf(float x);
-float npy_atanf(float x);
-float npy_asinhf(float x);
-float npy_acoshf(float x);
-float npy_atanhf(float x);
-float npy_log1pf(float x);
-float npy_exp2f(float x);
-float npy_log2f(float x);
-
-float npy_atan2f(float x, float y);
-float npy_hypotf(float x, float y);
-float npy_powf(float x, float y);
-float npy_fmodf(float x, float y);
-
-float npy_modff(float x, float* y);
-
-float npy_copysignf(float x, float y);
-float npy_nextafterf(float x, float y);
-float npy_spacingf(float x);
-
-/*
- * float C99 math functions
- */
-
-npy_longdouble npy_sinl(npy_longdouble x);
-npy_longdouble npy_cosl(npy_longdouble x);
-npy_longdouble npy_tanl(npy_longdouble x);
-npy_longdouble npy_sinhl(npy_longdouble x);
-npy_longdouble npy_coshl(npy_longdouble x);
-npy_longdouble npy_tanhl(npy_longdouble x);
-npy_longdouble npy_fabsl(npy_longdouble x);
-npy_longdouble npy_floorl(npy_longdouble x);
-npy_longdouble npy_ceill(npy_longdouble x);
-npy_longdouble npy_rintl(npy_longdouble x);
-npy_longdouble npy_truncl(npy_longdouble x);
-npy_longdouble npy_sqrtl(npy_longdouble x);
-npy_longdouble npy_log10l(npy_longdouble x);
-npy_longdouble npy_logl(npy_longdouble x);
-npy_longdouble npy_expl(npy_longdouble x);
-npy_longdouble npy_expm1l(npy_longdouble x);
-npy_longdouble npy_asinl(npy_longdouble x);
-npy_longdouble npy_acosl(npy_longdouble x);
-npy_longdouble npy_atanl(npy_longdouble x);
-npy_longdouble npy_asinhl(npy_longdouble x);
-npy_longdouble npy_acoshl(npy_longdouble x);
-npy_longdouble npy_atanhl(npy_longdouble x);
-npy_longdouble npy_log1pl(npy_longdouble x);
-npy_longdouble npy_exp2l(npy_longdouble x);
-npy_longdouble npy_log2l(npy_longdouble x);
-
-npy_longdouble npy_atan2l(npy_longdouble x, npy_longdouble y);
-npy_longdouble npy_hypotl(npy_longdouble x, npy_longdouble y);
-npy_longdouble npy_powl(npy_longdouble x, npy_longdouble y);
-npy_longdouble npy_fmodl(npy_longdouble x, npy_longdouble y);
-
-npy_longdouble npy_modfl(npy_longdouble x, npy_longdouble* y);
-
-npy_longdouble npy_copysignl(npy_longdouble x, npy_longdouble y);
-npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y);
-npy_longdouble npy_spacingl(npy_longdouble x);
-
-/*
- * Non standard functions
- */
-double npy_deg2rad(double x);
-double npy_rad2deg(double x);
-double npy_logaddexp(double x, double y);
-double npy_logaddexp2(double x, double y);
-
-float npy_deg2radf(float x);
-float npy_rad2degf(float x);
-float npy_logaddexpf(float x, float y);
-float npy_logaddexp2f(float x, float y);
-
-npy_longdouble npy_deg2radl(npy_longdouble x);
-npy_longdouble npy_rad2degl(npy_longdouble x);
-npy_longdouble npy_logaddexpl(npy_longdouble x, npy_longdouble y);
-npy_longdouble npy_logaddexp2l(npy_longdouble x, npy_longdouble y);
-
-#define npy_degrees npy_rad2deg
-#define npy_degreesf npy_rad2degf
-#define npy_degreesl npy_rad2degl
-
-#define npy_radians npy_deg2rad
-#define npy_radiansf npy_deg2radf
-#define npy_radiansl npy_deg2radl
-
-/*
- * Complex declarations
- */
-
-/*
- * C99 specifies that complex numbers have the same representation as
- * an array of two elements, where the first element is the real part
- * and the second element is the imaginary part.
- */
-#define __NPY_CPACK_IMP(x, y, type, ctype) \
- union { \
- ctype z; \
- type a[2]; \
- } z1;; \
- \
- z1.a[0] = (x); \
- z1.a[1] = (y); \
- \
- return z1.z;
-
-static NPY_INLINE npy_cdouble npy_cpack(double x, double y)
-{
- __NPY_CPACK_IMP(x, y, double, npy_cdouble);
-}
-
-static NPY_INLINE npy_cfloat npy_cpackf(float x, float y)
-{
- __NPY_CPACK_IMP(x, y, float, npy_cfloat);
-}
-
-static NPY_INLINE npy_clongdouble npy_cpackl(npy_longdouble x, npy_longdouble y)
-{
- __NPY_CPACK_IMP(x, y, npy_longdouble, npy_clongdouble);
-}
-#undef __NPY_CPACK_IMP
-
-/*
- * Same remark as above, but in the other direction: extract first/second
- * member of complex number, assuming a C99-compatible representation
- *
- * Those are defineds as static inline, and such as a reasonable compiler would
- * most likely compile this to one or two instructions (on CISC at least)
- */
-#define __NPY_CEXTRACT_IMP(z, index, type, ctype) \
- union { \
- ctype z; \
- type a[2]; \
- } __z_repr; \
- __z_repr.z = z; \
- \
- return __z_repr.a[index];
-
-static NPY_INLINE double npy_creal(npy_cdouble z)
-{
- __NPY_CEXTRACT_IMP(z, 0, double, npy_cdouble);
-}
-
-static NPY_INLINE double npy_cimag(npy_cdouble z)
-{
- __NPY_CEXTRACT_IMP(z, 1, double, npy_cdouble);
-}
-
-static NPY_INLINE float npy_crealf(npy_cfloat z)
-{
- __NPY_CEXTRACT_IMP(z, 0, float, npy_cfloat);
-}
-
-static NPY_INLINE float npy_cimagf(npy_cfloat z)
-{
- __NPY_CEXTRACT_IMP(z, 1, float, npy_cfloat);
-}
-
-static NPY_INLINE npy_longdouble npy_creall(npy_clongdouble z)
-{
- __NPY_CEXTRACT_IMP(z, 0, npy_longdouble, npy_clongdouble);
-}
-
-static NPY_INLINE npy_longdouble npy_cimagl(npy_clongdouble z)
-{
- __NPY_CEXTRACT_IMP(z, 1, npy_longdouble, npy_clongdouble);
-}
-#undef __NPY_CEXTRACT_IMP
-
-/*
- * Double precision complex functions
- */
-double npy_cabs(npy_cdouble z);
-double npy_carg(npy_cdouble z);
-
-npy_cdouble npy_cexp(npy_cdouble z);
-npy_cdouble npy_clog(npy_cdouble z);
-npy_cdouble npy_cpow(npy_cdouble x, npy_cdouble y);
-
-npy_cdouble npy_csqrt(npy_cdouble z);
-
-npy_cdouble npy_ccos(npy_cdouble z);
-npy_cdouble npy_csin(npy_cdouble z);
-
-/*
- * Single precision complex functions
- */
-float npy_cabsf(npy_cfloat z);
-float npy_cargf(npy_cfloat z);
-
-npy_cfloat npy_cexpf(npy_cfloat z);
-npy_cfloat npy_clogf(npy_cfloat z);
-npy_cfloat npy_cpowf(npy_cfloat x, npy_cfloat y);
-
-npy_cfloat npy_csqrtf(npy_cfloat z);
-
-npy_cfloat npy_ccosf(npy_cfloat z);
-npy_cfloat npy_csinf(npy_cfloat z);
-
-/*
- * Extended precision complex functions
- */
-npy_longdouble npy_cabsl(npy_clongdouble z);
-npy_longdouble npy_cargl(npy_clongdouble z);
-
-npy_clongdouble npy_cexpl(npy_clongdouble z);
-npy_clongdouble npy_clogl(npy_clongdouble z);
-npy_clongdouble npy_cpowl(npy_clongdouble x, npy_clongdouble y);
-
-npy_clongdouble npy_csqrtl(npy_clongdouble z);
-
-npy_clongdouble npy_ccosl(npy_clongdouble z);
-npy_clongdouble npy_csinl(npy_clongdouble z);
-
-/*
- * Functions that set the floating point error
- * status word.
- */
-
-void npy_set_floatstatus_divbyzero(void);
-void npy_set_floatstatus_overflow(void);
-void npy_set_floatstatus_underflow(void);
-void npy_set_floatstatus_invalid(void);
-
-#endif
diff --git a/include/numpy/npy_no_deprecated_api.h b/include/numpy/npy_no_deprecated_api.h
deleted file mode 100644
index 6183dc278..000000000
--- a/include/numpy/npy_no_deprecated_api.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * This include file is provided for inclusion in Cython *.pyd files where
- * one would like to define the NPY_NO_DEPRECATED_API macro. It can be
- * included by
- *
- * cdef extern from "npy_no_deprecated_api.h": pass
- *
- */
-#ifndef NPY_NO_DEPRECATED_API
-
-/* put this check here since there may be multiple includes in C extensions. */
-#if defined(NDARRAYTYPES_H) || defined(_NPY_DEPRECATED_API_H) || \
- defined(OLD_DEFINES_H)
-#error "npy_no_deprecated_api.h" must be first among numpy includes.
-#else
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-#endif
-
-#endif
diff --git a/include/numpy/npy_os.h b/include/numpy/npy_os.h
deleted file mode 100644
index 9228c3916..000000000
--- a/include/numpy/npy_os.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _NPY_OS_H_
-#define _NPY_OS_H_
-
-#if defined(linux) || defined(__linux) || defined(__linux__)
- #define NPY_OS_LINUX
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || \
- defined(__OpenBSD__) || defined(__DragonFly__)
- #define NPY_OS_BSD
- #ifdef __FreeBSD__
- #define NPY_OS_FREEBSD
- #elif defined(__NetBSD__)
- #define NPY_OS_NETBSD
- #elif defined(__OpenBSD__)
- #define NPY_OS_OPENBSD
- #elif defined(__DragonFly__)
- #define NPY_OS_DRAGONFLY
- #endif
-#elif defined(sun) || defined(__sun)
- #define NPY_OS_SOLARIS
-#elif defined(__CYGWIN__)
- #define NPY_OS_CYGWIN
-#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
- #define NPY_OS_WIN32
-#elif defined(__APPLE__)
- #define NPY_OS_DARWIN
-#else
- #define NPY_OS_UNKNOWN
-#endif
-
-#endif
diff --git a/include/numpy/numpyconfig.h b/include/numpy/numpyconfig.h
deleted file mode 100644
index 401d19fd7..000000000
--- a/include/numpy/numpyconfig.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _NPY_NUMPYCONFIG_H_
-#define _NPY_NUMPYCONFIG_H_
-
-#include "_numpyconfig.h"
-
-/*
- * On Mac OS X, because there is only one configuration stage for all the archs
- * in universal builds, any macro which depends on the arch needs to be
- * harcoded
- */
-#ifdef __APPLE__
- #undef NPY_SIZEOF_LONG
- #undef NPY_SIZEOF_PY_INTPTR_T
-
- #ifdef __LP64__
- #define NPY_SIZEOF_LONG 8
- #define NPY_SIZEOF_PY_INTPTR_T 8
- #else
- #define NPY_SIZEOF_LONG 4
- #define NPY_SIZEOF_PY_INTPTR_T 4
- #endif
-#endif
-
-/**
- * To help with the NPY_NO_DEPRECATED_API macro, we include API version
- * numbers for specific versions of NumPy. To exclude all API that was
- * deprecated as of 1.7, add the following before #including any NumPy
- * headers:
- * #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
- */
-#define NPY_1_7_API_VERSION 0x00000007
-
-#endif
diff --git a/include/numpy/old_defines.h b/include/numpy/old_defines.h
deleted file mode 100644
index abf81595a..000000000
--- a/include/numpy/old_defines.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/* This header is deprecated as of NumPy 1.7 */
-#ifndef OLD_DEFINES_H
-#define OLD_DEFINES_H
-
-#if defined(NPY_NO_DEPRECATED_API) && NPY_NO_DEPRECATED_API >= NPY_1_7_API_VERSION
-#error The header "old_defines.h" is deprecated as of NumPy 1.7.
-#endif
-
-#define NDARRAY_VERSION NPY_VERSION
-
-#define PyArray_MIN_BUFSIZE NPY_MIN_BUFSIZE
-#define PyArray_MAX_BUFSIZE NPY_MAX_BUFSIZE
-#define PyArray_BUFSIZE NPY_BUFSIZE
-
-#define PyArray_PRIORITY NPY_PRIORITY
-#define PyArray_SUBTYPE_PRIORITY NPY_PRIORITY
-#define PyArray_NUM_FLOATTYPE NPY_NUM_FLOATTYPE
-
-#define NPY_MAX PyArray_MAX
-#define NPY_MIN PyArray_MIN
-
-#define PyArray_TYPES NPY_TYPES
-#define PyArray_BOOL NPY_BOOL
-#define PyArray_BYTE NPY_BYTE
-#define PyArray_UBYTE NPY_UBYTE
-#define PyArray_SHORT NPY_SHORT
-#define PyArray_USHORT NPY_USHORT
-#define PyArray_INT NPY_INT
-#define PyArray_UINT NPY_UINT
-#define PyArray_LONG NPY_LONG
-#define PyArray_ULONG NPY_ULONG
-#define PyArray_LONGLONG NPY_LONGLONG
-#define PyArray_ULONGLONG NPY_ULONGLONG
-#define PyArray_HALF NPY_HALF
-#define PyArray_FLOAT NPY_FLOAT
-#define PyArray_DOUBLE NPY_DOUBLE
-#define PyArray_LONGDOUBLE NPY_LONGDOUBLE
-#define PyArray_CFLOAT NPY_CFLOAT
-#define PyArray_CDOUBLE NPY_CDOUBLE
-#define PyArray_CLONGDOUBLE NPY_CLONGDOUBLE
-#define PyArray_OBJECT NPY_OBJECT
-#define PyArray_STRING NPY_STRING
-#define PyArray_UNICODE NPY_UNICODE
-#define PyArray_VOID NPY_VOID
-#define PyArray_DATETIME NPY_DATETIME
-#define PyArray_TIMEDELTA NPY_TIMEDELTA
-#define PyArray_NTYPES NPY_NTYPES
-#define PyArray_NOTYPE NPY_NOTYPE
-#define PyArray_CHAR NPY_CHAR
-#define PyArray_USERDEF NPY_USERDEF
-#define PyArray_NUMUSERTYPES NPY_NUMUSERTYPES
-
-#define PyArray_INTP NPY_INTP
-#define PyArray_UINTP NPY_UINTP
-
-#define PyArray_INT8 NPY_INT8
-#define PyArray_UINT8 NPY_UINT8
-#define PyArray_INT16 NPY_INT16
-#define PyArray_UINT16 NPY_UINT16
-#define PyArray_INT32 NPY_INT32
-#define PyArray_UINT32 NPY_UINT32
-
-#ifdef NPY_INT64
-#define PyArray_INT64 NPY_INT64
-#define PyArray_UINT64 NPY_UINT64
-#endif
-
-#ifdef NPY_INT128
-#define PyArray_INT128 NPY_INT128
-#define PyArray_UINT128 NPY_UINT128
-#endif
-
-#ifdef NPY_FLOAT16
-#define PyArray_FLOAT16 NPY_FLOAT16
-#define PyArray_COMPLEX32 NPY_COMPLEX32
-#endif
-
-#ifdef NPY_FLOAT80
-#define PyArray_FLOAT80 NPY_FLOAT80
-#define PyArray_COMPLEX160 NPY_COMPLEX160
-#endif
-
-#ifdef NPY_FLOAT96
-#define PyArray_FLOAT96 NPY_FLOAT96
-#define PyArray_COMPLEX192 NPY_COMPLEX192
-#endif
-
-#ifdef NPY_FLOAT128
-#define PyArray_FLOAT128 NPY_FLOAT128
-#define PyArray_COMPLEX256 NPY_COMPLEX256
-#endif
-
-#define PyArray_FLOAT32 NPY_FLOAT32
-#define PyArray_COMPLEX64 NPY_COMPLEX64
-#define PyArray_FLOAT64 NPY_FLOAT64
-#define PyArray_COMPLEX128 NPY_COMPLEX128
-
-
-#define PyArray_TYPECHAR NPY_TYPECHAR
-#define PyArray_BOOLLTR NPY_BOOLLTR
-#define PyArray_BYTELTR NPY_BYTELTR
-#define PyArray_UBYTELTR NPY_UBYTELTR
-#define PyArray_SHORTLTR NPY_SHORTLTR
-#define PyArray_USHORTLTR NPY_USHORTLTR
-#define PyArray_INTLTR NPY_INTLTR
-#define PyArray_UINTLTR NPY_UINTLTR
-#define PyArray_LONGLTR NPY_LONGLTR
-#define PyArray_ULONGLTR NPY_ULONGLTR
-#define PyArray_LONGLONGLTR NPY_LONGLONGLTR
-#define PyArray_ULONGLONGLTR NPY_ULONGLONGLTR
-#define PyArray_HALFLTR NPY_HALFLTR
-#define PyArray_FLOATLTR NPY_FLOATLTR
-#define PyArray_DOUBLELTR NPY_DOUBLELTR
-#define PyArray_LONGDOUBLELTR NPY_LONGDOUBLELTR
-#define PyArray_CFLOATLTR NPY_CFLOATLTR
-#define PyArray_CDOUBLELTR NPY_CDOUBLELTR
-#define PyArray_CLONGDOUBLELTR NPY_CLONGDOUBLELTR
-#define PyArray_OBJECTLTR NPY_OBJECTLTR
-#define PyArray_STRINGLTR NPY_STRINGLTR
-#define PyArray_STRINGLTR2 NPY_STRINGLTR2
-#define PyArray_UNICODELTR NPY_UNICODELTR
-#define PyArray_VOIDLTR NPY_VOIDLTR
-#define PyArray_DATETIMELTR NPY_DATETIMELTR
-#define PyArray_TIMEDELTALTR NPY_TIMEDELTALTR
-#define PyArray_CHARLTR NPY_CHARLTR
-#define PyArray_INTPLTR NPY_INTPLTR
-#define PyArray_UINTPLTR NPY_UINTPLTR
-#define PyArray_GENBOOLLTR NPY_GENBOOLLTR
-#define PyArray_SIGNEDLTR NPY_SIGNEDLTR
-#define PyArray_UNSIGNEDLTR NPY_UNSIGNEDLTR
-#define PyArray_FLOATINGLTR NPY_FLOATINGLTR
-#define PyArray_COMPLEXLTR NPY_COMPLEXLTR
-
-#define PyArray_QUICKSORT NPY_QUICKSORT
-#define PyArray_HEAPSORT NPY_HEAPSORT
-#define PyArray_MERGESORT NPY_MERGESORT
-#define PyArray_SORTKIND NPY_SORTKIND
-#define PyArray_NSORTS NPY_NSORTS
-
-#define PyArray_NOSCALAR NPY_NOSCALAR
-#define PyArray_BOOL_SCALAR NPY_BOOL_SCALAR
-#define PyArray_INTPOS_SCALAR NPY_INTPOS_SCALAR
-#define PyArray_INTNEG_SCALAR NPY_INTNEG_SCALAR
-#define PyArray_FLOAT_SCALAR NPY_FLOAT_SCALAR
-#define PyArray_COMPLEX_SCALAR NPY_COMPLEX_SCALAR
-#define PyArray_OBJECT_SCALAR NPY_OBJECT_SCALAR
-#define PyArray_SCALARKIND NPY_SCALARKIND
-#define PyArray_NSCALARKINDS NPY_NSCALARKINDS
-
-#define PyArray_ANYORDER NPY_ANYORDER
-#define PyArray_CORDER NPY_CORDER
-#define PyArray_FORTRANORDER NPY_FORTRANORDER
-#define PyArray_ORDER NPY_ORDER
-
-#define PyDescr_ISBOOL PyDataType_ISBOOL
-#define PyDescr_ISUNSIGNED PyDataType_ISUNSIGNED
-#define PyDescr_ISSIGNED PyDataType_ISSIGNED
-#define PyDescr_ISINTEGER PyDataType_ISINTEGER
-#define PyDescr_ISFLOAT PyDataType_ISFLOAT
-#define PyDescr_ISNUMBER PyDataType_ISNUMBER
-#define PyDescr_ISSTRING PyDataType_ISSTRING
-#define PyDescr_ISCOMPLEX PyDataType_ISCOMPLEX
-#define PyDescr_ISPYTHON PyDataType_ISPYTHON
-#define PyDescr_ISFLEXIBLE PyDataType_ISFLEXIBLE
-#define PyDescr_ISUSERDEF PyDataType_ISUSERDEF
-#define PyDescr_ISEXTENDED PyDataType_ISEXTENDED
-#define PyDescr_ISOBJECT PyDataType_ISOBJECT
-#define PyDescr_HASFIELDS PyDataType_HASFIELDS
-
-#define PyArray_LITTLE NPY_LITTLE
-#define PyArray_BIG NPY_BIG
-#define PyArray_NATIVE NPY_NATIVE
-#define PyArray_SWAP NPY_SWAP
-#define PyArray_IGNORE NPY_IGNORE
-
-#define PyArray_NATBYTE NPY_NATBYTE
-#define PyArray_OPPBYTE NPY_OPPBYTE
-
-#define PyArray_MAX_ELSIZE NPY_MAX_ELSIZE
-
-#define PyArray_USE_PYMEM NPY_USE_PYMEM
-
-#define PyArray_RemoveLargest PyArray_RemoveSmallest
-
-#define PyArray_UCS4 npy_ucs4
-
-#endif
diff --git a/include/numpy/oldnumeric.h b/include/numpy/oldnumeric.h
deleted file mode 100644
index 748f06da3..000000000
--- a/include/numpy/oldnumeric.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#include "arrayobject.h"
-
-#ifndef REFCOUNT
-# define REFCOUNT NPY_REFCOUNT
-# define MAX_ELSIZE 16
-#endif
-
-#define PyArray_UNSIGNED_TYPES
-#define PyArray_SBYTE NPY_BYTE
-#define PyArray_CopyArray PyArray_CopyInto
-#define _PyArray_multiply_list PyArray_MultiplyIntList
-#define PyArray_ISSPACESAVER(m) NPY_FALSE
-#define PyScalarArray_Check PyArray_CheckScalar
-
-#define CONTIGUOUS NPY_CONTIGUOUS
-#define OWN_DIMENSIONS 0
-#define OWN_STRIDES 0
-#define OWN_DATA NPY_OWNDATA
-#define SAVESPACE 0
-#define SAVESPACEBIT 0
-
-#undef import_array
-#define import_array() { if (_import_array() < 0) {PyErr_Print(); PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import"); } }
diff --git a/include/numpy/ufunc_api.txt b/include/numpy/ufunc_api.txt
deleted file mode 100644
index 3365433cd..000000000
--- a/include/numpy/ufunc_api.txt
+++ /dev/null
@@ -1,312 +0,0 @@
-
-=================
-Numpy Ufunc C-API
-=================
-::
-
- PyObject *
- PyUFunc_FromFuncAndData(PyUFuncGenericFunction *func, void
- **data, char *types, int ntypes, int nin, int
- nout, int identity, char *name, char *doc, int
- check_return)
-
-
-::
-
- int
- PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc, int
- usertype, PyUFuncGenericFunction
- function, int *arg_types, void *data)
-
-
-::
-
- int
- PyUFunc_GenericFunction(PyUFuncObject *ufunc, PyObject *args, PyObject
- *kwds, PyArrayObject **op)
-
-
-This generic function is called with the ufunc object, the arguments to it,
-and an array of (pointers to) PyArrayObjects which are NULL.
-
-'op' is an array of at least NPY_MAXARGS PyArrayObject *.
-
-::
-
- void
- PyUFunc_f_f_As_d_d(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- void
- PyUFunc_d_d(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_f_f(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_g_g(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_F_F_As_D_D(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- void
- PyUFunc_F_F(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_D_D(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_G_G(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_O_O(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_ff_f_As_dd_d(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- void
- PyUFunc_ff_f(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_dd_d(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_gg_g(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_FF_F_As_DD_D(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- void
- PyUFunc_DD_D(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_FF_F(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_GG_G(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_OO_O(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_O_O_method(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- void
- PyUFunc_OO_O_method(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- void
- PyUFunc_On_Om(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- int
- PyUFunc_GetPyValues(char *name, int *bufsize, int *errmask, PyObject
- **errobj)
-
-
-On return, if errobj is populated with a non-NULL value, the caller
-owns a new reference to errobj.
-
-::
-
- int
- PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first)
-
-
-::
-
- void
- PyUFunc_clearfperr()
-
-
-::
-
- int
- PyUFunc_getfperr(void )
-
-
-::
-
- int
- PyUFunc_handlefperr(int errmask, PyObject *errobj, int retstatus, int
- *first)
-
-
-::
-
- int
- PyUFunc_ReplaceLoopBySignature(PyUFuncObject
- *func, PyUFuncGenericFunction
- newfunc, int
- *signature, PyUFuncGenericFunction
- *oldfunc)
-
-
-::
-
- PyObject *
- PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void
- **data, char *types, int
- ntypes, int nin, int nout, int
- identity, char *name, char
- *doc, int check_return, const char
- *signature)
-
-
-::
-
- int
- PyUFunc_SetUsesArraysAsData(void **data, size_t i)
-
-
-::
-
- void
- PyUFunc_e_e(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_e_e_As_f_f(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- void
- PyUFunc_e_e_As_d_d(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- void
- PyUFunc_ee_e(char **args, npy_intp *dimensions, npy_intp *steps, void
- *func)
-
-
-::
-
- void
- PyUFunc_ee_e_As_ff_f(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- void
- PyUFunc_ee_e_As_dd_d(char **args, npy_intp *dimensions, npy_intp
- *steps, void *func)
-
-
-::
-
- int
- PyUFunc_DefaultTypeResolver(PyUFuncObject *ufunc, NPY_CASTING
- casting, PyArrayObject
- **operands, PyObject
- *type_tup, PyArray_Descr **out_dtypes)
-
-
-This function applies the default type resolution rules
-for the provided ufunc.
-
-Returns 0 on success, -1 on error.
-
-::
-
- int
- PyUFunc_ValidateCasting(PyUFuncObject *ufunc, NPY_CASTING
- casting, PyArrayObject
- **operands, PyArray_Descr **dtypes)
-
-
-Validates that the input operands can be cast to
-the input types, and the output types can be cast to
-the output operands where provided.
-
-Returns 0 on success, -1 (with exception raised) on validation failure.
-
diff --git a/include/numpy/ufuncobject.h b/include/numpy/ufuncobject.h
deleted file mode 100644
index 95afd5aa2..000000000
--- a/include/numpy/ufuncobject.h
+++ /dev/null
@@ -1,446 +0,0 @@
-#ifndef Py_UFUNCOBJECT_H
-#define Py_UFUNCOBJECT_H
-
-#include
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * The legacy generic inner loop for a standard element-wise or
- * generalized ufunc.
- */
-typedef void (*PyUFuncGenericFunction)
- (char **args,
- npy_intp *dimensions,
- npy_intp *strides,
- void *innerloopdata);
-
-/*
- * The most generic one-dimensional inner loop for
- * a standard element-wise ufunc. This typedef is also
- * more consistent with the other NumPy function pointer typedefs
- * than PyUFuncGenericFunction.
- */
-typedef void (PyUFunc_StridedInnerLoopFunc)(
- char **dataptrs, npy_intp *strides,
- npy_intp count,
- NpyAuxData *innerloopdata);
-
-/*
- * The most generic one-dimensional inner loop for
- * a masked standard element-wise ufunc. "Masked" here means that it skips
- * doing calculations on any items for which the maskptr array has a true
- * value.
- */
-typedef void (PyUFunc_MaskedStridedInnerLoopFunc)(
- char **dataptrs, npy_intp *strides,
- char *maskptr, npy_intp mask_stride,
- npy_intp count,
- NpyAuxData *innerloopdata);
-
-/* Forward declaration for the type resolver and loop selector typedefs */
-struct _tagPyUFuncObject;
-
-/*
- * Given the operands for calling a ufunc, should determine the
- * calculation input and output data types and return an inner loop function.
- * This function should validate that the casting rule is being followed,
- * and fail if it is not.
- *
- * For backwards compatibility, the regular type resolution function does not
- * support auxiliary data with object semantics. The type resolution call
- * which returns a masked generic function returns a standard NpyAuxData
- * object, for which the NPY_AUXDATA_FREE and NPY_AUXDATA_CLONE macros
- * work.
- *
- * ufunc: The ufunc object.
- * casting: The 'casting' parameter provided to the ufunc.
- * operands: An array of length (ufunc->nin + ufunc->nout),
- * with the output parameters possibly NULL.
- * type_tup: Either NULL, or the type_tup passed to the ufunc.
- * out_dtypes: An array which should be populated with new
- * references to (ufunc->nin + ufunc->nout) new
- * dtypes, one for each input and output. These
- * dtypes should all be in native-endian format.
- *
- * Should return 0 on success, -1 on failure (with exception set),
- * or -2 if Py_NotImplemented should be returned.
- */
-typedef int (PyUFunc_TypeResolutionFunc)(
- struct _tagPyUFuncObject *ufunc,
- NPY_CASTING casting,
- PyArrayObject **operands,
- PyObject *type_tup,
- PyArray_Descr **out_dtypes);
-
-/*
- * Given an array of DTypes as returned by the PyUFunc_TypeResolutionFunc,
- * and an array of fixed strides (the array will contain NPY_MAX_INTP for
- * strides which are not necessarily fixed), returns an inner loop
- * with associated auxiliary data.
- *
- * For backwards compatibility, there is a variant of the inner loop
- * selection which returns an inner loop irrespective of the strides,
- * and with a void* static auxiliary data instead of an NpyAuxData *
- * dynamically allocatable auxiliary data.
- *
- * ufunc: The ufunc object.
- * dtypes: An array which has been populated with dtypes,
- * in most cases by the type resolution funciton
- * for the same ufunc.
- * fixed_strides: For each input/output, either the stride that
- * will be used every time the function is called
- * or NPY_MAX_INTP if the stride might change or
- * is not known ahead of time. The loop selection
- * function may use this stride to pick inner loops
- * which are optimized for contiguous or 0-stride
- * cases.
- * out_innerloop: Should be populated with the correct ufunc inner
- * loop for the given type.
- * out_innerloopdata: Should be populated with the void* data to
- * be passed into the out_innerloop function.
- * out_needs_api: If the inner loop needs to use the Python API,
- * should set the to 1, otherwise should leave
- * this untouched.
- */
-typedef int (PyUFunc_LegacyInnerLoopSelectionFunc)(
- struct _tagPyUFuncObject *ufunc,
- PyArray_Descr **dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata,
- int *out_needs_api);
-typedef int (PyUFunc_InnerLoopSelectionFunc)(
- struct _tagPyUFuncObject *ufunc,
- PyArray_Descr **dtypes,
- npy_intp *fixed_strides,
- PyUFunc_StridedInnerLoopFunc **out_innerloop,
- NpyAuxData **out_innerloopdata,
- int *out_needs_api);
-typedef int (PyUFunc_MaskedInnerLoopSelectionFunc)(
- struct _tagPyUFuncObject *ufunc,
- PyArray_Descr **dtypes,
- PyArray_Descr *mask_dtype,
- npy_intp *fixed_strides,
- npy_intp fixed_mask_stride,
- PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
- NpyAuxData **out_innerloopdata,
- int *out_needs_api);
-
-typedef struct _tagPyUFuncObject {
- PyObject_HEAD
- /*
- * nin: Number of inputs
- * nout: Number of outputs
- * nargs: Always nin + nout (Why is it stored?)
- */
- int nin, nout, nargs;
-
- /* Identity for reduction, either PyUFunc_One or PyUFunc_Zero */
- int identity;
-
- /* Array of one-dimensional core loops */
- PyUFuncGenericFunction *functions;
- /* Array of funcdata that gets passed into the functions */
- void **data;
- /* The number of elements in 'functions' and 'data' */
- int ntypes;
-
- /* Does not appear to be used */
- int check_return;
-
- /* The name of the ufunc */
- char *name;
-
- /* Array of type numbers, of size ('nargs' * 'ntypes') */
- char *types;
-
- /* Documentation string */
- char *doc;
-
- void *ptr;
- PyObject *obj;
- PyObject *userloops;
-
- /* generalized ufunc parameters */
-
- /* 0 for scalar ufunc; 1 for generalized ufunc */
- int core_enabled;
- /* number of distinct dimension names in signature */
- int core_num_dim_ix;
-
- /*
- * dimension indices of input/output argument k are stored in
- * core_dim_ixs[core_offsets[k]..core_offsets[k]+core_num_dims[k]-1]
- */
-
- /* numbers of core dimensions of each argument */
- int *core_num_dims;
- /*
- * dimension indices in a flatted form; indices
- * are in the range of [0,core_num_dim_ix)
- */
- int *core_dim_ixs;
- /*
- * positions of 1st core dimensions of each
- * argument in core_dim_ixs
- */
- int *core_offsets;
- /* signature string for printing purpose */
- char *core_signature;
-
- /*
- * A function which resolves the types and fills an array
- * with the dtypes for the inputs and outputs.
- */
- PyUFunc_TypeResolutionFunc *type_resolver;
- /*
- * A function which returns an inner loop written for
- * NumPy 1.6 and earlier ufuncs. This is for backwards
- * compatibility, and may be NULL if inner_loop_selector
- * is specified.
- */
- PyUFunc_LegacyInnerLoopSelectionFunc *legacy_inner_loop_selector;
- /*
- * A function which returns an inner loop for the new mechanism
- * in NumPy 1.7 and later. If provided, this is used, otherwise
- * if NULL the legacy_inner_loop_selector is used instead.
- */
- PyUFunc_InnerLoopSelectionFunc *inner_loop_selector;
- /*
- * A function which returns a masked inner loop for the ufunc.
- */
- PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector;
-} PyUFuncObject;
-
-#include "arrayobject.h"
-
-#define UFUNC_ERR_IGNORE 0
-#define UFUNC_ERR_WARN 1
-#define UFUNC_ERR_RAISE 2
-#define UFUNC_ERR_CALL 3
-#define UFUNC_ERR_PRINT 4
-#define UFUNC_ERR_LOG 5
-
- /* Python side integer mask */
-
-#define UFUNC_MASK_DIVIDEBYZERO 0x07
-#define UFUNC_MASK_OVERFLOW 0x3f
-#define UFUNC_MASK_UNDERFLOW 0x1ff
-#define UFUNC_MASK_INVALID 0xfff
-
-#define UFUNC_SHIFT_DIVIDEBYZERO 0
-#define UFUNC_SHIFT_OVERFLOW 3
-#define UFUNC_SHIFT_UNDERFLOW 6
-#define UFUNC_SHIFT_INVALID 9
-
-
-/* platform-dependent code translates floating point
- status to an integer sum of these values
-*/
-#define UFUNC_FPE_DIVIDEBYZERO 1
-#define UFUNC_FPE_OVERFLOW 2
-#define UFUNC_FPE_UNDERFLOW 4
-#define UFUNC_FPE_INVALID 8
-
-/* Error mode that avoids look-up (no checking) */
-#define UFUNC_ERR_DEFAULT 0
-
-#define UFUNC_OBJ_ISOBJECT 1
-#define UFUNC_OBJ_NEEDS_API 2
-
- /* Default user error mode */
-#define UFUNC_ERR_DEFAULT2 \
- (UFUNC_ERR_WARN << UFUNC_SHIFT_DIVIDEBYZERO) + \
- (UFUNC_ERR_WARN << UFUNC_SHIFT_OVERFLOW) + \
- (UFUNC_ERR_WARN << UFUNC_SHIFT_INVALID)
-
-#if NPY_ALLOW_THREADS
-#define NPY_LOOP_BEGIN_THREADS do {if (!(loop->obj & UFUNC_OBJ_NEEDS_API)) _save = PyEval_SaveThread();} while (0);
-#define NPY_LOOP_END_THREADS do {if (!(loop->obj & UFUNC_OBJ_NEEDS_API)) PyEval_RestoreThread(_save);} while (0);
-#else
-#define NPY_LOOP_BEGIN_THREADS
-#define NPY_LOOP_END_THREADS
-#endif
-
-/*
- * UFunc has unit of 1, and the order of operations can be reordered
- * This case allows reduction with multiple axes at once.
- */
-#define PyUFunc_One 1
-/*
- * UFunc has unit of 0, and the order of operations can be reordered
- * This case allows reduction with multiple axes at once.
- */
-#define PyUFunc_Zero 0
-/*
- * UFunc has no unit, and the order of operations cannot be reordered.
- * This case does not allow reduction with multiple axes at once.
- */
-#define PyUFunc_None -1
-/*
- * UFunc has no unit, and the order of operations can be reordered
- * This case allows reduction with multiple axes at once.
- */
-#define PyUFunc_ReorderableNone -2
-
-#define UFUNC_REDUCE 0
-#define UFUNC_ACCUMULATE 1
-#define UFUNC_REDUCEAT 2
-#define UFUNC_OUTER 3
-
-
-typedef struct {
- int nin;
- int nout;
- PyObject *callable;
-} PyUFunc_PyFuncData;
-
-/* A linked-list of function information for
- user-defined 1-d loops.
- */
-typedef struct _loop1d_info {
- PyUFuncGenericFunction func;
- void *data;
- int *arg_types;
- struct _loop1d_info *next;
-} PyUFunc_Loop1d;
-
-
-#include "__ufunc_api.h"
-
-#define UFUNC_PYVALS_NAME "UFUNC_PYVALS"
-
-#define UFUNC_CHECK_ERROR(arg) \
- do {if ((((arg)->obj & UFUNC_OBJ_NEEDS_API) && PyErr_Occurred()) || \
- ((arg)->errormask && \
- PyUFunc_checkfperr((arg)->errormask, \
- (arg)->errobj, \
- &(arg)->first))) \
- goto fail;} while (0)
-
-/* This code checks the IEEE status flags in a platform-dependent way */
-/* Adapted from Numarray */
-
-#if (defined(__unix__) || defined(unix)) && !defined(USG)
-#include
-#endif
-
-/* OSF/Alpha (Tru64) ---------------------------------------------*/
-#if defined(__osf__) && defined(__alpha)
-
-#include
-
-#define UFUNC_CHECK_STATUS(ret) { \
- unsigned long fpstatus; \
- \
- fpstatus = ieee_get_fp_control(); \
- /* clear status bits as well as disable exception mode if on */ \
- ieee_set_fp_control( 0 ); \
- ret = ((IEEE_STATUS_DZE & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
- | ((IEEE_STATUS_OVF & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
- | ((IEEE_STATUS_UNF & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
- | ((IEEE_STATUS_INV & fpstatus) ? UFUNC_FPE_INVALID : 0); \
- }
-
-/* MS Windows -----------------------------------------------------*/
-#elif defined(_MSC_VER)
-
-#include
-
- /* Clear the floating point exception default of Borland C++ */
-#if defined(__BORLANDC__)
-#define UFUNC_NOFPE _control87(MCW_EM, MCW_EM);
-#endif
-
-#define UFUNC_CHECK_STATUS(ret) { \
- int fpstatus = (int) _clearfp(); \
- \
- ret = ((SW_ZERODIVIDE & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
- | ((SW_OVERFLOW & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
- | ((SW_UNDERFLOW & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
- | ((SW_INVALID & fpstatus) ? UFUNC_FPE_INVALID : 0); \
- }
-
-/* Solaris --------------------------------------------------------*/
-/* --------ignoring SunOS ieee_flags approach, someone else can
-** deal with that! */
-#elif defined(sun) || defined(__BSD__) || defined(__OpenBSD__) || \
- (defined(__FreeBSD__) && (__FreeBSD_version < 502114)) || \
- defined(__NetBSD__)
-#include
-
-#define UFUNC_CHECK_STATUS(ret) { \
- int fpstatus; \
- \
- fpstatus = (int) fpgetsticky(); \
- ret = ((FP_X_DZ & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
- | ((FP_X_OFL & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
- | ((FP_X_UFL & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
- | ((FP_X_INV & fpstatus) ? UFUNC_FPE_INVALID : 0); \
- (void) fpsetsticky(0); \
- }
-
-#elif defined(__GLIBC__) || defined(__APPLE__) || \
- defined(__CYGWIN__) || defined(__MINGW32__) || \
- (defined(__FreeBSD__) && (__FreeBSD_version >= 502114))
-
-#if defined(__GLIBC__) || defined(__APPLE__) || \
- defined(__MINGW32__) || defined(__FreeBSD__)
-#include
-#endif
-
-#define UFUNC_CHECK_STATUS(ret) { \
- int fpstatus = (int) fetestexcept(FE_DIVBYZERO | FE_OVERFLOW | \
- FE_UNDERFLOW | FE_INVALID); \
- ret = ((FE_DIVBYZERO & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
- | ((FE_OVERFLOW & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
- | ((FE_UNDERFLOW & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
- | ((FE_INVALID & fpstatus) ? UFUNC_FPE_INVALID : 0); \
- (void) feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | \
- FE_UNDERFLOW | FE_INVALID); \
-}
-
-#elif defined(_AIX)
-
-#include
-#include
-
-#define UFUNC_CHECK_STATUS(ret) { \
- fpflag_t fpstatus; \
- \
- fpstatus = fp_read_flag(); \
- ret = ((FP_DIV_BY_ZERO & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
- | ((FP_OVERFLOW & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
- | ((FP_UNDERFLOW & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
- | ((FP_INVALID & fpstatus) ? UFUNC_FPE_INVALID : 0); \
- fp_swap_flag(0); \
-}
-
-#else
-
-#define NO_FLOATING_POINT_SUPPORT
-#define UFUNC_CHECK_STATUS(ret) { \
- ret = 0; \
- }
-
-#endif
-
-/*
- * THESE MACROS ARE DEPRECATED.
- * Use npy_set_floatstatus_* in the npymath library.
- */
-#define generate_divbyzero_error() npy_set_floatstatus_divbyzero()
-#define generate_overflow_error() npy_set_floatstatus_overflow()
-
- /* Make sure it gets defined if it isn't already */
-#ifndef UFUNC_NOFPE
-#define UFUNC_NOFPE
-#endif
-
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* !Py_UFUNCOBJECT_H */
diff --git a/include/numpy/utils.h b/include/numpy/utils.h
deleted file mode 100644
index cc968a354..000000000
--- a/include/numpy/utils.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __NUMPY_UTILS_HEADER__
-#define __NUMPY_UTILS_HEADER__
-
-#ifndef __COMP_NPY_UNUSED
- #if defined(__GNUC__)
- #define __COMP_NPY_UNUSED __attribute__ ((__unused__))
- # elif defined(__ICC)
- #define __COMP_NPY_UNUSED __attribute__ ((__unused__))
- #else
- #define __COMP_NPY_UNUSED
- #endif
-#endif
-
-/* Use this to tag a variable as not used. It will remove unused variable
- * warning on support platforms (see __COM_NPY_UNUSED) and mangle the variable
- * to avoid accidental use */
-#define NPY_UNUSED(x) (__NPY_UNUSED_TAGGED ## x) __COMP_NPY_UNUSED
-
-#endif
diff --git a/netlify.toml b/netlify.toml
index 9cb11ae81..3c17b876c 100644
--- a/netlify.toml
+++ b/netlify.toml
@@ -24,7 +24,7 @@ redirects = [
{from = "/docs/usage/customizing-tokenizer", to = "/usage/linguistic-features#tokenization", force = true},
{from = "/docs/usage/language-processing-pipeline", to = "/usage/processing-pipelines", force = true},
{from = "/docs/usage/customizing-pipeline", to = "/usage/processing-pipelines", force = true},
- {from = "/docs/usage/training-ner", to = "/usage/training#ner", force = true},
+ {from = "/docs/usage/training-ner", to = "/usage/training", force = true},
{from = "/docs/usage/tutorials", to = "/usage/examples", force = true},
{from = "/docs/usage/data-model", to = "/api", force = true},
{from = "/docs/usage/cli", to = "/api/cli", force = true},
@@ -36,8 +36,15 @@ redirects = [
{from = "/docs/api/features", to = "/models/#architecture", force = true},
{from = "/docs/api/philosophy", to = "/usage/spacy-101", force = true},
{from = "/docs/usage/showcase", to = "/universe", force = true},
- {from = "/tutorials/load-new-word-vectors", to = "/usage/vectors-similarity#custom", force = true},
+ {from = "/tutorials/load-new-word-vectors", to = "/usage/linguistic-features", force = true},
{from = "/tutorials", to = "/usage/examples", force = true},
+ # Old documentation pages (v2.x)
+ {from = "/usage/adding-languages", to = "/usage/linguistic-features", force = true},
+ {from = "/usage/vectors-similarity", to = "/usage/linguistic-features#vectors-similarity", force = true},
+ {from = "/api/goldparse", to = "/api/top-level", force = true},
+ {from = "/api/goldcorpus", to = "/api/corpus", force = true},
+ {from = "/api/annotation", to = "/api/data-formats", force = true},
+ {from = "/usage/examples", to = "/usage/projects", force = true},
# Rewrite all other docs pages to /
{from = "/docs/*", to = "/:splat"},
# Updated documentation pages
diff --git a/pyproject.toml b/pyproject.toml
index fe66494ff..d23730b00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,9 @@ requires = [
"cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0",
- "thinc==7.4.1",
+ "thinc>=8.0.0a30,<8.0.0a40",
+ "blis>=0.4.0,<0.5.0",
+ "pytokenizations",
+ "pathy"
]
build-backend = "setuptools.build_meta"
diff --git a/requirements.txt b/requirements.txt
index b93def651..9b108de8d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,23 +1,29 @@
# Our libraries
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
-thinc==7.4.1
+thinc>=8.0.0a30,<8.0.0a40
blis>=0.4.0,<0.5.0
+ml_datasets>=0.1.1
murmurhash>=0.28.0,<1.1.0
-wasabi>=0.4.0,<1.1.0
-srsly>=1.0.2,<1.1.0
-catalogue>=0.0.7,<1.1.0
+wasabi>=0.8.0,<1.1.0
+srsly>=2.1.0,<3.0.0
+catalogue>=2.0.1,<2.1.0
+typer>=0.3.0,<0.4.0
+pathy
# Third party dependencies
numpy>=1.15.0
requests>=2.13.0,<3.0.0
-plac>=0.9.6,<1.2.0
-pathlib==1.0.1; python_version < "3.4"
tqdm>=4.38.0,<5.0.0
-# Optional dependencies
-jsonschema>=2.6.0,<3.1.0
+pydantic>=1.3.0,<2.0.0
+pytokenizations
+# Official Python utilities
+setuptools
+packaging
+importlib_metadata>=0.20; python_version < "3.8"
# Development dependencies
cython>=0.25
pytest>=4.6.5
pytest-timeout>=1.3.0,<2.0.0
mock>=2.0.0,<3.0.0
flake8>=3.5.0,<3.6.0
+jinja2
diff --git a/setup.cfg b/setup.cfg
index 9bd45d45d..fc33abedb 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -16,10 +16,7 @@ classifiers =
Operating System :: MacOS :: MacOS X
Operating System :: Microsoft :: Windows
Programming Language :: Cython
- Programming Language :: Python :: 2
- Programming Language :: Python :: 2.7
Programming Language :: Python :: 3
- Programming Language :: Python :: 3.5
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
@@ -28,54 +25,62 @@ classifiers =
[options]
zip_safe = false
include_package_data = true
-scripts =
- bin/spacy
-python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*
+python_requires = >=3.6
setup_requires =
wheel
cython>=0.25
+ numpy>=1.15.0
# We also need our Cython packages here to compile against
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
murmurhash>=0.28.0,<1.1.0
- thinc==7.4.1
+ thinc>=8.0.0a30,<8.0.0a40
install_requires =
# Our libraries
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
- thinc==7.4.1
+ thinc>=8.0.0a30,<8.0.0a40
blis>=0.4.0,<0.5.0
- wasabi>=0.4.0,<1.1.0
- srsly>=1.0.2,<1.1.0
- catalogue>=0.0.7,<1.1.0
+ wasabi>=0.8.0,<1.1.0
+ srsly>=2.1.0,<3.0.0
+ catalogue>=2.0.1,<2.1.0
+ typer>=0.3.0,<0.4.0
+ pathy
# Third-party dependencies
tqdm>=4.38.0,<5.0.0
- setuptools
numpy>=1.15.0
- plac>=0.9.6,<1.2.0
requests>=2.13.0,<3.0.0
- pathlib==1.0.1; python_version < "3.4"
+ pydantic>=1.3.0,<2.0.0
+ pytokenizations
+ # Official Python utilities
+ setuptools
+ packaging
+ importlib_metadata>=0.20; python_version < "3.8"
+
+[options.entry_points]
+console_scripts =
+ spacy = spacy.cli:app
[options.extras_require]
lookups =
spacy_lookups_data>=0.3.2,<0.4.0
cuda =
- cupy>=5.0.0b4,<8.0.0
+ cupy>=5.0.0b4,<9.0.0
cuda80 =
- cupy-cuda80>=5.0.0b4,<8.0.0
+ cupy-cuda80>=5.0.0b4,<9.0.0
cuda90 =
- cupy-cuda90>=5.0.0b4,<8.0.0
+ cupy-cuda90>=5.0.0b4,<9.0.0
cuda91 =
- cupy-cuda91>=5.0.0b4,<8.0.0
+ cupy-cuda91>=5.0.0b4,<9.0.0
cuda92 =
- cupy-cuda92>=5.0.0b4,<8.0.0
+ cupy-cuda92>=5.0.0b4,<9.0.0
cuda100 =
- cupy-cuda100>=5.0.0b4,<8.0.0
+ cupy-cuda100>=5.0.0b4,<9.0.0
cuda101 =
- cupy-cuda101>=5.0.0b4,<8.0.0
+ cupy-cuda101>=5.0.0b4,<9.0.0
cuda102 =
- cupy-cuda102>=5.0.0b4,<8.0.0
+ cupy-cuda102>=5.0.0b4,<9.0.0
# Language tokenizers with external dependencies
ja =
sudachipy>=0.4.5
@@ -100,8 +105,12 @@ exclude =
.git,
__pycache__,
_tokenizer_exceptions_list.py,
- spacy/__init__.py
[tool:pytest]
markers =
slow
+
+[mypy]
+ignore_missing_imports = True
+no_implicit_optional = True
+plugins = pydantic.mypy, thinc.mypy
diff --git a/setup.py b/setup.py
index f78781918..d448a262c 100755
--- a/setup.py
+++ b/setup.py
@@ -1,55 +1,54 @@
#!/usr/bin/env python
-from __future__ import print_function
-import io
-import os
-import subprocess
+from setuptools import Extension, setup, find_packages
import sys
-import contextlib
+import platform
from distutils.command.build_ext import build_ext
from distutils.sysconfig import get_python_inc
-import distutils.util
-from distutils import ccompiler, msvccompiler
-from setuptools import Extension, setup, find_packages
+import numpy
+from pathlib import Path
+import shutil
+from Cython.Build import cythonize
+from Cython.Compiler import Options
+import os
+import subprocess
-def is_new_osx():
- """Check whether we're on OSX >= 10.10"""
- name = distutils.util.get_platform()
- if sys.platform != "darwin":
- return False
- elif name.startswith("macosx-10"):
- minor_version = int(name.split("-")[1].split(".")[1])
- if minor_version >= 7:
- return True
- else:
- return False
- else:
- return False
+ROOT = Path(__file__).parent
+PACKAGE_ROOT = ROOT / "spacy"
+# Preserve `__doc__` on functions and classes
+# http://docs.cython.org/en/latest/src/userguide/source_files_and_compilation.html#compiler-options
+Options.docstrings = True
+
PACKAGES = find_packages()
-
-
MOD_NAMES = [
+ "spacy.gold.example",
"spacy.parts_of_speech",
"spacy.strings",
"spacy.lexeme",
"spacy.vocab",
"spacy.attrs",
"spacy.kb",
+ "spacy.ml.parser_model",
"spacy.morphology",
- "spacy.pipeline.pipes",
+ "spacy.pipeline.dep_parser",
"spacy.pipeline.morphologizer",
- "spacy.syntax.stateclass",
- "spacy.syntax._state",
+ "spacy.pipeline.multitask",
+ "spacy.pipeline.ner",
+ "spacy.pipeline.pipe",
+ "spacy.pipeline.sentencizer",
+ "spacy.pipeline.senter",
+ "spacy.pipeline.tagger",
+ "spacy.pipeline.transition_parser",
+ "spacy.pipeline._parser_internals.arc_eager",
+ "spacy.pipeline._parser_internals.ner",
+ "spacy.pipeline._parser_internals.nonproj",
+ "spacy.pipeline._parser_internals._state",
+ "spacy.pipeline._parser_internals.stateclass",
+ "spacy.pipeline._parser_internals.transition_system",
"spacy.tokenizer",
- "spacy.syntax.nn_parser",
- "spacy.syntax._parser_model",
- "spacy.syntax._beam_utils",
- "spacy.syntax.nonproj",
- "spacy.syntax.transition_system",
- "spacy.syntax.arc_eager",
- "spacy.gold",
+ "spacy.gold.gold_io",
"spacy.tokens.doc",
"spacy.tokens.span",
"spacy.tokens.token",
@@ -58,20 +57,40 @@ MOD_NAMES = [
"spacy.matcher.matcher",
"spacy.matcher.phrasematcher",
"spacy.matcher.dependencymatcher",
- "spacy.syntax.ner",
"spacy.symbols",
"spacy.vectors",
]
-
-
COMPILE_OPTIONS = {
"msvc": ["/Ox", "/EHsc"],
"mingw32": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"],
"other": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"],
}
-
-
LINK_OPTIONS = {"msvc": [], "mingw32": [], "other": []}
+COMPILER_DIRECTIVES = {
+ "language_level": -3,
+ "embedsignature": True,
+ "annotation_typing": False,
+}
+# Files to copy into the package that are otherwise not included
+COPY_FILES = {
+ ROOT / "setup.cfg": PACKAGE_ROOT / "tests" / "package",
+ ROOT / "pyproject.toml": PACKAGE_ROOT / "tests" / "package",
+ ROOT / "requirements.txt": PACKAGE_ROOT / "tests" / "package",
+}
+
+
+def is_new_osx():
+ """Check whether we're on OSX >= 10.7"""
+ if sys.platform != "darwin":
+ return False
+ mac_ver = platform.mac_ver()[0]
+ if mac_ver.startswith("10"):
+ minor_version = int(mac_ver.split(".")[1])
+ if minor_version >= 7:
+ return True
+ else:
+ return False
+ return False
if is_new_osx():
@@ -104,20 +123,6 @@ class build_ext_subclass(build_ext, build_ext_options):
build_ext.build_extensions(self)
-def generate_cython(root, source):
- print("Cythonizing sources")
- p = subprocess.call(
- [sys.executable, os.path.join(root, "bin", "cythonize.py"), source],
- env=os.environ,
- )
- if p != 0:
- raise RuntimeError("Running cythonize failed")
-
-
-def is_source_release(path):
- return os.path.exists(os.path.join(path, "PKG-INFO"))
-
-
# Include the git version in the build (adapted from NumPy)
# Copyright (c) 2005-2020, NumPy Developers.
# BSD 3-Clause license, see licenses/3rd_party_licenses.txt
@@ -137,19 +142,19 @@ def write_git_info_py(filename="spacy/git_info.py"):
return out
git_version = "Unknown"
- if os.path.exists(".git"):
+ if Path(".git").exists():
try:
out = _minimal_ext_cmd(["git", "rev-parse", "--short", "HEAD"])
git_version = out.strip().decode("ascii")
- except:
+ except Exception:
pass
- elif os.path.exists(filename):
+ elif Path(filename).exists():
# must be a source distribution, use existing version file
try:
a = open(filename, "r")
lines = a.readlines()
git_version = lines[-1].split('"')[1]
- except:
+ except Exception:
pass
finally:
a.close()
@@ -160,90 +165,53 @@ GIT_VERSION = "%(git_version)s"
"""
a = open(filename, "w")
try:
- a.write(
- text % {"git_version": git_version,}
- )
+ a.write(text % {"git_version": git_version})
finally:
a.close()
def clean(path):
- for name in MOD_NAMES:
- name = name.replace(".", "/")
- for ext in [".so", ".html", ".cpp", ".c"]:
- file_path = os.path.join(path, name + ext)
- if os.path.exists(file_path):
- os.unlink(file_path)
-
-
-@contextlib.contextmanager
-def chdir(new_dir):
- old_dir = os.getcwd()
- try:
- os.chdir(new_dir)
- sys.path.insert(0, new_dir)
- yield
- finally:
- del sys.path[0]
- os.chdir(old_dir)
+ for path in path.glob("**/*"):
+ if path.is_file() and path.suffix in (".so", ".cpp", ".html"):
+ print(f"Deleting {path.name}")
+ path.unlink()
def setup_package():
write_git_info_py()
+ if len(sys.argv) > 1 and sys.argv[1] == "clean":
+ return clean(PACKAGE_ROOT)
- root = os.path.abspath(os.path.dirname(__file__))
+ with (PACKAGE_ROOT / "about.py").open("r") as f:
+ about = {}
+ exec(f.read(), about)
- if hasattr(sys, "argv") and len(sys.argv) > 1 and sys.argv[1] == "clean":
- return clean(root)
+ for copy_file, target_dir in COPY_FILES.items():
+ if copy_file.exists():
+ shutil.copy(str(copy_file), str(target_dir))
+ print(f"Copied {copy_file} -> {target_dir}")
- with chdir(root):
- with io.open(os.path.join(root, "spacy", "about.py"), encoding="utf8") as f:
- about = {}
- exec(f.read(), about)
+ include_dirs = [
+ get_python_inc(plat_specific=True),
+ numpy.get_include(),
+ ]
+ ext_modules = []
+ for name in MOD_NAMES:
+ mod_path = name.replace(".", "/") + ".pyx"
+ ext = Extension(name, [mod_path], language="c++")
+ ext_modules.append(ext)
+ print("Cythonizing sources")
+ ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES)
- include_dirs = [
- get_python_inc(plat_specific=True),
- os.path.join(root, "include"),
- ]
-
- if (
- ccompiler.new_compiler().compiler_type == "msvc"
- and msvccompiler.get_build_version() == 9
- ):
- include_dirs.append(os.path.join(root, "include", "msvc9"))
-
- ext_modules = []
- for mod_name in MOD_NAMES:
- mod_path = mod_name.replace(".", "/") + ".cpp"
- extra_link_args = []
- # ???
- # Imported from patch from @mikepb
- # See Issue #267. Running blind here...
- if sys.platform == "darwin":
- dylib_path = [".." for _ in range(mod_name.count("."))]
- dylib_path = "/".join(dylib_path)
- dylib_path = "@loader_path/%s/spacy/platform/darwin/lib" % dylib_path
- extra_link_args.append("-Wl,-rpath,%s" % dylib_path)
- ext_modules.append(
- Extension(
- mod_name,
- [mod_path],
- language="c++",
- include_dirs=include_dirs,
- extra_link_args=extra_link_args,
- )
- )
-
- if not is_source_release(root):
- generate_cython(root, "spacy")
-
- setup(
- name="spacy",
- packages=PACKAGES,
- version=about["__version__"],
- ext_modules=ext_modules,
- cmdclass={"build_ext": build_ext_subclass},
- )
+ setup(
+ name="spacy-nightly",
+ packages=PACKAGES,
+ version=about["__version__"],
+ ext_modules=ext_modules,
+ cmdclass={"build_ext": build_ext_subclass},
+ include_dirs=include_dirs,
+ package_data={"": ["*.pyx", "*.pxd", "*.pxi", "*.cpp"]},
+ )
if __name__ == "__main__":
diff --git a/spacy/__init__.py b/spacy/__init__.py
index 6aa7b7c16..5c286ed80 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -1,39 +1,57 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Union, Iterable, Dict, Any
+from pathlib import Path
import warnings
import sys
-warnings.filterwarnings("ignore", message="numpy.dtype size changed")
-warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
+warnings.filterwarnings("ignore", message="numpy.dtype size changed") # noqa
+warnings.filterwarnings("ignore", message="numpy.ufunc size changed") # noqa
# These are imported as part of the API
-from thinc.neural.util import prefer_gpu, require_gpu
+from thinc.api import prefer_gpu, require_gpu # noqa: F401
+from thinc.api import Config
-from . import pipeline
-from .cli.info import info as cli_info
-from .glossary import explain
-from .about import __version__
-from .errors import Errors, Warnings
+from . import pipeline # noqa: F401
+from .cli.info import info # noqa: F401
+from .glossary import explain # noqa: F401
+from .about import __version__ # noqa: F401
+from .util import registry, logger # noqa: F401
+
+from .errors import Errors
+from .language import Language
from . import util
-from .util import registry
-from .language import component
if sys.maxunicode == 65535:
raise SystemError(Errors.E130)
-def load(name, **overrides):
- depr_path = overrides.get("path")
- if depr_path not in (True, False, None):
- warnings.warn(Warnings.W001.format(path=depr_path), DeprecationWarning)
- return util.load_model(name, **overrides)
+def load(
+ name: Union[str, Path],
+ disable: Iterable[str] = util.SimpleFrozenList(),
+ exclude: Iterable[str] = util.SimpleFrozenList(),
+ config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
+) -> Language:
+ """Load a spaCy model from an installed package or a local path.
+
+ name (str / Path): Package name or model path.
+ disable (Iterable[str]): Names of pipeline components to disable. Disabled
+ pipes will be loaded but they won't be run unless you explicitly
+ enable them by calling nlp.enable_pipe.
+ exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+ components won't be loaded.
+ config (Dict[str, Any] / Config): Config overrides as nested dict or dict
+ keyed by section values in dot notation.
+ RETURNS (Language): The loaded nlp object.
+ """
+ return util.load_model(name, disable=disable, exclude=exclude, config=config)
-def blank(name, **kwargs):
+def blank(name: str, **overrides) -> Language:
+ """Create a blank nlp object for a given language code.
+
+ name (str): The language code, e.g. "en".
+ **overrides: Keyword arguments passed to language subclass on init.
+ RETURNS (Language): The nlp object.
+ """
LangClass = util.get_lang_class(name)
- return LangClass(**kwargs)
-
-
-def info(model=None, markdown=False, silent=False):
- return cli_info(model, markdown, silent)
+ return LangClass(**overrides)
diff --git a/spacy/__main__.py b/spacy/__main__.py
index 2c285095e..f6b5066b7 100644
--- a/spacy/__main__.py
+++ b/spacy/__main__.py
@@ -1,36 +1,4 @@
-# coding: utf8
-from __future__ import print_function
-
-# NB! This breaks in plac on Python 2!!
-# from __future__ import unicode_literals
-
if __name__ == "__main__":
- import plac
- import sys
- from wasabi import msg
- from spacy.cli import download, link, info, package, train, pretrain, convert
- from spacy.cli import init_model, profile, evaluate, validate, debug_data
+ from spacy.cli import setup_cli
- commands = {
- "download": download,
- "link": link,
- "info": info,
- "train": train,
- "pretrain": pretrain,
- "debug-data": debug_data,
- "evaluate": evaluate,
- "convert": convert,
- "package": package,
- "init-model": init_model,
- "profile": profile,
- "validate": validate,
- }
- if len(sys.argv) == 1:
- msg.info("Available commands", ", ".join(commands), exits=1)
- command = sys.argv.pop(1)
- sys.argv[0] = "spacy %s" % command
- if command in commands:
- plac.call(commands[command], sys.argv[1:])
- else:
- available = "Available: {}".format(", ".join(commands))
- msg.fail("Unknown command: {}".format(command), available, exits=1)
+ setup_cli()
diff --git a/spacy/_ml.py b/spacy/_ml.py
deleted file mode 100644
index d947aab1c..000000000
--- a/spacy/_ml.py
+++ /dev/null
@@ -1,1004 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import numpy
-import warnings
-from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
-from thinc.t2t import ExtractWindow, ParametricAttention
-from thinc.t2v import Pooling, sum_pool, mean_pool
-from thinc.i2v import HashEmbed
-from thinc.misc import Residual, FeatureExtracter
-from thinc.misc import LayerNorm as LN
-from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
-from thinc.api import with_getitem, flatten_add_lengths
-from thinc.api import uniqued, wrap, noop
-from thinc.linear.linear import LinearModel
-from thinc.neural.ops import NumpyOps, CupyOps
-from thinc.neural.util import get_array_module, copy_array, to_categorical
-from thinc.neural.optimizers import Adam
-
-from thinc import describe
-from thinc.describe import Dimension, Synapses, Biases, Gradient
-from thinc.neural._classes.affine import _set_dimensions_if_needed
-import thinc.extra.load_nlp
-
-from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE
-from .errors import Errors, Warnings
-from . import util
-from . import ml as new_ml
-from .ml import _legacy_tok2vec
-
-
-VECTORS_KEY = "spacy_pretrained_vectors"
-# Backwards compatibility with <2.2.2
-USE_MODEL_REGISTRY_TOK2VEC = False
-
-
-def cosine(vec1, vec2):
- xp = get_array_module(vec1)
- norm1 = xp.linalg.norm(vec1)
- norm2 = xp.linalg.norm(vec2)
- if norm1 == 0.0 or norm2 == 0.0:
- return 0
- else:
- return vec1.dot(vec2) / (norm1 * norm2)
-
-
-def create_default_optimizer(ops, **cfg):
- learn_rate = util.env_opt("learn_rate", 0.001)
- beta1 = util.env_opt("optimizer_B1", 0.9)
- beta2 = util.env_opt("optimizer_B2", 0.999)
- eps = util.env_opt("optimizer_eps", 1e-8)
- L2 = util.env_opt("L2_penalty", 1e-6)
- max_grad_norm = util.env_opt("grad_norm_clip", 1.0)
- optimizer = Adam(ops, learn_rate, L2=L2, beta1=beta1, beta2=beta2, eps=eps)
- optimizer.max_grad_norm = max_grad_norm
- optimizer.device = ops.device
- return optimizer
-
-
-@layerize
-def _flatten_add_lengths(seqs, pad=0, drop=0.0):
- ops = Model.ops
- lengths = ops.asarray([len(seq) for seq in seqs], dtype="i")
-
- def finish_update(d_X, sgd=None):
- return ops.unflatten(d_X, lengths, pad=pad)
-
- X = ops.flatten(seqs, pad=pad)
- return (X, lengths), finish_update
-
-
-def _zero_init(model):
- def _zero_init_impl(self, *args, **kwargs):
- self.W.fill(0)
-
- model.on_init_hooks.append(_zero_init_impl)
- if model.W is not None:
- model.W.fill(0.0)
- return model
-
-
-def with_cpu(ops, model):
- """Wrap a model that should run on CPU, transferring inputs and outputs
- as necessary."""
- model.to_cpu()
-
- def with_cpu_forward(inputs, drop=0.0):
- cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
- gpu_outputs = _to_device(ops, cpu_outputs)
-
- def with_cpu_backprop(d_outputs, sgd=None):
- cpu_d_outputs = _to_cpu(d_outputs)
- return backprop(cpu_d_outputs, sgd=sgd)
-
- return gpu_outputs, with_cpu_backprop
-
- return wrap(with_cpu_forward, model)
-
-
-def _to_cpu(X):
- if isinstance(X, numpy.ndarray):
- return X
- elif isinstance(X, tuple):
- return tuple([_to_cpu(x) for x in X])
- elif isinstance(X, list):
- return [_to_cpu(x) for x in X]
- elif hasattr(X, "get"):
- return X.get()
- else:
- return X
-
-
-def _to_device(ops, X):
- if isinstance(X, tuple):
- return tuple([_to_device(ops, x) for x in X])
- elif isinstance(X, list):
- return [_to_device(ops, x) for x in X]
- else:
- return ops.asarray(X)
-
-
-class extract_ngrams(Model):
- def __init__(self, ngram_size, attr=LOWER):
- Model.__init__(self)
- self.ngram_size = ngram_size
- self.attr = attr
-
- def begin_update(self, docs, drop=0.0):
- batch_keys = []
- batch_vals = []
- for doc in docs:
- unigrams = doc.to_array([self.attr])
- ngrams = [unigrams]
- for n in range(2, self.ngram_size + 1):
- ngrams.append(self.ops.ngrams(n, unigrams))
- keys = self.ops.xp.concatenate(ngrams)
- keys, vals = self.ops.xp.unique(keys, return_counts=True)
- batch_keys.append(keys)
- batch_vals.append(vals)
- # The dtype here matches what thinc is expecting -- which differs per
- # platform (by int definition). This should be fixed once the problem
- # is fixed on Thinc's side.
- lengths = self.ops.asarray(
- [arr.shape[0] for arr in batch_keys], dtype=numpy.int_
- )
- batch_keys = self.ops.xp.concatenate(batch_keys)
- batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
- return (batch_keys, batch_vals, lengths), None
-
-
-@describe.on_data(
- _set_dimensions_if_needed, lambda model, X, y: model.init_weights(model)
-)
-@describe.attributes(
- nI=Dimension("Input size"),
- nF=Dimension("Number of features"),
- nO=Dimension("Output size"),
- nP=Dimension("Maxout pieces"),
- W=Synapses("Weights matrix", lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI)),
- b=Biases("Bias vector", lambda obj: (obj.nO, obj.nP)),
- pad=Synapses(
- "Pad",
- lambda obj: (1, obj.nF, obj.nO, obj.nP),
- lambda M, ops: ops.normal_init(M, 1.0),
- ),
- d_W=Gradient("W"),
- d_pad=Gradient("pad"),
- d_b=Gradient("b"),
-)
-class PrecomputableAffine(Model):
- def __init__(self, nO=None, nI=None, nF=None, nP=None, **kwargs):
- Model.__init__(self, **kwargs)
- self.nO = nO
- self.nP = nP
- self.nI = nI
- self.nF = nF
-
- def begin_update(self, X, drop=0.0):
- Yf = self.ops.gemm(
- X, self.W.reshape((self.nF * self.nO * self.nP, self.nI)), trans2=True
- )
- Yf = Yf.reshape((Yf.shape[0], self.nF, self.nO, self.nP))
- Yf = self._add_padding(Yf)
-
- def backward(dY_ids, sgd=None):
- dY, ids = dY_ids
- dY, ids = self._backprop_padding(dY, ids)
- Xf = X[ids]
- Xf = Xf.reshape((Xf.shape[0], self.nF * self.nI))
-
- self.d_b += dY.sum(axis=0)
- dY = dY.reshape((dY.shape[0], self.nO * self.nP))
-
- Wopfi = self.W.transpose((1, 2, 0, 3))
- Wopfi = self.ops.xp.ascontiguousarray(Wopfi)
- Wopfi = Wopfi.reshape((self.nO * self.nP, self.nF * self.nI))
- dXf = self.ops.gemm(dY.reshape((dY.shape[0], self.nO * self.nP)), Wopfi)
-
- # Reuse the buffer
- dWopfi = Wopfi
- dWopfi.fill(0.0)
- self.ops.gemm(dY, Xf, out=dWopfi, trans1=True)
- dWopfi = dWopfi.reshape((self.nO, self.nP, self.nF, self.nI))
- # (o, p, f, i) --> (f, o, p, i)
- self.d_W += dWopfi.transpose((2, 0, 1, 3))
-
- if sgd is not None:
- sgd(self._mem.weights, self._mem.gradient, key=self.id)
- return dXf.reshape((dXf.shape[0], self.nF, self.nI))
-
- return Yf, backward
-
- def _add_padding(self, Yf):
- Yf_padded = self.ops.xp.vstack((self.pad, Yf))
- return Yf_padded
-
- def _backprop_padding(self, dY, ids):
- # (1, nF, nO, nP) += (nN, nF, nO, nP) where IDs (nN, nF) < 0
- mask = ids < 0.0
- mask = mask.sum(axis=1)
- d_pad = dY * mask.reshape((ids.shape[0], 1, 1))
- self.d_pad += d_pad.sum(axis=0)
- return dY, ids
-
- @staticmethod
- def init_weights(model):
- """This is like the 'layer sequential unit variance', but instead
- of taking the actual inputs, we randomly generate whitened data.
-
- Why's this all so complicated? We have a huge number of inputs,
- and the maxout unit makes guessing the dynamics tricky. Instead
- we set the maxout weights to values that empirically result in
- whitened outputs given whitened inputs.
- """
- if (model.W ** 2).sum() != 0.0:
- return
- ops = model.ops
- xp = ops.xp
- ops.normal_init(model.W, model.nF * model.nI, inplace=True)
-
- ids = ops.allocate((5000, model.nF), dtype="f")
- ids += xp.random.uniform(0, 1000, ids.shape)
- ids = ops.asarray(ids, dtype="i")
- tokvecs = ops.allocate((5000, model.nI), dtype="f")
- tokvecs += xp.random.normal(loc=0.0, scale=1.0, size=tokvecs.size).reshape(
- tokvecs.shape
- )
-
- def predict(ids, tokvecs):
- # nS ids. nW tokvecs. Exclude the padding array.
- hiddens = model(tokvecs[:-1]) # (nW, f, o, p)
- vectors = model.ops.allocate((ids.shape[0], model.nO * model.nP), dtype="f")
- # need nS vectors
- hiddens = hiddens.reshape(
- (hiddens.shape[0] * model.nF, model.nO * model.nP)
- )
- model.ops.scatter_add(vectors, ids.flatten(), hiddens)
- vectors = vectors.reshape((vectors.shape[0], model.nO, model.nP))
- vectors += model.b
- vectors = model.ops.asarray(vectors)
- if model.nP >= 2:
- return model.ops.maxout(vectors)[0]
- else:
- return vectors * (vectors >= 0)
-
- tol_var = 0.01
- tol_mean = 0.01
- t_max = 10
- t_i = 0
- for t_i in range(t_max):
- acts1 = predict(ids, tokvecs)
- var = model.ops.xp.var(acts1)
- mean = model.ops.xp.mean(acts1)
- if abs(var - 1.0) >= tol_var:
- model.W /= model.ops.xp.sqrt(var)
- elif abs(mean) >= tol_mean:
- model.b -= mean
- else:
- break
-
-
-def link_vectors_to_models(vocab, skip_rank=False):
- vectors = vocab.vectors
- if vectors.name is None:
- vectors.name = VECTORS_KEY
- if vectors.data.size != 0:
- warnings.warn(Warnings.W020.format(shape=vectors.data.shape))
- ops = Model.ops
- if not skip_rank:
- for word in vocab:
- if word.orth in vectors.key2row:
- word.rank = vectors.key2row[word.orth]
- else:
- word.rank = util.OOV_RANK
- data = ops.asarray(vectors.data)
- # Set an entry here, so that vectors are accessed by StaticVectors
- # (unideal, I know)
- key = (ops.device, vectors.name)
- if key in thinc.extra.load_nlp.VECTORS:
- if thinc.extra.load_nlp.VECTORS[key].shape != data.shape:
- # This is a hack to avoid the problem in #3853.
- old_name = vectors.name
- new_name = vectors.name + "_%d" % data.shape[0]
- warnings.warn(Warnings.W019.format(old=old_name, new=new_name))
- vectors.name = new_name
- key = (ops.device, vectors.name)
- thinc.extra.load_nlp.VECTORS[key] = data
-
-
-def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
- import torch.nn
- from thinc.api import with_square_sequences
- from thinc.extra.wrappers import PyTorchWrapperRNN
-
- if depth == 0:
- return layerize(noop())
- model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
- return with_square_sequences(PyTorchWrapperRNN(model))
-
-
-def Tok2Vec(width, embed_size, **kwargs):
- if not USE_MODEL_REGISTRY_TOK2VEC:
- # Preserve prior tok2vec for backwards compat, in v2.2.2
- return _legacy_tok2vec.Tok2Vec(width, embed_size, **kwargs)
- pretrained_vectors = kwargs.get("pretrained_vectors", None)
- cnn_maxout_pieces = kwargs.get("cnn_maxout_pieces", 3)
- subword_features = kwargs.get("subword_features", True)
- char_embed = kwargs.get("char_embed", False)
- conv_depth = kwargs.get("conv_depth", 4)
- bilstm_depth = kwargs.get("bilstm_depth", 0)
- conv_window = kwargs.get("conv_window", 1)
-
- cols = ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]
-
- doc2feats_cfg = {"arch": "spacy.Doc2Feats.v1", "config": {"columns": cols}}
- if char_embed:
- embed_cfg = {
- "arch": "spacy.CharacterEmbed.v1",
- "config": {
- "width": 64,
- "chars": 6,
- "@mix": {
- "arch": "spacy.LayerNormalizedMaxout.v1",
- "config": {"width": width, "pieces": 3},
- },
- "@embed_features": None,
- },
- }
- else:
- embed_cfg = {
- "arch": "spacy.MultiHashEmbed.v1",
- "config": {
- "width": width,
- "rows": embed_size,
- "columns": cols,
- "use_subwords": subword_features,
- "@pretrained_vectors": None,
- "@mix": {
- "arch": "spacy.LayerNormalizedMaxout.v1",
- "config": {"width": width, "pieces": 3},
- },
- },
- }
- if pretrained_vectors:
- embed_cfg["config"]["@pretrained_vectors"] = {
- "arch": "spacy.PretrainedVectors.v1",
- "config": {
- "vectors_name": pretrained_vectors,
- "width": width,
- "column": cols.index("ID"),
- },
- }
- if cnn_maxout_pieces >= 2:
- cnn_cfg = {
- "arch": "spacy.MaxoutWindowEncoder.v1",
- "config": {
- "width": width,
- "window_size": conv_window,
- "pieces": cnn_maxout_pieces,
- "depth": conv_depth,
- },
- }
- else:
- cnn_cfg = {
- "arch": "spacy.MishWindowEncoder.v1",
- "config": {"width": width, "window_size": conv_window, "depth": conv_depth},
- }
- bilstm_cfg = {
- "arch": "spacy.TorchBiLSTMEncoder.v1",
- "config": {"width": width, "depth": bilstm_depth},
- }
- if conv_depth == 0 and bilstm_depth == 0:
- encode_cfg = {}
- elif conv_depth >= 1 and bilstm_depth >= 1:
- encode_cfg = {
- "arch": "thinc.FeedForward.v1",
- "config": {"children": [cnn_cfg, bilstm_cfg]},
- }
- elif conv_depth >= 1:
- encode_cfg = cnn_cfg
- else:
- encode_cfg = bilstm_cfg
- config = {"@doc2feats": doc2feats_cfg, "@embed": embed_cfg, "@encode": encode_cfg}
- return new_ml.Tok2Vec(config)
-
-
-def reapply(layer, n_times):
- def reapply_fwd(X, drop=0.0):
- backprops = []
- for i in range(n_times):
- Y, backprop = layer.begin_update(X, drop=drop)
- X = Y
- backprops.append(backprop)
-
- def reapply_bwd(dY, sgd=None):
- dX = None
- for backprop in reversed(backprops):
- dY = backprop(dY, sgd=sgd)
- if dX is None:
- dX = dY
- else:
- dX += dY
- return dX
-
- return Y, reapply_bwd
-
- return wrap(reapply_fwd, layer)
-
-
-def asarray(ops, dtype):
- def forward(X, drop=0.0):
- return ops.asarray(X, dtype=dtype), None
-
- return layerize(forward)
-
-
-def _divide_array(X, size):
- parts = []
- index = 0
- while index < len(X):
- parts.append(X[index : index + size])
- index += size
- return parts
-
-
-def get_col(idx):
- if idx < 0:
- raise IndexError(Errors.E066.format(value=idx))
-
- def forward(X, drop=0.0):
- if isinstance(X, numpy.ndarray):
- ops = NumpyOps()
- else:
- ops = CupyOps()
- output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)
-
- def backward(y, sgd=None):
- dX = ops.allocate(X.shape)
- dX[:, idx] += y
- return dX
-
- return output, backward
-
- return layerize(forward)
-
-
-def doc2feats(cols=None):
- if cols is None:
- cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
-
- def forward(docs, drop=0.0):
- feats = []
- for doc in docs:
- feats.append(doc.to_array(cols))
- return feats, None
-
- model = layerize(forward)
- model.cols = cols
- return model
-
-
-def print_shape(prefix):
- def forward(X, drop=0.0):
- return X, lambda dX, **kwargs: dX
-
- return layerize(forward)
-
-
-@layerize
-def get_token_vectors(tokens_attrs_vectors, drop=0.0):
- tokens, attrs, vectors = tokens_attrs_vectors
-
- def backward(d_output, sgd=None):
- return (tokens, d_output)
-
- return vectors, backward
-
-
-@layerize
-def logistic(X, drop=0.0):
- xp = get_array_module(X)
- if not isinstance(X, xp.ndarray):
- X = xp.asarray(X)
- # Clip to range (-10, 10)
- X = xp.minimum(X, 10.0, X)
- X = xp.maximum(X, -10.0, X)
- Y = 1.0 / (1.0 + xp.exp(-X))
-
- def logistic_bwd(dY, sgd=None):
- dX = dY * (Y * (1 - Y))
- return dX
-
- return Y, logistic_bwd
-
-
-def zero_init(model):
- def _zero_init_impl(self, X, y):
- self.W.fill(0)
-
- model.on_data_hooks.append(_zero_init_impl)
- return model
-
-
-def getitem(i):
- def getitem_fwd(X, drop=0.0):
- return X[i], None
-
- return layerize(getitem_fwd)
-
-
-@describe.attributes(
- W=Synapses("Weights matrix", lambda obj: (obj.nO, obj.nI), lambda W, ops: None)
-)
-class MultiSoftmax(Affine):
- """Neural network layer that predicts several multi-class attributes at once.
- For instance, we might predict one class with 6 variables, and another with 5.
- We predict the 11 neurons required for this, and then softmax them such
- that columns 0-6 make a probability distribution and coumns 6-11 make another.
- """
-
- name = "multisoftmax"
-
- def __init__(self, out_sizes, nI=None, **kwargs):
- Model.__init__(self, **kwargs)
- self.out_sizes = out_sizes
- self.nO = sum(out_sizes)
- self.nI = nI
-
- def predict(self, input__BI):
- output__BO = self.ops.affine(self.W, self.b, input__BI)
- i = 0
- for out_size in self.out_sizes:
- self.ops.softmax(output__BO[:, i : i + out_size], inplace=True)
- i += out_size
- return output__BO
-
- def begin_update(self, input__BI, drop=0.0):
- output__BO = self.predict(input__BI)
-
- def finish_update(grad__BO, sgd=None):
- self.d_W += self.ops.gemm(grad__BO, input__BI, trans1=True)
- self.d_b += grad__BO.sum(axis=0)
- grad__BI = self.ops.gemm(grad__BO, self.W)
- if sgd is not None:
- sgd(self._mem.weights, self._mem.gradient, key=self.id)
- return grad__BI
-
- return output__BO, finish_update
-
-
-def build_tagger_model(nr_class, **cfg):
- embed_size = util.env_opt("embed_size", 2000)
- if "token_vector_width" in cfg:
- token_vector_width = cfg["token_vector_width"]
- else:
- token_vector_width = util.env_opt("token_vector_width", 96)
- pretrained_vectors = cfg.get("pretrained_vectors")
- subword_features = cfg.get("subword_features", True)
- with Model.define_operators({">>": chain, "+": add}):
- if "tok2vec" in cfg:
- tok2vec = cfg["tok2vec"]
- else:
- tok2vec = Tok2Vec(
- token_vector_width,
- embed_size,
- subword_features=subword_features,
- pretrained_vectors=pretrained_vectors,
- )
- softmax = with_flatten(Softmax(nr_class, token_vector_width))
- model = tok2vec >> softmax
- model.nI = None
- model.tok2vec = tok2vec
- model.softmax = softmax
- return model
-
-
-def build_morphologizer_model(class_nums, **cfg):
- embed_size = util.env_opt("embed_size", 7000)
- if "token_vector_width" in cfg:
- token_vector_width = cfg["token_vector_width"]
- else:
- token_vector_width = util.env_opt("token_vector_width", 128)
- pretrained_vectors = cfg.get("pretrained_vectors")
- char_embed = cfg.get("char_embed", True)
- with Model.define_operators({">>": chain, "+": add, "**": clone}):
- if "tok2vec" in cfg:
- tok2vec = cfg["tok2vec"]
- else:
- tok2vec = Tok2Vec(
- token_vector_width,
- embed_size,
- char_embed=char_embed,
- pretrained_vectors=pretrained_vectors,
- )
- softmax = with_flatten(MultiSoftmax(class_nums, token_vector_width))
- softmax.out_sizes = class_nums
- model = tok2vec >> softmax
- model.nI = None
- model.tok2vec = tok2vec
- model.softmax = softmax
- return model
-
-
-@layerize
-def SpacyVectors(docs, drop=0.0):
- batch = []
- for doc in docs:
- indices = numpy.zeros((len(doc),), dtype="i")
- for i, word in enumerate(doc):
- if word.orth in doc.vocab.vectors.key2row:
- indices[i] = doc.vocab.vectors.key2row[word.orth]
- else:
- indices[i] = 0
- vectors = doc.vocab.vectors.data[indices]
- batch.append(vectors)
- return batch, None
-
-
-def build_text_classifier(nr_class, width=64, **cfg):
- depth = cfg.get("depth", 2)
- nr_vector = cfg.get("nr_vector", 5000)
- pretrained_dims = cfg.get("pretrained_dims", 0)
- with Model.define_operators({">>": chain, "+": add, "|": concatenate, "**": clone}):
- if cfg.get("low_data") and pretrained_dims:
- model = (
- SpacyVectors
- >> flatten_add_lengths
- >> with_getitem(0, Affine(width, pretrained_dims))
- >> ParametricAttention(width)
- >> Pooling(sum_pool)
- >> Residual(ReLu(width, width)) ** 2
- >> zero_init(Affine(nr_class, width, drop_factor=0.0))
- >> logistic
- )
- return model
-
- lower = HashEmbed(width, nr_vector, column=1)
- prefix = HashEmbed(width // 2, nr_vector, column=2)
- suffix = HashEmbed(width // 2, nr_vector, column=3)
- shape = HashEmbed(width // 2, nr_vector, column=4)
-
- trained_vectors = FeatureExtracter(
- [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
- ) >> with_flatten(
- uniqued(
- (lower | prefix | suffix | shape)
- >> LN(Maxout(width, width + (width // 2) * 3)),
- column=0,
- )
- )
-
- if pretrained_dims:
- static_vectors = SpacyVectors >> with_flatten(
- Affine(width, pretrained_dims)
- )
- # TODO Make concatenate support lists
- vectors = concatenate_lists(trained_vectors, static_vectors)
- vectors_width = width * 2
- else:
- vectors = trained_vectors
- vectors_width = width
- static_vectors = None
- tok2vec = vectors >> with_flatten(
- LN(Maxout(width, vectors_width))
- >> Residual((ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))) ** depth,
- pad=depth,
- )
- cnn_model = (
- tok2vec
- >> flatten_add_lengths
- >> ParametricAttention(width)
- >> Pooling(sum_pool)
- >> Residual(zero_init(Maxout(width, width)))
- >> zero_init(Affine(nr_class, width, drop_factor=0.0))
- )
-
- linear_model = build_bow_text_classifier(
- nr_class,
- ngram_size=cfg.get("ngram_size", 1),
- exclusive_classes=cfg.get("exclusive_classes", False),
- )
- if cfg.get("exclusive_classes", False):
- output_layer = Softmax(nr_class, nr_class * 2)
- else:
- output_layer = (
- zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
- )
- model = (linear_model | cnn_model) >> output_layer
- model.tok2vec = chain(tok2vec, flatten)
- model.nO = nr_class
- model.lsuv = False
- return model
-
-
-def build_bow_text_classifier(
- nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg
-):
- with Model.define_operators({">>": chain}):
- model = with_cpu(
- Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class)
- )
- if not no_output_layer:
- model = model >> (cpu_softmax if exclusive_classes else logistic)
- model.nO = nr_class
- return model
-
-
-@layerize
-def cpu_softmax(X, drop=0.0):
- ops = NumpyOps()
-
- def cpu_softmax_backward(dY, sgd=None):
- return dY
-
- return ops.softmax(X), cpu_softmax_backward
-
-
-def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False, **cfg):
- """
- Build a simple CNN text classifier, given a token-to-vector model as inputs.
- If exclusive_classes=True, a softmax non-linearity is applied, so that the
- outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
- is applied instead, so that outputs are in the range [0, 1].
- """
- with Model.define_operators({">>": chain}):
- if exclusive_classes:
- output_layer = Softmax(nr_class, tok2vec.nO)
- else:
- output_layer = (
- zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
- )
- model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
- model.tok2vec = chain(tok2vec, flatten)
- model.nO = nr_class
- return model
-
-
-def build_nel_encoder(embed_width, hidden_width, ner_types, **cfg):
- if "entity_width" not in cfg:
- raise ValueError(Errors.E144.format(param="entity_width"))
-
- conv_depth = cfg.get("conv_depth", 2)
- cnn_maxout_pieces = cfg.get("cnn_maxout_pieces", 3)
- pretrained_vectors = cfg.get("pretrained_vectors", None)
- context_width = cfg.get("entity_width")
-
- with Model.define_operators({">>": chain, "**": clone}):
- # context encoder
- tok2vec = Tok2Vec(
- width=hidden_width,
- embed_size=embed_width,
- pretrained_vectors=pretrained_vectors,
- cnn_maxout_pieces=cnn_maxout_pieces,
- subword_features=True,
- conv_depth=conv_depth,
- bilstm_depth=0,
- )
-
- model = (
- tok2vec
- >> flatten_add_lengths
- >> Pooling(mean_pool)
- >> Residual(zero_init(Maxout(hidden_width, hidden_width)))
- >> zero_init(Affine(context_width, hidden_width, drop_factor=0.0))
- )
-
- model.tok2vec = tok2vec
- model.nO = context_width
- return model
-
-
-@layerize
-def flatten(seqs, drop=0.0):
- ops = Model.ops
- lengths = ops.asarray([len(seq) for seq in seqs], dtype="i")
-
- def finish_update(d_X, sgd=None):
- return ops.unflatten(d_X, lengths, pad=0)
-
- X = ops.flatten(seqs, pad=0)
- return X, finish_update
-
-
-def concatenate_lists(*layers, **kwargs): # pragma: no cover
- """Compose two or more models `f`, `g`, etc, such that their outputs are
- concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
- """
- if not layers:
- return noop()
- drop_factor = kwargs.get("drop_factor", 1.0)
- ops = layers[0].ops
- layers = [chain(layer, flatten) for layer in layers]
- concat = concatenate(*layers)
-
- def concatenate_lists_fwd(Xs, drop=0.0):
- if drop is not None:
- drop *= drop_factor
- lengths = ops.asarray([len(X) for X in Xs], dtype="i")
- flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
- ys = ops.unflatten(flat_y, lengths)
-
- def concatenate_lists_bwd(d_ys, sgd=None):
- return bp_flat_y(ops.flatten(d_ys), sgd=sgd)
-
- return ys, concatenate_lists_bwd
-
- model = wrap(concatenate_lists_fwd, concat)
- return model
-
-
-def masked_language_model(vocab, model, mask_prob=0.15):
- """Convert a model into a BERT-style masked language model"""
-
- random_words = _RandomWords(vocab)
-
- def mlm_forward(docs, drop=0.0):
- mask, docs = _apply_mask(docs, random_words, mask_prob=mask_prob)
- mask = model.ops.asarray(mask).reshape((mask.shape[0], 1))
- output, backprop = model.begin_update(docs, drop=drop)
-
- def mlm_backward(d_output, sgd=None):
- d_output *= 1 - mask
- # Rescale gradient for number of instances.
- d_output *= mask.size - mask.sum()
- return backprop(d_output, sgd=sgd)
-
- return output, mlm_backward
-
- return wrap(mlm_forward, model)
-
-
-class _RandomWords(object):
- def __init__(self, vocab):
- self.words = [lex.text for lex in vocab if lex.prob != 0.0]
- self.probs = [lex.prob for lex in vocab if lex.prob != 0.0]
- self.words = self.words[:10000]
- self.probs = self.probs[:10000]
- self.probs = numpy.exp(numpy.array(self.probs, dtype="f"))
- self.probs /= self.probs.sum()
- self._cache = []
-
- def next(self):
- if not self._cache:
- self._cache.extend(
- numpy.random.choice(len(self.words), 10000, p=self.probs)
- )
- index = self._cache.pop()
- return self.words[index]
-
-
-def _apply_mask(docs, random_words, mask_prob=0.15):
- # This needs to be here to avoid circular imports
- from .tokens.doc import Doc
-
- N = sum(len(doc) for doc in docs)
- mask = numpy.random.uniform(0.0, 1.0, (N,))
- mask = mask >= mask_prob
- i = 0
- masked_docs = []
- for doc in docs:
- words = []
- for token in doc:
- if not mask[i]:
- word = _replace_word(token.text, random_words)
- else:
- word = token.text
- words.append(word)
- i += 1
- spaces = [bool(w.whitespace_) for w in doc]
- # NB: If you change this implementation to instead modify
- # the docs in place, take care that the IDs reflect the original
- # words. Currently we use the original docs to make the vectors
- # for the target, so we don't lose the original tokens. But if
- # you modified the docs in place here, you would.
- masked_docs.append(Doc(doc.vocab, words=words, spaces=spaces))
- return mask, masked_docs
-
-
-def _replace_word(word, random_words, mask="[MASK]"):
- roll = numpy.random.random()
- if roll < 0.8:
- return mask
- elif roll < 0.9:
- return random_words.next()
- else:
- return word
-
-
-def _uniform_init(lo, hi):
- def wrapped(W, ops):
- copy_array(W, ops.xp.random.uniform(lo, hi, W.shape))
-
- return wrapped
-
-
-@describe.attributes(
- nM=Dimension("Vector dimensions"),
- nC=Dimension("Number of characters per word"),
- vectors=Synapses(
- "Embed matrix", lambda obj: (obj.nC, obj.nV, obj.nM), _uniform_init(-0.1, 0.1)
- ),
- d_vectors=Gradient("vectors"),
-)
-class CharacterEmbed(Model):
- def __init__(self, nM=None, nC=None, **kwargs):
- Model.__init__(self, **kwargs)
- self.nM = nM
- self.nC = nC
-
- @property
- def nO(self):
- return self.nM * self.nC
-
- @property
- def nV(self):
- return 256
-
- def begin_update(self, docs, drop=0.0):
- if not docs:
- return []
- ids = []
- output = []
- weights = self.vectors
- # This assists in indexing; it's like looping over this dimension.
- # Still consider this weird witch craft...But thanks to Mark Neumann
- # for the tip.
- nCv = self.ops.xp.arange(self.nC)
- for doc in docs:
- doc_ids = self.ops.asarray(doc.to_utf8_array(nr_char=self.nC))
- doc_vectors = self.ops.allocate((len(doc), self.nC, self.nM))
- # Let's say I have a 2d array of indices, and a 3d table of data. What numpy
- # incantation do I chant to get
- # output[i, j, k] == data[j, ids[i, j], k]?
- doc_vectors[:, nCv] = weights[nCv, doc_ids[:, nCv]]
- output.append(doc_vectors.reshape((len(doc), self.nO)))
- ids.append(doc_ids)
-
- def backprop_character_embed(d_vectors, sgd=None):
- gradient = self.d_vectors
- for doc_ids, d_doc_vectors in zip(ids, d_vectors):
- d_doc_vectors = d_doc_vectors.reshape((len(doc_ids), self.nC, self.nM))
- gradient[nCv, doc_ids[:, nCv]] += d_doc_vectors[:, nCv]
- if sgd is not None:
- sgd(self._mem.weights, self._mem.gradient, key=self.id)
- return None
-
- return output, backprop_character_embed
-
-
-def get_cossim_loss(yh, y, ignore_zeros=False):
- xp = get_array_module(yh)
- # Find the zero vectors
- if ignore_zeros:
- zero_indices = xp.abs(y).sum(axis=1) == 0
- # Add a small constant to avoid 0 vectors
- yh = yh + 1e-8
- y = y + 1e-8
- # https://math.stackexchange.com/questions/1923613/partial-derivative-of-cosine-similarity
- norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
- norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
- mul_norms = norm_yh * norm_y
- cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
- d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
- losses = xp.abs(cosine - 1)
- if ignore_zeros:
- # If the target was a zero vector, don't count it in the loss.
- d_yh[zero_indices] = 0
- losses[zero_indices] = 0
- loss = losses.sum()
- return loss, -d_yh
-
-
-def get_characters_loss(ops, docs, prediction, nr_char=10):
- target_ids = numpy.vstack([doc.to_utf8_array(nr_char=nr_char) for doc in docs])
- target_ids = target_ids.reshape((-1,))
- target = ops.asarray(to_categorical(target_ids, nb_classes=256), dtype="f")
- target = target.reshape((-1, 256*nr_char))
- diff = prediction - target
- loss = (diff**2).sum()
- d_target = diff / float(prediction.shape[0])
- return loss, d_target
-
-
-
diff --git a/spacy/about.py b/spacy/about.py
index 42c38cda5..3fe720dbc 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,7 +1,7 @@
# fmt: off
-__title__ = "spacy"
-__version__ = "2.3.2"
+__title__ = "spacy-nightly"
+__version__ = "3.0.0a13"
__release__ = True
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
-__shortcuts__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/shortcuts-v2.json"
+__projects__ = "https://github.com/explosion/spacy-boilerplates"
diff --git a/spacy/analysis.py b/spacy/analysis.py
deleted file mode 100644
index 960ce6c0f..000000000
--- a/spacy/analysis.py
+++ /dev/null
@@ -1,181 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import warnings
-
-from collections import OrderedDict
-from wasabi import Printer
-
-from .tokens import Doc, Token, Span
-from .errors import Errors, Warnings
-
-
-def analyze_pipes(pipeline, name, pipe, index, warn=True):
- """Analyze a pipeline component with respect to its position in the current
- pipeline and the other components. Will check whether requirements are
- fulfilled (e.g. if previous components assign the attributes).
-
- pipeline (list): A list of (name, pipe) tuples e.g. nlp.pipeline.
- name (unicode): The name of the pipeline component to analyze.
- pipe (callable): The pipeline component function to analyze.
- index (int): The index of the component in the pipeline.
- warn (bool): Show user warning if problem is found.
- RETURNS (list): The problems found for the given pipeline component.
- """
- assert pipeline[index][0] == name
- prev_pipes = pipeline[:index]
- pipe_requires = getattr(pipe, "requires", [])
- requires = OrderedDict([(annot, False) for annot in pipe_requires])
- if requires:
- for prev_name, prev_pipe in prev_pipes:
- prev_assigns = getattr(prev_pipe, "assigns", [])
- for annot in prev_assigns:
- requires[annot] = True
- problems = []
- for annot, fulfilled in requires.items():
- if not fulfilled:
- problems.append(annot)
- if warn:
- warnings.warn(Warnings.W025.format(name=name, attr=annot))
- return problems
-
-
-def analyze_all_pipes(pipeline, warn=True):
- """Analyze all pipes in the pipeline in order.
-
- pipeline (list): A list of (name, pipe) tuples e.g. nlp.pipeline.
- warn (bool): Show user warning if problem is found.
- RETURNS (dict): The problems found, keyed by component name.
- """
- problems = {}
- for i, (name, pipe) in enumerate(pipeline):
- problems[name] = analyze_pipes(pipeline, name, pipe, i, warn=warn)
- return problems
-
-
-def dot_to_dict(values):
- """Convert dot notation to a dict. For example: ["token.pos", "token._.xyz"]
- become {"token": {"pos": True, "_": {"xyz": True }}}.
-
- values (iterable): The values to convert.
- RETURNS (dict): The converted values.
- """
- result = {}
- for value in values:
- path = result
- parts = value.lower().split(".")
- for i, item in enumerate(parts):
- is_last = i == len(parts) - 1
- path = path.setdefault(item, True if is_last else {})
- return result
-
-
-def validate_attrs(values):
- """Validate component attributes provided to "assigns", "requires" etc.
- Raises error for invalid attributes and formatting. Doesn't check if
- custom extension attributes are registered, since this is something the
- user might want to do themselves later in the component.
-
- values (iterable): The string attributes to check, e.g. `["token.pos"]`.
- RETURNS (iterable): The checked attributes.
- """
- data = dot_to_dict(values)
- objs = {"doc": Doc, "token": Token, "span": Span}
- for obj_key, attrs in data.items():
- if obj_key == "span":
- # Support Span only for custom extension attributes
- span_attrs = [attr for attr in values if attr.startswith("span.")]
- span_attrs = [attr for attr in span_attrs if not attr.startswith("span._.")]
- if span_attrs:
- raise ValueError(Errors.E180.format(attrs=", ".join(span_attrs)))
- if obj_key not in objs: # first element is not doc/token/span
- invalid_attrs = ", ".join(a for a in values if a.startswith(obj_key))
- raise ValueError(Errors.E181.format(obj=obj_key, attrs=invalid_attrs))
- if not isinstance(attrs, dict): # attr is something like "doc"
- raise ValueError(Errors.E182.format(attr=obj_key))
- for attr, value in attrs.items():
- if attr == "_":
- if value is True: # attr is something like "doc._"
- raise ValueError(Errors.E182.format(attr="{}._".format(obj_key)))
- for ext_attr, ext_value in value.items():
- # We don't check whether the attribute actually exists
- if ext_value is not True: # attr is something like doc._.x.y
- good = "{}._.{}".format(obj_key, ext_attr)
- bad = "{}.{}".format(good, ".".join(ext_value))
- raise ValueError(Errors.E183.format(attr=bad, solution=good))
- continue # we can't validate those further
- if attr.endswith("_"): # attr is something like "token.pos_"
- raise ValueError(Errors.E184.format(attr=attr, solution=attr[:-1]))
- if value is not True: # attr is something like doc.x.y
- good = "{}.{}".format(obj_key, attr)
- bad = "{}.{}".format(good, ".".join(value))
- raise ValueError(Errors.E183.format(attr=bad, solution=good))
- obj = objs[obj_key]
- if not hasattr(obj, attr):
- raise ValueError(Errors.E185.format(obj=obj_key, attr=attr))
- return values
-
-
-def _get_feature_for_attr(pipeline, attr, feature):
- assert feature in ["assigns", "requires"]
- result = []
- for pipe_name, pipe in pipeline:
- pipe_assigns = getattr(pipe, feature, [])
- if attr in pipe_assigns:
- result.append((pipe_name, pipe))
- return result
-
-
-def get_assigns_for_attr(pipeline, attr):
- """Get all pipeline components that assign an attr, e.g. "doc.tensor".
-
- pipeline (list): A list of (name, pipe) tuples e.g. nlp.pipeline.
- attr (unicode): The attribute to check.
- RETURNS (list): (name, pipeline) tuples of components that assign the attr.
- """
- return _get_feature_for_attr(pipeline, attr, "assigns")
-
-
-def get_requires_for_attr(pipeline, attr):
- """Get all pipeline components that require an attr, e.g. "doc.tensor".
-
- pipeline (list): A list of (name, pipe) tuples e.g. nlp.pipeline.
- attr (unicode): The attribute to check.
- RETURNS (list): (name, pipeline) tuples of components that require the attr.
- """
- return _get_feature_for_attr(pipeline, attr, "requires")
-
-
-def print_summary(nlp, pretty=True, no_print=False):
- """Print a formatted summary for the current nlp object's pipeline. Shows
- a table with the pipeline components and why they assign and require, as
- well as any problems if available.
-
- nlp (Language): The nlp object.
- pretty (bool): Pretty-print the results (color etc).
- no_print (bool): Don't print anything, just return the data.
- RETURNS (dict): A dict with "overview" and "problems".
- """
- msg = Printer(pretty=pretty, no_print=no_print)
- overview = []
- problems = {}
- for i, (name, pipe) in enumerate(nlp.pipeline):
- requires = getattr(pipe, "requires", [])
- assigns = getattr(pipe, "assigns", [])
- retok = getattr(pipe, "retokenizes", False)
- overview.append((i, name, requires, assigns, retok))
- problems[name] = analyze_pipes(nlp.pipeline, name, pipe, i, warn=False)
- msg.divider("Pipeline Overview")
- header = ("#", "Component", "Requires", "Assigns", "Retokenizes")
- msg.table(overview, header=header, divider=True, multiline=True)
- n_problems = sum(len(p) for p in problems.values())
- if any(p for p in problems.values()):
- msg.divider("Problems ({})".format(n_problems))
- for name, problem in problems.items():
- if problem:
- problem = ", ".join(problem)
- msg.warn("'{}' requirements not met: {}".format(name, problem))
- else:
- msg.good("No problems found.")
- if no_print:
- return {"overview": overview, "problems": problems}
diff --git a/spacy/attrs.pxd b/spacy/attrs.pxd
index 805dc2950..33d5372de 100644
--- a/spacy/attrs.pxd
+++ b/spacy/attrs.pxd
@@ -91,6 +91,7 @@ cdef enum attr_id_t:
LANG
ENT_KB_ID = symbols.ENT_KB_ID
+ MORPH
ENT_ID = symbols.ENT_ID
IDX
diff --git a/spacy/attrs.pyx b/spacy/attrs.pyx
index fe9895d06..b15db7599 100644
--- a/spacy/attrs.pyx
+++ b/spacy/attrs.pyx
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
IDS = {
"": NULL_ATTR,
@@ -92,6 +89,7 @@ IDS = {
"SPACY": SPACY,
"PROB": PROB,
"LANG": LANG,
+ "MORPH": MORPH,
"IDX": IDX
}
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index 778453711..92cb76971 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -1,12 +1,37 @@
+from wasabi import msg
+
+from ._util import app, setup_cli # noqa: F401
+
+# These are the actual functions, NOT the wrapped CLI commands. The CLI commands
+# are registered automatically and won't have to be imported here.
from .download import download # noqa: F401
from .info import info # noqa: F401
-from .link import link # noqa: F401
from .package import package # noqa: F401
from .profile import profile # noqa: F401
-from .train import train # noqa: F401
+from .train import train_cli # noqa: F401
from .pretrain import pretrain # noqa: F401
from .debug_data import debug_data # noqa: F401
+from .debug_config import debug_config # noqa: F401
+from .debug_model import debug_model # noqa: F401
from .evaluate import evaluate # noqa: F401
from .convert import convert # noqa: F401
from .init_model import init_model # noqa: F401
+from .init_config import init_config, fill_config # noqa: F401
from .validate import validate # noqa: F401
+from .project.clone import project_clone # noqa: F401
+from .project.assets import project_assets # noqa: F401
+from .project.run import project_run # noqa: F401
+from .project.dvc import project_update_dvc # noqa: F401
+from .project.push import project_push # noqa: F401
+from .project.pull import project_pull # noqa: F401
+from .project.document import project_document # noqa: F401
+
+
+@app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
+def link(*args, **kwargs):
+ """As of spaCy v3.0, symlinks like "en" are deprecated. You can load trained
+ pipeline packages using their full names or from a directory path."""
+ msg.warn(
+ "As of spaCy v3.0, model symlinks are deprecated. You can load trained "
+ "pipeline packages using their full names or from a directory path."
+ )
diff --git a/spacy/cli/_schemas.py b/spacy/cli/_schemas.py
deleted file mode 100644
index 3fb2c8979..000000000
--- a/spacy/cli/_schemas.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
-# NB: This schema describes the new format of the training data, see #2928
-TRAINING_SCHEMA = {
- "$schema": "http://json-schema.org/draft-06/schema",
- "title": "Training data for spaCy models",
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "text": {
- "title": "The text of the training example",
- "type": "string",
- "minLength": 1,
- },
- "ents": {
- "title": "Named entity spans in the text",
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "start": {
- "title": "Start character offset of the span",
- "type": "integer",
- "minimum": 0,
- },
- "end": {
- "title": "End character offset of the span",
- "type": "integer",
- "minimum": 0,
- },
- "label": {
- "title": "Entity label",
- "type": "string",
- "minLength": 1,
- "pattern": "^[A-Z0-9]*$",
- },
- },
- "required": ["start", "end", "label"],
- },
- },
- "sents": {
- "title": "Sentence spans in the text",
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "start": {
- "title": "Start character offset of the span",
- "type": "integer",
- "minimum": 0,
- },
- "end": {
- "title": "End character offset of the span",
- "type": "integer",
- "minimum": 0,
- },
- },
- "required": ["start", "end"],
- },
- },
- "cats": {
- "title": "Text categories for the text classifier",
- "type": "object",
- "patternProperties": {
- "*": {
- "title": "A text category",
- "oneOf": [
- {"type": "boolean"},
- {"type": "number", "minimum": 0},
- ],
- }
- },
- "propertyNames": {"pattern": "^[A-Z0-9]*$", "minLength": 1},
- },
- "tokens": {
- "title": "The tokens in the text",
- "type": "array",
- "items": {
- "type": "object",
- "minProperties": 1,
- "properties": {
- "id": {
- "title": "Token ID, usually token index",
- "type": "integer",
- "minimum": 0,
- },
- "start": {
- "title": "Start character offset of the token",
- "type": "integer",
- "minimum": 0,
- },
- "end": {
- "title": "End character offset of the token",
- "type": "integer",
- "minimum": 0,
- },
- "pos": {
- "title": "Coarse-grained part-of-speech tag",
- "type": "string",
- "minLength": 1,
- },
- "tag": {
- "title": "Fine-grained part-of-speech tag",
- "type": "string",
- "minLength": 1,
- },
- "dep": {
- "title": "Dependency label",
- "type": "string",
- "minLength": 1,
- },
- "head": {
- "title": "Index of the token's head",
- "type": "integer",
- "minimum": 0,
- },
- },
- "required": ["start", "end"],
- },
- },
- "_": {"title": "Custom user space", "type": "object"},
- },
- "required": ["text"],
- },
-}
-
-META_SCHEMA = {
- "$schema": "http://json-schema.org/draft-06/schema",
- "type": "object",
- "properties": {
- "lang": {
- "title": "Two-letter language code, e.g. 'en'",
- "type": "string",
- "minLength": 2,
- "maxLength": 2,
- "pattern": "^[a-z]*$",
- },
- "name": {
- "title": "Model name",
- "type": "string",
- "minLength": 1,
- "pattern": "^[a-z_]*$",
- },
- "version": {
- "title": "Model version",
- "type": "string",
- "minLength": 1,
- "pattern": "^[0-9a-z.-]*$",
- },
- "spacy_version": {
- "title": "Compatible spaCy version identifier",
- "type": "string",
- "minLength": 1,
- "pattern": "^[0-9a-z.-><=]*$",
- },
- "parent_package": {
- "title": "Name of parent spaCy package, e.g. spacy or spacy-nightly",
- "type": "string",
- "minLength": 1,
- "default": "spacy",
- },
- "pipeline": {
- "title": "Names of pipeline components",
- "type": "array",
- "items": {"type": "string", "minLength": 1},
- },
- "description": {"title": "Model description", "type": "string"},
- "license": {"title": "Model license", "type": "string"},
- "author": {"title": "Model author name", "type": "string"},
- "email": {"title": "Model author email", "type": "string", "format": "email"},
- "url": {"title": "Model author URL", "type": "string", "format": "uri"},
- "sources": {
- "title": "Training data sources",
- "type": "array",
- "items": {"type": "string"},
- },
- "vectors": {
- "title": "Included word vectors",
- "type": "object",
- "properties": {
- "keys": {
- "title": "Number of unique keys",
- "type": "integer",
- "minimum": 0,
- },
- "vectors": {
- "title": "Number of unique vectors",
- "type": "integer",
- "minimum": 0,
- },
- "width": {
- "title": "Number of dimensions",
- "type": "integer",
- "minimum": 0,
- },
- },
- },
- "accuracy": {
- "title": "Accuracy numbers",
- "type": "object",
- "patternProperties": {"*": {"type": "number", "minimum": 0.0}},
- },
- "speed": {
- "title": "Speed evaluation numbers",
- "type": "object",
- "patternProperties": {
- "*": {
- "oneOf": [
- {"type": "number", "minimum": 0.0},
- {"type": "integer", "minimum": 0},
- ]
- }
- },
- },
- },
- "required": ["lang", "name", "version"],
-}
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
new file mode 100644
index 000000000..0ecb5ad8f
--- /dev/null
+++ b/spacy/cli/_util.py
@@ -0,0 +1,350 @@
+from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING
+import sys
+import shutil
+from pathlib import Path
+from wasabi import msg
+import srsly
+import hashlib
+import typer
+from click import NoSuchOption
+from typer.main import get_command
+from contextlib import contextmanager
+from thinc.config import Config, ConfigValidationError
+from configparser import InterpolationError
+
+from ..schemas import ProjectConfigSchema, validate
+from ..util import import_file, run_command, make_tempdir
+
+if TYPE_CHECKING:
+ from pathy import Pathy # noqa: F401
+
+
+PROJECT_FILE = "project.yml"
+PROJECT_LOCK = "project.lock"
+COMMAND = "python -m spacy"
+NAME = "spacy"
+HELP = """spaCy Command-line Interface
+
+DOCS: https://nightly.spacy.io/api/cli
+"""
+PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
+You'd typically start by cloning a project template to a local directory and
+fetching its assets like datasets etc. See the project's {PROJECT_FILE} for the
+available commands.
+"""
+DEBUG_HELP = """Suite of helpful commands for debugging and profiling. Includes
+commands to check and validate your config files, training and evaluation data,
+and custom model implementations.
+"""
+INIT_HELP = """Commands for initializing configs and pipeline packages."""
+
+# Wrappers for Typer's annotations. Initially created to set defaults and to
+# keep the names short, but not needed at the moment.
+Arg = typer.Argument
+Opt = typer.Option
+
+app = typer.Typer(name=NAME, help=HELP)
+project_cli = typer.Typer(name="project", help=PROJECT_HELP, no_args_is_help=True)
+debug_cli = typer.Typer(name="debug", help=DEBUG_HELP, no_args_is_help=True)
+init_cli = typer.Typer(name="init", help=INIT_HELP, no_args_is_help=True)
+
+app.add_typer(project_cli)
+app.add_typer(debug_cli)
+app.add_typer(init_cli)
+
+
+def setup_cli() -> None:
+ # Ensure that the help messages always display the correct prompt
+ command = get_command(app)
+ command(prog_name=COMMAND)
+
+
+def parse_config_overrides(args: List[str]) -> Dict[str, Any]:
+ """Generate a dictionary of config overrides based on the extra arguments
+ provided on the CLI, e.g. --training.batch_size to override
+ "training.batch_size". Arguments without a "." are considered invalid,
+ since the config only allows top-level sections to exist.
+
+ args (List[str]): The extra arguments from the command line.
+ RETURNS (Dict[str, Any]): The parsed dict, keyed by nested config setting.
+ """
+ result = {}
+ while args:
+ opt = args.pop(0)
+ err = f"Invalid CLI argument '{opt}'"
+ if opt.startswith("--"): # new argument
+ orig_opt = opt
+ opt = opt.replace("--", "")
+ if "." not in opt:
+ raise NoSuchOption(orig_opt)
+ if "=" in opt: # we have --opt=value
+ opt, value = opt.split("=", 1)
+ opt = opt.replace("-", "_")
+ else:
+ if not args or args[0].startswith("--"): # flag with no value
+ value = "true"
+ else:
+ value = args.pop(0)
+ # Just like we do in the config, we're calling json.loads on the
+ # values. But since they come from the CLI, it'd be unintuitive to
+ # explicitly mark strings with escaped quotes. So we're working
+ # around that here by falling back to a string if parsing fails.
+ # TODO: improve logic to handle simple types like list of strings?
+ try:
+ result[opt] = srsly.json_loads(value)
+ except ValueError:
+ result[opt] = str(value)
+ else:
+ msg.fail(f"{err}: override option should start with --", exits=1)
+ return result
+
+
+def load_project_config(path: Path, interpolate: bool = True) -> Dict[str, Any]:
+ """Load the project.yml file from a directory and validate it. Also make
+ sure that all directories defined in the config exist.
+
+ path (Path): The path to the project directory.
+ interpolate (bool): Whether to substitute project variables.
+ RETURNS (Dict[str, Any]): The loaded project.yml.
+ """
+ config_path = path / PROJECT_FILE
+ if not config_path.exists():
+ msg.fail(f"Can't find {PROJECT_FILE}", config_path, exits=1)
+ invalid_err = f"Invalid {PROJECT_FILE}. Double-check that the YAML is correct."
+ try:
+ config = srsly.read_yaml(config_path)
+ except ValueError as e:
+ msg.fail(invalid_err, e, exits=1)
+ errors = validate(ProjectConfigSchema, config)
+ if errors:
+ msg.fail(invalid_err)
+ print("\n".join(errors))
+ sys.exit(1)
+ validate_project_commands(config)
+ # Make sure directories defined in config exist
+ for subdir in config.get("directories", []):
+ dir_path = path / subdir
+ if not dir_path.exists():
+ dir_path.mkdir(parents=True)
+ if interpolate:
+ err = "project.yml validation error"
+ with show_validation_error(title=err, hint_fill=False):
+ config = substitute_project_variables(config)
+ return config
+
+
+def substitute_project_variables(config: Dict[str, Any], overrides: Dict = {}):
+ key = "vars"
+ config.setdefault(key, {})
+ config[key].update(overrides)
+ # Need to put variables in the top scope again so we can have a top-level
+ # section "project" (otherwise, a list of commands in the top scope wouldn't
+ # be allowed by Thinc's config system)
+ cfg = Config({"project": config, key: config[key]})
+ interpolated = cfg.interpolate()
+ return dict(interpolated["project"])
+
+
+def validate_project_commands(config: Dict[str, Any]) -> None:
+ """Check that project commands and workflows are valid, don't contain
+ duplicates, don't clash and only refer to commands that exist.
+
+ config (Dict[str, Any]): The loaded config.
+ """
+ command_names = [cmd["name"] for cmd in config.get("commands", [])]
+ workflows = config.get("workflows", {})
+ duplicates = set([cmd for cmd in command_names if command_names.count(cmd) > 1])
+ if duplicates:
+ err = f"Duplicate commands defined in {PROJECT_FILE}: {', '.join(duplicates)}"
+ msg.fail(err, exits=1)
+ for workflow_name, workflow_steps in workflows.items():
+ if workflow_name in command_names:
+ err = f"Can't use workflow name '{workflow_name}': name already exists as a command"
+ msg.fail(err, exits=1)
+ for step in workflow_steps:
+ if step not in command_names:
+ msg.fail(
+ f"Unknown command specified in workflow '{workflow_name}': {step}",
+ f"Workflows can only refer to commands defined in the 'commands' "
+ f"section of the {PROJECT_FILE}.",
+ exits=1,
+ )
+
+
+def get_hash(data) -> str:
+ """Get the hash for a JSON-serializable object.
+
+ data: The data to hash.
+ RETURNS (str): The hash.
+ """
+ data_str = srsly.json_dumps(data, sort_keys=True).encode("utf8")
+ return hashlib.md5(data_str).hexdigest()
+
+
+def get_checksum(path: Union[Path, str]) -> str:
+ """Get the checksum for a file or directory given its file path. If a
+ directory path is provided, this uses all files in that directory.
+
+ path (Union[Path, str]): The file or directory path.
+ RETURNS (str): The checksum.
+ """
+ path = Path(path)
+ if path.is_file():
+ return hashlib.md5(Path(path).read_bytes()).hexdigest()
+ if path.is_dir():
+ # TODO: this is currently pretty slow
+ dir_checksum = hashlib.md5()
+ for sub_file in sorted(fp for fp in path.rglob("*") if fp.is_file()):
+ dir_checksum.update(sub_file.read_bytes())
+ return dir_checksum.hexdigest()
+ msg.fail(f"Can't get checksum for {path}: not a file or directory", exits=1)
+
+
+@contextmanager
+def show_validation_error(
+ file_path: Optional[Union[str, Path]] = None,
+ *,
+ title: str = "Config validation error",
+ hint_fill: bool = True,
+):
+ """Helper to show custom config validation errors on the CLI.
+
+ file_path (str / Path): Optional file path of config file, used in hints.
+ title (str): Title of the custom formatted error.
+ hint_fill (bool): Show hint about filling config.
+ """
+ try:
+ yield
+ except (ConfigValidationError, InterpolationError) as e:
+ msg.fail(title, spaced=True)
+ # TODO: This is kinda hacky and we should probably provide a better
+ # helper for this in Thinc
+ err_text = str(e).replace("Config validation error", "").strip()
+ print(err_text)
+ if hint_fill and "field required" in err_text:
+ config_path = file_path if file_path is not None else "config.cfg"
+ msg.text(
+ "If your config contains missing values, you can run the 'init "
+ "fill-config' command to fill in all the defaults, if possible:",
+ spaced=True,
+ )
+ print(f"{COMMAND} init fill-config {config_path} --base {config_path}\n")
+ sys.exit(1)
+
+
+def import_code(code_path: Optional[Union[Path, str]]) -> None:
+ """Helper to import Python file provided in training commands / commands
+ using the config. This makes custom registered functions available.
+ """
+ if code_path is not None:
+ if not Path(code_path).exists():
+ msg.fail("Path to Python code not found", code_path, exits=1)
+ try:
+ import_file("python_code", code_path)
+ except Exception as e:
+ msg.fail(f"Couldn't load Python code: {code_path}", e, exits=1)
+
+
+def get_sourced_components(config: Union[Dict[str, Any], Config]) -> List[str]:
+ """RETURNS (List[str]): All sourced components in the original config,
+ e.g. {"source": "en_core_web_sm"}. If the config contains a key
+ "factory", we assume it refers to a component factory.
+ """
+ return [
+ name
+ for name, cfg in config.get("components", {}).items()
+ if "factory" not in cfg and "source" in cfg
+ ]
+
+
+def upload_file(src: Path, dest: Union[str, "Pathy"]) -> None:
+ """Upload a file.
+
+ src (Path): The source path.
+ dest (str / Pathy): The destination URL to upload to.
+ """
+ import smart_open
+
+ dest = str(dest)
+ with smart_open.open(dest, mode="wb") as output_file:
+ with src.open(mode="rb") as input_file:
+ output_file.write(input_file.read())
+
+
+def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False) -> None:
+ """Download a file using smart_open.
+
+ src (str / Pathy): The URL of the file.
+ dest (Path): The destination path.
+ force (bool): Whether to force download even if file exists.
+ If False, the download will be skipped.
+ """
+ import smart_open
+
+ if dest.exists() and not force:
+ return None
+ src = str(src)
+ with smart_open.open(src, mode="rb") as input_file:
+ with dest.open(mode="wb") as output_file:
+ output_file.write(input_file.read())
+
+
+def ensure_pathy(path):
+ """Temporary helper to prevent importing Pathy globally (which can cause
+ slow and annoying Google Cloud warning)."""
+ from pathy import Pathy # noqa: F811
+
+ return Pathy(path)
+
+
+def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "master"):
+ if dest.exists():
+ msg.fail("Destination of checkout must not exist", exits=1)
+ if not dest.parent.exists():
+ raise IOError("Parent of destination of checkout must exist")
+ # We're using Git, partial clone and sparse checkout to
+ # only clone the files we need
+ # This ends up being RIDICULOUS. omg.
+ # So, every tutorial and SO post talks about 'sparse checkout'...But they
+ # go and *clone* the whole repo. Worthless. And cloning part of a repo
+ # turns out to be completely broken. The only way to specify a "path" is..
+ # a path *on the server*? The contents of which, specifies the paths. Wat.
+ # Obviously this is hopelessly broken and insecure, because you can query
+ # arbitrary paths on the server! So nobody enables this.
+ # What we have to do is disable *all* files. We could then just checkout
+ # the path, and it'd "work", but be hopelessly slow...Because it goes and
+ # transfers every missing object one-by-one. So the final piece is that we
+ # need to use some weird git internals to fetch the missings in bulk, and
+ # *that* we can do by path.
+ # All Git work happens in a temporary directory; only the subpath is moved to dest
+ with make_tempdir() as tmp_dir:
+ # This is the "clone, but don't download anything" part.
+ cmd = (
+ f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
+ f"--filter=blob:none " # <-- The key bit
+ f"-b {branch}"
+ )
+ run_command(cmd, capture=True)
+ # Now we need to find the missing filenames for the subpath we want.
+ # Looking for this 'rev-list' command in the git --help? Hah.
+ cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
+ ret = run_command(cmd, capture=True)
+ repo = _from_http_to_git(repo)
+ # Now pass those missings into another bit of git internals
+ missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
+ cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}"
+ run_command(cmd, capture=True)
+ # And finally, we can checkout our subpath
+ cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
+ run_command(cmd)
+ # We need Path(subpath) to make sure we also support subdirectories
+ shutil.move(str(tmp_dir / Path(subpath)), str(dest))
+
+
+def _from_http_to_git(repo):
+ if repo.startswith("http://"):
+ repo = repo.replace(r"http://", r"https://")
+ if repo.startswith(r"https://"):
+ repo = repo.replace("https://", "git@").replace("/", ":", 1)
+ repo = f"{repo}.git"
+ return repo
diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index fa867fa04..ade5a3ad4 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -1,132 +1,174 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import plac
+from typing import Optional, Any, List, Union
+from enum import Enum
from pathlib import Path
from wasabi import Printer
import srsly
import re
+import sys
-from .converters import conllu2json, iob2json, conll_ner2json
-from .converters import ner_jsonl2json
+from ._util import app, Arg, Opt
+from ..gold import docs_to_json
+from ..tokens import DocBin
+from ..gold.converters import iob2docs, conll_ner2docs, json2docs, conllu2docs
# Converters are matched by file extension except for ner/iob, which are
# matched by file extension and content. To add a converter, add a new
# entry to this dict with the file extension mapped to the converter function
# imported from /converters.
+
CONVERTERS = {
- "conllubio": conllu2json,
- "conllu": conllu2json,
- "conll": conllu2json,
- "ner": conll_ner2json,
- "iob": iob2json,
- "jsonl": ner_jsonl2json,
+ "conllubio": conllu2docs,
+ "conllu": conllu2docs,
+ "conll": conllu2docs,
+ "ner": conll_ner2docs,
+ "iob": iob2docs,
+ "json": json2docs,
}
-# File types
-FILE_TYPES = ("json", "jsonl", "msg")
-FILE_TYPES_STDOUT = ("json", "jsonl")
+
+# File types that can be written to stdout
+FILE_TYPES_STDOUT = ("json",)
-@plac.annotations(
- input_file=("Input file", "positional", None, str),
- output_dir=("Output directory. '-' for stdout.", "positional", None, str),
- file_type=("Type of data to produce: {}".format(FILE_TYPES), "option", "t", str),
- n_sents=("Number of sentences per doc (0 to disable)", "option", "n", int),
- seg_sents=("Segment sentences (for -c ner)", "flag", "s"),
- model=("Model for sentence segmentation (for -s)", "option", "b", str),
- converter=("Converter: {}".format(tuple(CONVERTERS.keys())), "option", "c", str),
- lang=("Language (if tokenizer required)", "option", "l", str),
- morphology=("Enable appending morphology to tags", "flag", "m", bool),
-)
-def convert(
- input_file,
- output_dir="-",
- file_type="json",
- n_sents=1,
- seg_sents=False,
- model=None,
- morphology=False,
- converter="auto",
- lang=None,
+# Output formats selectable through the --file-type option of convert_cli.
+# Because the enum also subclasses str, each member compares equal to its
+# plain string value.
+class FileTypes(str, Enum):
+ # Rendered with docs_to_json and written as JSON
+ json = "json"
+ # Written as the bytes of a serialized DocBin
+ spacy = "spacy"
+
+
+@app.command("convert")
+def convert_cli(
+ # fmt: off
+ input_path: str = Arg(..., help="Input file or directory", exists=True),
+ output_dir: Path = Arg("-", help="Output directory. '-' for stdout.", allow_dash=True, exists=True),
+ file_type: FileTypes = Opt("spacy", "--file-type", "-t", help="Type of data to produce"),
+ n_sents: int = Opt(1, "--n-sents", "-n", help="Number of sentences per doc (0 to disable)"),
+ seg_sents: bool = Opt(False, "--seg-sents", "-s", help="Segment sentences (for -c ner)"),
+ model: Optional[str] = Opt(None, "--model", "--base", "-b", help="Trained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)"),
+ morphology: bool = Opt(False, "--morphology", "-m", help="Enable appending morphology to tags"),
+ merge_subtokens: bool = Opt(False, "--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"),
+ converter: str = Opt("auto", "--converter", "-c", help=f"Converter: {tuple(CONVERTERS.keys())}"),
+ ner_map: Optional[Path] = Opt(None, "--ner-map", "-nm", help="NER tag mapping (as JSON-encoded dict of entity types)", exists=True),
+ lang: Optional[str] = Opt(None, "--lang", "-l", help="Language (if tokenizer required)"),
+ concatenate: bool = Opt(None, "--concatenate", "-C", help="Concatenate output to a single file"),
+ # fmt: on
):
"""
- Convert files into JSON format for use with train command and other
- experiment management functions. If no output_dir is specified, the data
+ Convert files into json or DocBin format for training. The resulting .spacy
+ file can be used with the train command and other experiment management
+ functions.
+
+ If no output_dir is specified and the output format is JSON, the data
is written to stdout, so you can pipe them forward to a JSON file:
- $ spacy convert some_file.conllu > some_file.json
+ $ spacy convert some_file.conllu --file-type json > some_file.json
+
+ DOCS: https://nightly.spacy.io/api/cli#convert
"""
- no_print = output_dir == "-"
- msg = Printer(no_print=no_print)
- input_path = Path(input_file)
- if file_type not in FILE_TYPES:
- msg.fail(
- "Unknown file type: '{}'".format(file_type),
- "Supported file types: '{}'".format(", ".join(FILE_TYPES)),
- exits=1,
- )
- if file_type not in FILE_TYPES_STDOUT and output_dir == "-":
- # TODO: support msgpack via stdout in srsly?
- msg.fail(
- "Can't write .{} data to stdout.".format(file_type),
- "Please specify an output directory.",
- exits=1,
- )
- if not input_path.exists():
- msg.fail("Input file not found", input_path, exits=1)
- if output_dir != "-" and not Path(output_dir).exists():
- msg.fail("Output directory not found", output_dir, exits=1)
- input_data = input_path.open("r", encoding="utf-8").read()
- if converter == "auto":
- converter = input_path.suffix[1:]
- if converter == "ner" or converter == "iob":
- converter_autodetect = autodetect_ner_format(input_data)
- if converter_autodetect == "ner":
- msg.info("Auto-detected token-per-line NER format")
- converter = converter_autodetect
- elif converter_autodetect == "iob":
- msg.info("Auto-detected sentence-per-line NER format")
- converter = converter_autodetect
- else:
- msg.warn(
- "Can't automatically detect NER format. Conversion may not succeed. See https://spacy.io/api/cli#convert"
- )
- if converter not in CONVERTERS:
- msg.fail("Can't find converter for {}".format(converter), exits=1)
- # Use converter function to convert data
- func = CONVERTERS[converter]
- data = func(
- input_data,
+ if isinstance(file_type, FileTypes):
+ # We get an instance of the FileTypes from the CLI so we need its string value
+ file_type = file_type.value
+ input_path = Path(input_path)
+ output_dir = "-" if output_dir == Path("-") else output_dir
+ silent = output_dir == "-"
+ msg = Printer(no_print=silent)
+ verify_cli_args(msg, input_path, output_dir, file_type, converter, ner_map)
+ converter = _get_converter(msg, converter, input_path)
+ convert(
+ input_path,
+ output_dir,
+ file_type=file_type,
n_sents=n_sents,
seg_sents=seg_sents,
- use_morphology=morphology,
- lang=lang,
model=model,
- no_print=no_print,
+ morphology=morphology,
+ merge_subtokens=merge_subtokens,
+ converter=converter,
+ ner_map=ner_map,
+ lang=lang,
+ concatenate=concatenate,
+ silent=silent,
+ msg=msg,
)
- if output_dir != "-":
- # Export data to a file
- suffix = ".{}".format(file_type)
- output_file = Path(output_dir) / Path(input_path.parts[-1]).with_suffix(suffix)
- if file_type == "json":
- srsly.write_json(output_file, data)
- elif file_type == "jsonl":
- srsly.write_jsonl(output_file, data)
- elif file_type == "msg":
- srsly.write_msgpack(output_file, data)
- msg.good(
- "Generated output file ({} documents): {}".format(len(data), output_file)
+
+
+def convert(
+    input_path: Union[str, Path],
+    output_dir: Union[str, Path],
+    *,
+    file_type: str = "json",
+    n_sents: int = 1,
+    seg_sents: bool = False,
+    model: Optional[str] = None,
+    morphology: bool = False,
+    merge_subtokens: bool = False,
+    converter: str = "auto",
+    ner_map: Optional[Path] = None,
+    lang: Optional[str] = None,
+    concatenate: bool = False,
+    silent: bool = True,
+    msg: Optional[Printer] = None,
+) -> None:
+    """Convert one file, or every matching file in a directory.
+
+    input_path: File or directory to convert.
+    output_dir: Directory to write to, or "-" to write to stdout.
+    file_type: "json" produces JSON; any other value produces the bytes of
+        a serialized DocBin.
+    n_sents, seg_sents, model, morphology, merge_subtokens, lang: Forwarded
+        to the converter function.
+    converter: Key into CONVERTERS. Nothing is auto-detected here, so the
+        caller has to resolve "auto" first.
+    ner_map: Optional path to a JSON file; its parsed content is forwarded
+        to the converter function.
+    concatenate: Merge the docs of all input files into a single output.
+    silent: Used for the fallback Printer and forwarded to the converter
+        function as no_print.
+    msg: Printer used for status messages. Created here if not provided.
+    """
+    if not msg:
+        msg = Printer(no_print=silent)
+    # Normalize once, so the Path comparison and relative_to() below also
+    # work when the caller passes a plain string
+    input_path = Path(input_path)
+    ner_map = srsly.read_json(ner_map) if ner_map is not None else None
+    doc_files = []
+    for input_loc in walk_directory(input_path, converter):
+        # read_text() closes the file again, unlike a bare open().read()
+        input_data = input_loc.read_text(encoding="utf-8")
+        # Use converter function to convert data
+        func = CONVERTERS[converter]
+        docs = func(
+            input_data,
+            n_sents=n_sents,
+            seg_sents=seg_sents,
+            append_morphology=morphology,
+            merge_subtokens=merge_subtokens,
+            lang=lang,
+            model=model,
+            no_print=silent,
+            ner_map=ner_map,
         )
-    else:
-        # Print to stdout
+        doc_files.append((input_loc, docs))
+    if concatenate:
+        all_docs = []
+        for _, docs in doc_files:
+            all_docs.extend(docs)
+        doc_files = [(input_path, all_docs)]
+    for input_loc, docs in doc_files:
         if file_type == "json":
-            srsly.write_json("-", data)
-        elif file_type == "jsonl":
-            srsly.write_jsonl("-", data)
+            data = [docs_to_json(docs)]
+        else:
+            data = DocBin(docs=docs, store_user_data=True).to_bytes()
+        if output_dir == "-":
+            _print_docs_to_stdout(data, file_type)
+        else:
+            if input_loc != input_path:
+                # Mirror the position inside the input directory
+                subpath = input_loc.relative_to(input_path)
+                output_file = Path(output_dir) / subpath.with_suffix(f".{file_type}")
+            else:
+                output_file = Path(output_dir) / input_loc.parts[-1]
+                output_file = output_file.with_suffix(f".{file_type}")
+            _write_docs_to_file(data, output_file, file_type)
+            msg.good(f"Generated output file ({len(docs)} documents): {output_file}")
-def autodetect_ner_format(input_data):
+def _print_docs_to_stdout(data: Any, output_type: str) -> None:
+    """Emit converted data on stdout.
+
+    data: Object handed to srsly.write_json when output_type is "json",
+        otherwise the bytes to write.
+    output_type: "json" goes through srsly.write_json; every other value
+        is written to the binary buffer of sys.stdout.
+    """
+    if output_type != "json":
+        sys.stdout.buffer.write(data)
+        return
+    srsly.write_json("-", data)
+
+
+def _write_docs_to_file(data: Any, output_file: Path, output_type: str) -> None:
+    """Write converted data to output_file, creating parent directories.
+
+    data: Object handed to srsly.write_json when output_type is "json",
+        otherwise the bytes to write.
+    output_file: Destination path.
+    output_type: "json" goes through srsly.write_json; every other value
+        is written in binary mode.
+    """
+    # exist_ok avoids the check-then-create race of exists() + mkdir()
+    output_file.parent.mkdir(parents=True, exist_ok=True)
+    if output_type == "json":
+        srsly.write_json(output_file, data)
+    else:
+        with output_file.open("wb") as file_:
+            file_.write(data)
+
+
+def autodetect_ner_format(input_data: str) -> Optional[str]:
# guess format from the first 20 lines
lines = input_data.split("\n")[:20]
format_guesses = {"ner": 0, "iob": 0}
@@ -143,3 +185,84 @@ def autodetect_ner_format(input_data):
if format_guesses["ner"] == 0 and format_guesses["iob"] > 0:
return "iob"
return None
+
+
+def walk_directory(path: Path, converter: str) -> List[Path]:
+    """Collect the input files for a conversion run.
+
+    path: A file or a directory. A non-directory is returned as the only
+        element without any filtering.
+    converter: For "json", "conll" and "iob", only files whose name ends
+        with that string are kept. Any other value keeps every file.
+    RETURNS: The sorted list of files found below path. Entries whose name
+        starts with "." are skipped, directories included.
+    """
+    if not path.is_dir():
+        return [path]
+    # Only these converters restrict which file names are picked up
+    required_ending = converter if converter in ("json", "conll", "iob") else None
+    pending = [path]
+    locs = []
+    seen = set()
+    while pending:
+        current = pending.pop()
+        if str(current) in seen:
+            continue
+        seen.add(str(current))
+        # .name is "" for Path("."), where .parts[-1] raises IndexError
+        name = current.name
+        if name.startswith("."):
+            continue
+        if current.is_dir():
+            pending.extend(current.iterdir())
+        elif required_ending is None or name.endswith(required_ending):
+            locs.append(current)
+    # iterdir() yields entries in arbitrary order. Sort so that callers which
+    # take the first file or concatenate all files get the same result on
+    # every run.
+    locs.sort()
+    return locs
+
+
+def verify_cli_args(
+    msg: Printer,
+    input_path: Union[str, Path],
+    output_dir: Union[str, Path],
+    file_type: FileTypes,
+    converter: str,
+    ner_map: Optional[Path],
+):
+    """Validate the arguments of the convert command.
+
+    Each failed check is reported with msg.fail(..., exits=1). Checked are:
+    the output type can go to stdout if output_dir is "-", the input path,
+    output directory and NER map exist, an input directory contains files
+    of a single type when the converter is "auto", and an explicitly named
+    converter is a key of CONVERTERS.
+    """
+    input_path = Path(input_path)
+    to_stdout = output_dir == "-"
+    if to_stdout and file_type not in FILE_TYPES_STDOUT:
+        msg.fail(
+            f"Can't write .{file_type} data to stdout. Please specify an output directory.",
+            exits=1,
+        )
+    if not input_path.exists():
+        msg.fail("Input file not found", input_path, exits=1)
+    if not to_stdout and not Path(output_dir).exists():
+        msg.fail("Output directory not found", output_dir, exits=1)
+    if ner_map is not None and not Path(ner_map).exists():
+        msg.fail("NER map not found", ner_map, exits=1)
+    if input_path.is_dir():
+        input_locs = walk_directory(input_path, converter)
+        if not input_locs:
+            msg.fail("No input files in directory", input_path, exits=1)
+        # Distinct file extensions, without the leading dot
+        suffixes = list({loc.suffix[1:] for loc in input_locs})
+        if len(suffixes) > 1 and converter == "auto":
+            msg.fail("All input files must be same type", ",".join(suffixes), exits=1)
+    known_converter = converter == "auto" or converter in CONVERTERS
+    if not known_converter:
+        msg.fail(f"Can't find converter for {converter}", exits=1)
+
+
+def _get_converter(msg, converter, input_path):
+    """Resolve the name of the converter to use for input_path.
+
+    msg: Printer used for info, warning and failure messages.
+    converter: Converter name, or "auto" to derive it from the file suffix.
+    input_path: File or directory. For a directory, the first file returned
+        by walk_directory decides.
+    RETURNS: The resolved converter name. For "ner" and "iob" the file
+        content is inspected and the detected format wins.
+    """
+    if input_path.is_dir():
+        input_path = walk_directory(input_path, converter)[0]
+    if converter == "auto":
+        converter = input_path.suffix[1:]
+    if converter == "ner" or converter == "iob":
+        # Read as UTF-8, like convert() does, not with the platform default
+        with input_path.open(encoding="utf-8") as file_:
+            input_data = file_.read()
+        converter_autodetect = autodetect_ner_format(input_data)
+        if converter_autodetect == "ner":
+            msg.info("Auto-detected token-per-line NER format")
+            converter = converter_autodetect
+        elif converter_autodetect == "iob":
+            msg.info("Auto-detected sentence-per-line NER format")
+            converter = converter_autodetect
+        else:
+            msg.warn(
+                "Can't automatically detect NER format. "
+                "Conversion may not succeed. "
+                "See https://nightly.spacy.io/api/cli#convert"
+            )
+    # A name derived from the file suffix has not been validated anywhere.
+    # Fail with a message here instead of a KeyError on CONVERTERS later.
+    if converter not in CONVERTERS:
+        msg.fail(f"Can't find converter for {converter}", exits=1)
+    return converter
diff --git a/spacy/cli/converters/__init__.py b/spacy/cli/converters/__init__.py
deleted file mode 100644
index 9dcbf5b13..000000000
--- a/spacy/cli/converters/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .conllu2json import conllu2json # noqa: F401
-from .iob2json import iob2json # noqa: F401
-from .conll_ner2json import conll_ner2json # noqa: F401
-from .jsonl2json import ner_jsonl2json # noqa: F401
diff --git a/spacy/cli/converters/conllu2json.py b/spacy/cli/converters/conllu2json.py
deleted file mode 100644
index 3de4dcc30..000000000
--- a/spacy/cli/converters/conllu2json.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import re
-
-from ...gold import iob_to_biluo
-
-
-def conllu2json(input_data, n_sents=10, use_morphology=False, lang=None, **_):
- """
- Convert conllu files into JSON format for use with train cli.
- use_morphology parameter enables appending morphology to tags, which is
- useful for languages such as Spanish, where UD tags are not so rich.
-
- Extract NER tags if available and convert them so that they follow
- BILUO and the Wikipedia scheme
- """
- # by @dvsrepo, via #11 explosion/spacy-dev-resources
- # by @katarkor
- docs = []
- sentences = []
- conll_tuples = read_conllx(input_data, use_morphology=use_morphology)
- checked_for_ner = False
- has_ner_tags = False
- for i, (raw_text, tokens) in enumerate(conll_tuples):
- sentence, brackets = tokens[0]
- if not checked_for_ner:
- has_ner_tags = is_ner(sentence[5][0])
- checked_for_ner = True
- sentences.append(generate_sentence(sentence, has_ner_tags))
- # Real-sized documents could be extracted using the comments on the
- # conluu document
- if len(sentences) % n_sents == 0:
- doc = create_doc(sentences, i)
- docs.append(doc)
- sentences = []
- if sentences:
- doc = create_doc(sentences, i)
- docs.append(doc)
- return docs
-
-
-def is_ner(tag):
- """
- Check the 10th column of the first token to determine if the file contains
- NER tags
- """
- tag_match = re.match("([A-Z_]+)-([A-Z_]+)", tag)
- if tag_match:
- return True
- elif tag == "O":
- return True
- else:
- return False
-
-
-def read_conllx(input_data, use_morphology=False, n=0):
- i = 0
- for sent in input_data.strip().split("\n\n"):
- lines = sent.strip().split("\n")
- if lines:
- while lines[0].startswith("#"):
- lines.pop(0)
- tokens = []
- for line in lines:
-
- parts = line.split("\t")
- id_, word, lemma, pos, tag, morph, head, dep, _1, iob = parts
- if "-" in id_ or "." in id_:
- continue
- try:
- id_ = int(id_) - 1
- head = (int(head) - 1) if head not in ["0", "_"] else id_
- dep = "ROOT" if dep == "root" else dep
- tag = pos if tag == "_" else tag
- tag = tag + "__" + morph if use_morphology else tag
- iob = iob if iob else "O"
- tokens.append((id_, word, tag, head, dep, iob))
- except: # noqa: E722
- print(line)
- raise
- tuples = [list(t) for t in zip(*tokens)]
- yield (None, [[tuples, []]])
- i += 1
- if n >= 1 and i >= n:
- break
-
-
-def simplify_tags(iob):
- """
- Simplify tags obtained from the dataset in order to follow Wikipedia
- scheme (PER, LOC, ORG, MISC). 'PER', 'LOC' and 'ORG' keep their tags, while
- 'GPE_LOC' is simplified to 'LOC', 'GPE_ORG' to 'ORG' and all remaining tags to
- 'MISC'.
- """
- new_iob = []
- for tag in iob:
- tag_match = re.match("([A-Z_]+)-([A-Z_]+)", tag)
- if tag_match:
- prefix = tag_match.group(1)
- suffix = tag_match.group(2)
- if suffix == "GPE_LOC":
- suffix = "LOC"
- elif suffix == "GPE_ORG":
- suffix = "ORG"
- elif suffix != "PER" and suffix != "LOC" and suffix != "ORG":
- suffix = "MISC"
- tag = prefix + "-" + suffix
- new_iob.append(tag)
- return new_iob
-
-
-def generate_sentence(sent, has_ner_tags):
- (id_, word, tag, head, dep, iob) = sent
- sentence = {}
- tokens = []
- if has_ner_tags:
- iob = simplify_tags(iob)
- biluo = iob_to_biluo(iob)
- for i, id in enumerate(id_):
- token = {}
- token["id"] = id
- token["orth"] = word[i]
- token["tag"] = tag[i]
- token["head"] = head[i] - id
- token["dep"] = dep[i]
- if has_ner_tags:
- token["ner"] = biluo[i]
- tokens.append(token)
- sentence["tokens"] = tokens
- return sentence
-
-
-def create_doc(sentences, id):
- doc = {}
- paragraph = {}
- doc["id"] = id
- doc["paragraphs"] = []
- paragraph["sentences"] = sentences
- doc["paragraphs"].append(paragraph)
- return doc
diff --git a/spacy/cli/converters/iob2json.py b/spacy/cli/converters/iob2json.py
deleted file mode 100644
index 61c398f8d..000000000
--- a/spacy/cli/converters/iob2json.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from wasabi import Printer
-
-from ...gold import iob_to_biluo
-from ...util import minibatch
-from .conll_ner2json import n_sents_info
-
-
-def iob2json(input_data, n_sents=10, no_print=False, *args, **kwargs):
- """
- Convert IOB files with one sentence per line and tags separated with '|'
- into JSON format for use with train cli. IOB and IOB2 are accepted.
-
- Sample formats:
-
- I|O like|O London|I-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O
- I|O like|O London|B-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O
- I|PRP|O like|VBP|O London|NNP|I-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O
- I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O
- """
- msg = Printer(no_print=no_print)
- docs = read_iob(input_data.split("\n"))
- if n_sents > 0:
- n_sents_info(msg, n_sents)
- docs = merge_sentences(docs, n_sents)
- return docs
-
-
-def read_iob(raw_sents):
- sentences = []
- for line in raw_sents:
- if not line.strip():
- continue
- tokens = [t.split("|") for t in line.split()]
- if len(tokens[0]) == 3:
- words, pos, iob = zip(*tokens)
- elif len(tokens[0]) == 2:
- words, iob = zip(*tokens)
- pos = ["-"] * len(words)
- else:
- raise ValueError(
- "The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://spacy.io/api/cli#convert"
- )
- biluo = iob_to_biluo(iob)
- sentences.append(
- [
- {"orth": w, "tag": p, "ner": ent}
- for (w, p, ent) in zip(words, pos, biluo)
- ]
- )
- sentences = [{"tokens": sent} for sent in sentences]
- paragraphs = [{"sentences": [sent]} for sent in sentences]
- docs = [{"id": i, "paragraphs": [para]} for i, para in enumerate(paragraphs)]
- return docs
-
-
-def merge_sentences(docs, n_sents):
- merged = []
- for group in minibatch(docs, size=n_sents):
- group = list(group)
- first = group.pop(0)
- to_extend = first["paragraphs"][0]["sentences"]
- for sent in group:
- to_extend.extend(sent["paragraphs"][0]["sentences"])
- merged.append(first)
- return merged
diff --git a/spacy/cli/converters/jsonl2json.py b/spacy/cli/converters/jsonl2json.py
deleted file mode 100644
index 1c1bc45c7..000000000
--- a/spacy/cli/converters/jsonl2json.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import srsly
-
-from ...gold import docs_to_json
-from ...util import get_lang_class, minibatch
-
-
-def ner_jsonl2json(input_data, lang=None, n_sents=10, use_morphology=False, **_):
- if lang is None:
- raise ValueError("No --lang specified, but tokenization required")
- json_docs = []
- input_examples = [srsly.json_loads(line) for line in input_data.strip().split("\n")]
- nlp = get_lang_class(lang)()
- sentencizer = nlp.create_pipe("sentencizer")
- for i, batch in enumerate(minibatch(input_examples, size=n_sents)):
- docs = []
- for record in batch:
- raw_text = record["text"]
- if "entities" in record:
- ents = record["entities"]
- else:
- ents = record["spans"]
- ents = [(e["start"], e["end"], e["label"]) for e in ents]
- doc = nlp.make_doc(raw_text)
- sentencizer(doc)
- spans = [doc.char_span(s, e, label=L) for s, e, L in ents]
- doc.ents = _cleanup_spans(spans)
- docs.append(doc)
- json_docs.append(docs_to_json(docs, id=i))
- return json_docs
-
-
-def _cleanup_spans(spans):
- output = []
- seen = set()
- for span in spans:
- if span is not None:
- # Trim whitespace
- while len(span) and span[0].is_space:
- span = span[1:]
- while len(span) and span[-1].is_space:
- span = span[:-1]
- if not len(span):
- continue
- for i in range(span.start, span.end):
- if i in seen:
- break
- else:
- output.append(span)
- seen.update(range(span.start, span.end))
- return output
diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py
new file mode 100644
index 000000000..7930d0674
--- /dev/null
+++ b/spacy/cli/debug_config.py
@@ -0,0 +1,95 @@
+from typing import Optional, Dict, Any, Union, List
+from pathlib import Path
+from wasabi import msg, table
+from thinc.api import Config
+from thinc.config import VARIABLE_RE
+import typer
+
+from ._util import Arg, Opt, show_validation_error, parse_config_overrides
+from ._util import import_code, debug_cli
+from .. import util
+
+
+# Registers the "config" subcommand on debug_cli. The context settings let
+# extra arguments and unknown options through, so they end up in ctx.args
+# and can be parsed as config overrides below.
+@debug_cli.command(
+ "config",
+ context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+)
+def debug_config_cli(
+ # fmt: off
+ ctx: typer.Context, # This is only used to read additional arguments
+ config_path: Path = Arg(..., help="Path to config file", exists=True),
+ code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"),
+ show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
+ # fmt: on
+):
+ """Debug a config.cfg file and show validation errors. The command will
+ create all objects in the tree and validate them. Note that some config
+ validation errors are blocking and will prevent the rest of the config from
+ being resolved. This means that you may not see all validation errors at
+ once and some issues are only shown once previous errors have been fixed.
+ Similar as with the 'train' command, you can override settings from the config
+ as command line options. For instance, --training.batch_size 128 overrides
+ the value of "batch_size" in the block "[training]".
+
+ DOCS: https://nightly.spacy.io/api/cli#debug-config
+ """
+ # The extra command line arguments collected by typer become overrides
+ overrides = parse_config_overrides(ctx.args)
+ # NOTE(review): code_path defaults to None; presumably import_code
+ # accepts that - confirm against its definition in ._util
+ import_code(code_path)
+ debug_config(
+ config_path, overrides=overrides, show_funcs=show_funcs, show_vars=show_vars
+ )
+
+
+def debug_config(
+    config_path: Path,
+    *,
+    overrides: Dict[str, Any] = {},
+    show_funcs: bool = False,
+    show_vars: bool = False,
+):
+    """Load a config file, build the pipeline from it and report the result.
+
+    config_path: Path of the config file to load.
+    overrides: Values passed to util.load_config as overrides.
+    show_funcs: Also print every registered function the config references.
+    show_vars: Also print a table of the variables the config references.
+    """
+    msg.divider("Config validation")
+    with show_validation_error(config_path):
+        config = util.load_config(config_path, overrides=overrides)
+        # Building the pipeline is what triggers the validation
+        nlp, _ = util.load_model_from_config(config)
+    msg.good("Config is valid")
+    if show_vars:
+        variables = get_variables(config)
+        msg.divider(f"Variables ({len(variables)})")
+        msg.table(
+            variables,
+            header=("Variable", "Value"),
+            divider=True,
+            widths=(41, 34),
+            spacing=2,
+        )
+    if show_funcs:
+        funcs = get_registered_funcs(config)
+        msg.divider(f"Registered functions ({len(funcs)})")
+        for func in funcs:
+            rows = [
+                ("Registry", f"@{func['registry']}"),
+                ("Name", func["name"]),
+                ("Module", func["module"]),
+                ("File", f"{func['file']} (line {func['line_no']})"),
+            ]
+            func_data = dict(rows)
+            msg.info(f"[{func['path']}]")
+            print(table(func_data).strip())
+
+
+def get_registered_funcs(config: Config) -> List[Dict[str, Optional[Union[str, int]]]]:
+    """List the registered functions a config refers to.
+
+    A config entry counts as a reference when the last element of its key,
+    as yielded by util.walk_dict, starts with "@". The rest of that element
+    names the registry and the entry's value names the function.
+
+    RETURNS: One dict per reference with the keys "name", "registry" and
+        "path", plus whatever the registry's find() returns.
+    """
+    found = []
+    for key, value in util.walk_dict(config):
+        leaf = key[-1]
+        if leaf.startswith("@"):
+            # We have a reference to a registered function
+            reg_name = leaf[1:]
+            info = getattr(util.registry, reg_name).find(value)
+            section = ".".join(key[:-1])
+            found.append(
+                {"name": value, "registry": reg_name, "path": section, **info}
+            )
+    return found
+
+
+def get_variables(config: Config) -> Dict[str, Any]:
+    """Map each variable referenced in the config to the repr of its value.
+
+    References are found by running VARIABLE_RE over the config's string
+    form. The first two characters and the last character are stripped and
+    ":" is replaced by "." to get the path looked up with
+    util.dot_to_object.
+    """
+    references = sorted(set(VARIABLE_RE.findall(config.to_str())))
+    return {
+        reference: repr(
+            util.dot_to_object(config, reference[2:-1].replace(":", "."))
+        )
+        for reference in references
+    }
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 22540c779..75a81e6f5 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -1,104 +1,132 @@
-# coding: utf8
-from __future__ import unicode_literals, print_function
-
+from typing import List, Sequence, Dict, Any, Tuple, Optional
from pathlib import Path
from collections import Counter
-import plac
import sys
import srsly
-from wasabi import Printer, MESSAGES
+from wasabi import Printer, MESSAGES, msg
+import typer
-from ..gold import GoldCorpus
-from ..syntax import nonproj
-from ..util import load_model, get_lang_class
+from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
+from ._util import import_code, debug_cli, get_sourced_components
+from ..gold import Corpus, Example
+from ..pipeline._parser_internals import nonproj
+from ..language import Language
+from .. import util
# Minimum number of expected occurrences of NER label in data to train new label
NEW_LABEL_THRESHOLD = 50
# Minimum number of expected occurrences of dependency labels
DEP_LABEL_THRESHOLD = 20
-# Minimum number of expected examples to train a blank model
+# Minimum number of expected examples to train a new pipeline
BLANK_MODEL_MIN_THRESHOLD = 100
BLANK_MODEL_THRESHOLD = 2000
-@plac.annotations(
- # fmt: off
- lang=("model language", "positional", None, str),
- train_path=("location of JSON-formatted training data", "positional", None, Path),
- dev_path=("location of JSON-formatted development data", "positional", None, Path),
- tag_map_path=("Location of JSON-formatted tag map", "option", "tm", Path),
- base_model=("name of model to update (optional)", "option", "b", str),
- pipeline=("Comma-separated names of pipeline components to train", "option", "p", str),
- ignore_warnings=("Ignore warnings, only show stats and errors", "flag", "IW", bool),
- verbose=("Print additional information and explanations", "flag", "V", bool),
- no_format=("Don't pretty-print the results", "flag", "NF", bool),
- # fmt: on
+@debug_cli.command(
+ "data", context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
-def debug_data(
- lang,
- train_path,
- dev_path,
- tag_map_path=None,
- base_model=None,
- pipeline="tagger,parser,ner",
- ignore_warnings=False,
- verbose=False,
- no_format=False,
+@app.command(
+ "debug-data",
+ context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+ hidden=True, # hide this from main CLI help but still allow it to work with warning
+)
+def debug_data_cli(
+ # fmt: off
+ ctx: typer.Context, # This is only used to read additional arguments
+ train_path: Path = Arg(..., help="Location of JSON-formatted training data", exists=True),
+ dev_path: Path = Arg(..., help="Location of JSON-formatted development data", exists=True),
+ config_path: Path = Arg(..., help="Path to config file", exists=True),
+ code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ ignore_warnings: bool = Opt(False, "--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"),
+ verbose: bool = Opt(False, "--verbose", "-V", help="Print additional information and explanations"),
+ no_format: bool = Opt(False, "--no-format", "-NF", help="Don't pretty-print the results"),
+ # fmt: on
):
"""
- Analyze, debug and validate your training and development data, get useful
- stats, and find problems like invalid entity annotations, cyclic
- dependencies, low data labels and more.
- """
- msg = Printer(pretty=not no_format, ignore_warnings=ignore_warnings)
+ Analyze, debug and validate your training and development data. Outputs
+ useful stats, and can help you find problems like invalid entity annotations,
+ cyclic dependencies, low data labels and more.
+ DOCS: https://nightly.spacy.io/api/cli#debug-data
+ """
+ if ctx.command.name == "debug-data":
+ msg.warn(
+ "The debug-data command is now available via the 'debug data' "
+ "subcommand (without the hyphen). You can run python -m spacy debug "
+ "--help for an overview of the other available debugging commands."
+ )
+ overrides = parse_config_overrides(ctx.args)
+ import_code(code_path)
+ debug_data(
+ train_path,
+ dev_path,
+ config_path,
+ config_overrides=overrides,
+ ignore_warnings=ignore_warnings,
+ verbose=verbose,
+ no_format=no_format,
+ silent=False,
+ )
+
+
+def debug_data(
+ train_path: Path,
+ dev_path: Path,
+ config_path: Path,
+ *,
+ config_overrides: Dict[str, Any] = {},
+ ignore_warnings: bool = False,
+ verbose: bool = False,
+ no_format: bool = True,
+ silent: bool = True,
+):
+ msg = Printer(
+ no_print=silent, pretty=not no_format, ignore_warnings=ignore_warnings
+ )
# Make sure all files and paths exists if they are needed
if not train_path.exists():
msg.fail("Training data not found", train_path, exits=1)
if not dev_path.exists():
msg.fail("Development data not found", dev_path, exits=1)
-
+ if not config_path.exists():
+ msg.fail("Config file not found", config_path, exists=1)
+ with show_validation_error(config_path):
+ cfg = util.load_config(config_path, overrides=config_overrides)
+ nlp, config = util.load_model_from_config(cfg)
+ # Use original config here, not resolved version
+ sourced_components = get_sourced_components(cfg)
+ frozen_components = config["training"]["frozen_components"]
+ resume_components = [p for p in sourced_components if p not in frozen_components]
+ pipeline = nlp.pipe_names
+ factory_names = [nlp.get_pipe_meta(pipe).factory for pipe in nlp.pipe_names]
+ tag_map_path = util.ensure_path(config["training"]["tag_map"])
tag_map = {}
if tag_map_path is not None:
tag_map = srsly.read_json(tag_map_path)
-
- # Initialize the model and pipeline
- pipeline = [p.strip() for p in pipeline.split(",")]
- if base_model:
- nlp = load_model(base_model)
- else:
- lang_cls = get_lang_class(lang)
- nlp = lang_cls()
+ morph_rules_path = util.ensure_path(config["training"]["morph_rules"])
+ morph_rules = {}
+ if morph_rules_path is not None:
+ morph_rules = srsly.read_json(morph_rules_path)
# Replace tag map with provided mapping
nlp.vocab.morphology.load_tag_map(tag_map)
+ # Load morph rules
+ nlp.vocab.morphology.load_morph_exceptions(morph_rules)
- msg.divider("Data format validation")
-
- # TODO: Validate data format using the JSON schema
- # TODO: update once the new format is ready
- # TODO: move validation to GoldCorpus in order to be able to load from dir
+ msg.divider("Data file validation")
# Create the gold corpus to be able to better analyze data
loading_train_error_message = ""
loading_dev_error_message = ""
with msg.loading("Loading corpus..."):
- corpus = GoldCorpus(train_path, dev_path)
try:
- train_docs = list(corpus.train_docs(nlp))
- train_docs_unpreprocessed = list(
- corpus.train_docs_without_preprocessing(nlp)
- )
+ train_dataset = list(Corpus(train_path)(nlp))
except ValueError as e:
- loading_train_error_message = "Training data cannot be loaded: {}".format(
- str(e)
- )
+ loading_train_error_message = f"Training data cannot be loaded: {e}"
try:
- dev_docs = list(corpus.dev_docs(nlp))
+ dev_dataset = list(Corpus(dev_path)(nlp))
except ValueError as e:
- loading_dev_error_message = "Development data cannot be loaded: {}".format(
- str(e)
- )
+ loading_dev_error_message = f"Development data cannot be loaded: {e}"
if loading_train_error_message or loading_dev_error_message:
if loading_train_error_message:
msg.fail(loading_train_error_message)
@@ -107,82 +135,68 @@ def debug_data(
sys.exit(1)
msg.good("Corpus is loadable")
- # Create all gold data here to avoid iterating over the train_docs constantly
- gold_train_data = _compile_gold(train_docs, pipeline, nlp)
+ # Create all gold data here to avoid iterating over the train_dataset constantly
+ gold_train_data = _compile_gold(train_dataset, factory_names, nlp, make_proj=True)
gold_train_unpreprocessed_data = _compile_gold(
- train_docs_unpreprocessed, pipeline, nlp
+ train_dataset, factory_names, nlp, make_proj=False
)
- gold_dev_data = _compile_gold(dev_docs, pipeline, nlp)
+ gold_dev_data = _compile_gold(dev_dataset, factory_names, nlp, make_proj=True)
train_texts = gold_train_data["texts"]
dev_texts = gold_dev_data["texts"]
+ frozen_components = config["training"]["frozen_components"]
msg.divider("Training stats")
- msg.text("Training pipeline: {}".format(", ".join(pipeline)))
- for pipe in [p for p in pipeline if p not in nlp.factories]:
- msg.fail("Pipeline component '{}' not available in factories".format(pipe))
- if base_model:
- msg.text("Starting with base model '{}'".format(base_model))
- else:
- msg.text("Starting with blank model '{}'".format(lang))
- msg.text("{} training docs".format(len(train_docs)))
- msg.text("{} evaluation docs".format(len(dev_docs)))
+ msg.text(f"Language: {config['nlp']['lang']}")
+ msg.text(f"Training pipeline: {', '.join(pipeline)}")
+ if resume_components:
+ msg.text(f"Components from other pipelines: {', '.join(resume_components)}")
+ if frozen_components:
+ msg.text(f"Frozen components: {', '.join(frozen_components)}")
+ msg.text(f"{len(train_dataset)} training docs")
+ msg.text(f"{len(dev_dataset)} evaluation docs")
- if not len(dev_docs):
+ if not len(gold_dev_data):
msg.fail("No evaluation docs")
overlap = len(train_texts.intersection(dev_texts))
if overlap:
- msg.warn("{} training examples also in evaluation data".format(overlap))
+ msg.warn(f"{overlap} training examples also in evaluation data")
else:
msg.good("No overlap between training and evaluation data")
- if not base_model and len(train_docs) < BLANK_MODEL_THRESHOLD:
- text = "Low number of examples to train from a blank model ({})".format(
- len(train_docs)
- )
- if len(train_docs) < BLANK_MODEL_MIN_THRESHOLD:
+ # TODO: make this feedback more fine-grained and report on updated
+ # components vs. blank components
+ if not resume_components and len(train_dataset) < BLANK_MODEL_THRESHOLD:
+ text = f"Low number of examples to train a new pipeline ({len(train_dataset)})"
+ if len(train_dataset) < BLANK_MODEL_MIN_THRESHOLD:
msg.fail(text)
else:
msg.warn(text)
msg.text(
- "It's recommended to use at least {} examples (minimum {})".format(
- BLANK_MODEL_THRESHOLD, BLANK_MODEL_MIN_THRESHOLD
- ),
+ f"It's recommended to use at least {BLANK_MODEL_THRESHOLD} examples "
+ f"(minimum {BLANK_MODEL_MIN_THRESHOLD})",
show=verbose,
)
msg.divider("Vocab & Vectors")
n_words = gold_train_data["n_words"]
msg.info(
- "{} total {} in the data ({} unique)".format(
- n_words, "word" if n_words == 1 else "words", len(gold_train_data["words"])
- )
+ f"{n_words} total word(s) in the data ({len(gold_train_data['words'])} unique)"
)
if gold_train_data["n_misaligned_words"] > 0:
- msg.warn(
- "{} misaligned tokens in the training data".format(
- gold_train_data["n_misaligned_words"]
- )
- )
+ n_misaligned = gold_train_data["n_misaligned_words"]
+ msg.warn(f"{n_misaligned} misaligned tokens in the training data")
if gold_dev_data["n_misaligned_words"] > 0:
- msg.warn(
- "{} misaligned tokens in the dev data".format(
- gold_dev_data["n_misaligned_words"]
- )
- )
+ n_misaligned = gold_dev_data["n_misaligned_words"]
+ msg.warn(f"{n_misaligned} misaligned tokens in the dev data")
most_common_words = gold_train_data["words"].most_common(10)
msg.text(
- "10 most common words: {}".format(
- _format_labels(most_common_words, counts=True)
- ),
+ f"10 most common words: {_format_labels(most_common_words, counts=True)}",
show=verbose,
)
if len(nlp.vocab.vectors):
msg.info(
- "{} vectors ({} unique keys, {} dimensions)".format(
- len(nlp.vocab.vectors),
- nlp.vocab.vectors.n_keys,
- nlp.vocab.vectors_length,
- )
+ f"{len(nlp.vocab.vectors)} vectors ({nlp.vocab.vectors.n_keys} "
+ f"unique keys, {nlp.vocab.vectors_length} dimensions)"
)
n_missing_vectors = sum(gold_train_data["words_missing_vectors"].values())
msg.warn(
@@ -200,12 +214,12 @@ def debug_data(
show=verbose,
)
else:
- msg.info("No word vectors present in the model")
+ msg.info("No word vectors present in the package")
- if "ner" in pipeline:
+ if "ner" in factory_names:
# Get all unique NER labels present in the data
labels = set(
- label for label in gold_train_data["ner"] if label not in ("O", "-")
+ label for label in gold_train_data["ner"] if label not in ("O", "-", None)
)
label_counts = gold_train_data["ner"]
model_labels = _get_labels_from_model(nlp, "ner")
@@ -218,19 +232,10 @@ def debug_data(
msg.divider("Named Entity Recognition")
msg.info(
- "{} new {}, {} existing {}".format(
- len(new_labels),
- "label" if len(new_labels) == 1 else "labels",
- len(existing_labels),
- "label" if len(existing_labels) == 1 else "labels",
- )
+ f"{len(new_labels)} new label(s), {len(existing_labels)} existing label(s)"
)
missing_values = label_counts["-"]
- msg.text(
- "{} missing {} (tokens with '-' label)".format(
- missing_values, "value" if missing_values == 1 else "values"
- )
- )
+ msg.text(f"{missing_values} missing value(s) (tokens with '-' label)")
for label in new_labels:
if len(label) == 0:
msg.fail("Empty label found in new labels")
@@ -241,43 +246,28 @@ def debug_data(
if label != "-"
]
labels_with_counts = _format_labels(labels_with_counts, counts=True)
- msg.text("New: {}".format(labels_with_counts), show=verbose)
+ msg.text(f"New: {labels_with_counts}", show=verbose)
if existing_labels:
- msg.text(
- "Existing: {}".format(_format_labels(existing_labels)), show=verbose
- )
-
+ msg.text(f"Existing: {_format_labels(existing_labels)}", show=verbose)
if gold_train_data["ws_ents"]:
- msg.fail(
- "{} invalid whitespace entity span(s)".format(
- gold_train_data["ws_ents"]
- )
- )
+ msg.fail(f"{gold_train_data['ws_ents']} invalid whitespace entity spans")
has_ws_ents_error = True
if gold_train_data["punct_ents"]:
- msg.warn(
- "{} entity span(s) with punctuation".format(
- gold_train_data["punct_ents"]
- )
- )
+ msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation")
has_punct_ents_warning = True
for label in new_labels:
if label_counts[label] <= NEW_LABEL_THRESHOLD:
msg.warn(
- "Low number of examples for new label '{}' ({})".format(
- label, label_counts[label]
- )
+ f"Low number of examples for new label '{label}' ({label_counts[label]})"
)
has_low_data_warning = True
with msg.loading("Analyzing label distribution..."):
- neg_docs = _get_examples_without_label(train_docs, label)
+ neg_docs = _get_examples_without_label(train_dataset, label)
if neg_docs == 0:
- msg.warn(
- "No examples for texts WITHOUT new label '{}'".format(label)
- )
+ msg.warn(f"No examples for texts WITHOUT new label '{label}'")
has_no_neg_warning = True
if not has_low_data_warning:
@@ -291,8 +281,8 @@ def debug_data(
if has_low_data_warning:
msg.text(
- "To train a new entity type, your data should include at "
- "least {} instances of the new label".format(NEW_LABEL_THRESHOLD),
+ f"To train a new entity type, your data should include at "
+ f"least {NEW_LABEL_THRESHOLD} instances of the new label",
show=verbose,
)
if has_no_neg_warning:
@@ -314,34 +304,28 @@ def debug_data(
"with punctuation can not be trained with a noise level > 0."
)
- if "textcat" in pipeline:
+ if "textcat" in factory_names:
msg.divider("Text Classification")
labels = [label for label in gold_train_data["cats"]]
model_labels = _get_labels_from_model(nlp, "textcat")
new_labels = [l for l in labels if l not in model_labels]
existing_labels = [l for l in labels if l in model_labels]
msg.info(
- "Text Classification: {} new label(s), {} existing label(s)".format(
- len(new_labels), len(existing_labels)
- )
+ f"Text Classification: {len(new_labels)} new label(s), "
+ f"{len(existing_labels)} existing label(s)"
)
if new_labels:
labels_with_counts = _format_labels(
gold_train_data["cats"].most_common(), counts=True
)
- msg.text("New: {}".format(labels_with_counts), show=verbose)
+ msg.text(f"New: {labels_with_counts}", show=verbose)
if existing_labels:
- msg.text(
- "Existing: {}".format(_format_labels(existing_labels)), show=verbose
- )
+ msg.text(f"Existing: {_format_labels(existing_labels)}", show=verbose)
if set(gold_train_data["cats"]) != set(gold_dev_data["cats"]):
msg.fail(
- "The train and dev labels are not the same. "
- "Train labels: {}. "
- "Dev labels: {}.".format(
- _format_labels(gold_train_data["cats"]),
- _format_labels(gold_dev_data["cats"]),
- )
+ f"The train and dev labels are not the same. "
+ f"Train labels: {_format_labels(gold_train_data['cats'])}. "
+ f"Dev labels: {_format_labels(gold_dev_data['cats'])}."
)
if gold_train_data["n_cats_multilabel"] > 0:
msg.info(
@@ -367,53 +351,39 @@ def debug_data(
"contains only instances with mutually-exclusive classes."
)
- if "tagger" in pipeline:
+ if "tagger" in factory_names:
msg.divider("Part-of-speech Tagging")
labels = [label for label in gold_train_data["tags"]]
tag_map = nlp.vocab.morphology.tag_map
- msg.info(
- "{} {} in data ({} {} in tag map)".format(
- len(labels),
- "label" if len(labels) == 1 else "labels",
- len(tag_map),
- "label" if len(tag_map) == 1 else "labels",
- )
- )
+ msg.info(f"{len(labels)} label(s) in data ({len(tag_map)} label(s) in tag map)")
labels_with_counts = _format_labels(
gold_train_data["tags"].most_common(), counts=True
)
msg.text(labels_with_counts, show=verbose)
non_tagmap = [l for l in labels if l not in tag_map]
if not non_tagmap:
- msg.good("All labels present in tag map for language '{}'".format(nlp.lang))
+ msg.good(f"All labels present in tag map for language '{nlp.lang}'")
for label in non_tagmap:
- msg.fail(
- "Label '{}' not found in tag map for language '{}'".format(
- label, nlp.lang
- )
- )
+ msg.fail(f"Label '{label}' not found in tag map for language '{nlp.lang}'")
- if "parser" in pipeline:
+ if "parser" in factory_names:
has_low_data_warning = False
msg.divider("Dependency Parsing")
# profile sentence length
msg.info(
- "Found {} sentence{} with an average length of {:.1f} words.".format(
- gold_train_data["n_sents"],
- "s" if len(train_docs) > 1 else "",
- gold_train_data["n_words"] / gold_train_data["n_sents"],
- )
+ f"Found {gold_train_data['n_sents']} sentence(s) with an average "
+ f"length of {gold_train_data['n_words'] / gold_train_data['n_sents']:.1f} words."
)
# check for documents with multiple sentences
sents_per_doc = gold_train_data["n_sents"] / len(gold_train_data["texts"])
if sents_per_doc < 1.1:
msg.warn(
- "The training data contains {:.2f} sentences per "
- "document. When there are very few documents containing more "
- "than one sentence, the parser will not learn how to segment "
- "longer texts into sentences.".format(sents_per_doc)
+ f"The training data contains {sents_per_doc:.2f} sentences per "
+ f"document. When there are very few documents containing more "
+ f"than one sentence, the parser will not learn how to segment "
+ f"longer texts into sentences."
)
# profile labels
@@ -424,32 +394,13 @@ def debug_data(
labels_dev = [label for label in gold_dev_data["deps"]]
if gold_train_unpreprocessed_data["n_nonproj"] > 0:
- msg.info(
- "Found {} nonprojective train sentence{}".format(
- gold_train_unpreprocessed_data["n_nonproj"],
- "s" if gold_train_unpreprocessed_data["n_nonproj"] > 1 else "",
- )
- )
+ n_nonproj = gold_train_unpreprocessed_data["n_nonproj"]
+ msg.info(f"Found {n_nonproj} nonprojective train sentence(s)")
if gold_dev_data["n_nonproj"] > 0:
- msg.info(
- "Found {} nonprojective dev sentence{}".format(
- gold_dev_data["n_nonproj"],
- "s" if gold_dev_data["n_nonproj"] > 1 else "",
- )
- )
-
- msg.info(
- "{} {} in train data".format(
- len(labels_train_unpreprocessed),
- "label" if len(labels_train) == 1 else "labels",
- )
- )
- msg.info(
- "{} {} in projectivized train data".format(
- len(labels_train), "label" if len(labels_train) == 1 else "labels"
- )
- )
-
+ n_nonproj = gold_dev_data["n_nonproj"]
+ msg.info(f"Found {n_nonproj} nonprojective dev sentence(s)")
+ msg.info(f"{len(labels_train_unpreprocessed)} label(s) in train data")
+ msg.info(f"{len(labels_train)} label(s) in projectivized train data")
labels_with_counts = _format_labels(
gold_train_unpreprocessed_data["deps"].most_common(), counts=True
)
@@ -459,9 +410,8 @@ def debug_data(
for label in gold_train_unpreprocessed_data["deps"]:
if gold_train_unpreprocessed_data["deps"][label] <= DEP_LABEL_THRESHOLD:
msg.warn(
- "Low number of examples for label '{}' ({})".format(
- label, gold_train_unpreprocessed_data["deps"][label]
- )
+ f"Low number of examples for label '{label}' "
+ f"({gold_train_unpreprocessed_data['deps'][label]})"
)
has_low_data_warning = True
@@ -470,22 +420,19 @@ def debug_data(
for label in gold_train_data["deps"]:
if gold_train_data["deps"][label] <= DEP_LABEL_THRESHOLD and "||" in label:
rare_projectivized_labels.append(
- "{}: {}".format(label, str(gold_train_data["deps"][label]))
+ f"{label}: {gold_train_data['deps'][label]}"
)
if len(rare_projectivized_labels) > 0:
msg.warn(
- "Low number of examples for {} label{} in the "
- "projectivized dependency trees used for training. You may "
- "want to projectivize labels such as punct before "
- "training in order to improve parser performance.".format(
- len(rare_projectivized_labels),
- "s" if len(rare_projectivized_labels) > 1 else "",
- )
+ f"Low number of examples for {len(rare_projectivized_labels)} "
+ "label(s) in the projectivized dependency trees used for "
+ "training. You may want to projectivize labels such as punct "
+ "before training in order to improve parser performance."
)
msg.warn(
- "Projectivized labels with low numbers of examples: "
- "{}".format("\n".join(rare_projectivized_labels)),
+ f"Projectivized labels with low numbers of examples: ",
+ ", ".join(rare_projectivized_labels),
show=verbose,
)
has_low_data_warning = True
@@ -493,50 +440,44 @@ def debug_data(
# labels only in train
if set(labels_train) - set(labels_dev):
msg.warn(
- "The following labels were found only in the train data: "
- "{}".format(", ".join(set(labels_train) - set(labels_dev))),
+ "The following labels were found only in the train data:",
+ ", ".join(set(labels_train) - set(labels_dev)),
show=verbose,
)
# labels only in dev
if set(labels_dev) - set(labels_train):
msg.warn(
- "The following labels were found only in the dev data: "
- + ", ".join(set(labels_dev) - set(labels_train)),
+ "The following labels were found only in the dev data:",
+ ", ".join(set(labels_dev) - set(labels_train)),
show=verbose,
)
if has_low_data_warning:
msg.text(
- "To train a parser, your data should include at "
- "least {} instances of each label.".format(DEP_LABEL_THRESHOLD),
+ f"To train a parser, your data should include at "
+ f"least {DEP_LABEL_THRESHOLD} instances of each label.",
show=verbose,
)
# multiple root labels
if len(gold_train_unpreprocessed_data["roots"]) > 1:
msg.warn(
- "Multiple root labels ({}) ".format(
- ", ".join(gold_train_unpreprocessed_data["roots"])
- )
- + "found in training data. spaCy's parser uses a single root "
- "label ROOT so this distinction will not be available."
+ f"Multiple root labels "
+ f"({', '.join(gold_train_unpreprocessed_data['roots'])}) "
+ f"found in training data. spaCy's parser uses a single root "
+ f"label ROOT so this distinction will not be available."
)
# these should not happen, but just in case
if gold_train_data["n_nonproj"] > 0:
msg.fail(
- "Found {} nonprojective projectivized train sentence{}".format(
- gold_train_data["n_nonproj"],
- "s" if gold_train_data["n_nonproj"] > 1 else "",
- )
+ f"Found {gold_train_data['n_nonproj']} nonprojective "
+ f"projectivized train sentence(s)"
)
if gold_train_data["n_cycles"] > 0:
msg.fail(
- "Found {} projectivized train sentence{} with cycles".format(
- gold_train_data["n_cycles"],
- "s" if gold_train_data["n_cycles"] > 1 else "",
- )
+ f"Found {gold_train_data['n_cycles']} projectivized train sentence(s) with cycles"
)
msg.divider("Summary")
@@ -544,42 +485,39 @@ def debug_data(
warn_counts = msg.counts[MESSAGES.WARN]
fail_counts = msg.counts[MESSAGES.FAIL]
if good_counts:
- msg.good(
- "{} {} passed".format(
- good_counts, "check" if good_counts == 1 else "checks"
- )
- )
+ msg.good(f"{good_counts} {'check' if good_counts == 1 else 'checks'} passed")
if warn_counts:
- msg.warn(
- "{} {}".format(warn_counts, "warning" if warn_counts == 1 else "warnings")
- )
- if fail_counts:
- msg.fail("{} {}".format(fail_counts, "error" if fail_counts == 1 else "errors"))
-
+ msg.warn(f"{warn_counts} {'warning' if warn_counts == 1 else 'warnings'}")
if fail_counts:
+ msg.fail(f"{fail_counts} {'error' if fail_counts == 1 else 'errors'}")
sys.exit(1)
-def _load_file(file_path, msg):
+def _load_file(file_path: Path, msg: Printer) -> None:
file_name = file_path.parts[-1]
if file_path.suffix == ".json":
- with msg.loading("Loading {}...".format(file_name)):
+ with msg.loading(f"Loading {file_name}..."):
data = srsly.read_json(file_path)
- msg.good("Loaded {}".format(file_name))
+ msg.good(f"Loaded {file_name}")
return data
elif file_path.suffix == ".jsonl":
- with msg.loading("Loading {}...".format(file_name)):
+ with msg.loading(f"Loading {file_name}..."):
data = srsly.read_jsonl(file_path)
- msg.good("Loaded {}".format(file_name))
+ msg.good(f"Loaded {file_name}")
return data
msg.fail(
- "Can't load file extension {}".format(file_path.suffix),
+ f"Can't load file extension {file_path.suffix}",
"Expected .json or .jsonl",
exits=1,
)
-def _compile_gold(train_docs, pipeline, nlp):
+def _compile_gold(
+ examples: Sequence[Example],
+ factory_names: List[str],
+ nlp: Language,
+ make_proj: bool,
+) -> Dict[str, Any]:
data = {
"ner": Counter(),
"cats": Counter(),
@@ -598,18 +536,20 @@ def _compile_gold(train_docs, pipeline, nlp):
"n_cats_multilabel": 0,
"texts": set(),
}
- for doc, gold in train_docs:
- valid_words = [x for x in gold.words if x is not None]
+ for eg in examples:
+ gold = eg.reference
+ doc = eg.predicted
+ valid_words = [x for x in gold if x is not None]
data["words"].update(valid_words)
data["n_words"] += len(valid_words)
- data["n_misaligned_words"] += len(gold.words) - len(valid_words)
+ data["n_misaligned_words"] += len(gold) - len(valid_words)
data["texts"].add(doc.text)
if len(nlp.vocab.vectors):
for word in valid_words:
if nlp.vocab.strings[word] not in nlp.vocab.vectors:
data["words_missing_vectors"].update([word])
- if "ner" in pipeline:
- for i, label in enumerate(gold.ner):
+ if "ner" in factory_names:
+ for i, label in enumerate(eg.get_aligned_ner()):
if label is None:
continue
if label.startswith(("B-", "U-", "L-")) and doc[i].is_space:
@@ -630,45 +570,47 @@ def _compile_gold(train_docs, pipeline, nlp):
data["ner"][combined_label] += 1
elif label == "-":
data["ner"]["-"] += 1
- if "textcat" in pipeline:
+ if "textcat" in factory_names:
data["cats"].update(gold.cats)
if list(gold.cats.values()).count(1.0) != 1:
data["n_cats_multilabel"] += 1
- if "tagger" in pipeline:
- data["tags"].update([x for x in gold.tags if x is not None])
- if "parser" in pipeline:
- data["deps"].update([x for x in gold.labels if x is not None])
- for i, (dep, head) in enumerate(zip(gold.labels, gold.heads)):
+ if "tagger" in factory_names:
+ tags = eg.get_aligned("TAG", as_string=True)
+ data["tags"].update([x for x in tags if x is not None])
+ if "parser" in factory_names:
+ aligned_heads, aligned_deps = eg.get_aligned_parse(projectivize=make_proj)
+ data["deps"].update([x for x in aligned_deps if x is not None])
+ for i, (dep, head) in enumerate(zip(aligned_deps, aligned_heads)):
if head == i:
data["roots"].update([dep])
data["n_sents"] += 1
- if nonproj.is_nonproj_tree(gold.heads):
+ if nonproj.is_nonproj_tree(aligned_heads):
data["n_nonproj"] += 1
- if nonproj.contains_cycle(gold.heads):
+ if nonproj.contains_cycle(aligned_heads):
data["n_cycles"] += 1
return data
-def _format_labels(labels, counts=False):
+def _format_labels(labels: List[Tuple[str, int]], counts: bool = False) -> str:
if counts:
- return ", ".join(["'{}' ({})".format(l, c) for l, c in labels])
- return ", ".join(["'{}'".format(l) for l in labels])
+ return ", ".join([f"'{l}' ({c})" for l, c in labels])
+ return ", ".join([f"'{l}'" for l in labels])
-def _get_examples_without_label(data, label):
+def _get_examples_without_label(data: Sequence[Example], label: str) -> int:
count = 0
- for doc, gold in data:
+ for eg in data:
labels = [
label.split("-")[1]
- for label in gold.ner
- if label is not None and label not in ("O", "-")
+ for label in eg.get_aligned_ner()
+ if label not in ("O", "-", None)
]
if label not in labels:
count += 1
return count
-def _get_labels_from_model(nlp, pipe_name):
+def _get_labels_from_model(nlp: Language, pipe_name: str) -> Sequence[str]:
if pipe_name not in nlp.pipe_names:
return set()
pipe = nlp.get_pipe(pipe_name)
diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
new file mode 100644
index 000000000..5bd4e008f
--- /dev/null
+++ b/spacy/cli/debug_model.py
@@ -0,0 +1,194 @@
+from typing import Dict, Any, Optional
+from pathlib import Path
+from wasabi import msg
+from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam
+from thinc.api import Model, data_validation
+import typer
+
+from ._util import Arg, Opt, debug_cli, show_validation_error, parse_config_overrides
+from .. import util
+
+
+@debug_cli.command("model", context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
+def debug_model_cli(
+    # fmt: off
+    ctx: typer.Context,  # This is only used to read additional arguments
+    config_path: Path = Arg(..., help="Path to config file", exists=True),
+    component: str = Arg(..., help="Name of the pipeline component of which the model should be analysed"),
+    layers: str = Opt("", "--layers", "-l", help="Comma-separated names of layer IDs to print"),
+    dimensions: bool = Opt(False, "--dimensions", "-DIM", help="Show dimensions"),
+    parameters: bool = Opt(False, "--parameters", "-PAR", help="Show parameters"),
+    gradients: bool = Opt(False, "--gradients", "-GRAD", help="Show gradients"),
+    attributes: bool = Opt(False, "--attributes", "-ATTR", help="Show attributes"),
+    P0: bool = Opt(False, "--print-step0", "-P0", help="Print model before training"),
+    P1: bool = Opt(False, "--print-step1", "-P1", help="Print model after initialization"),
+    P2: bool = Opt(False, "--print-step2", "-P2", help="Print model after training"),
+    P3: bool = Opt(False, "--print-step3", "-P3", help="Print final predictions"),
+    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
+    # fmt: on
+):
+    """
+    Analyze a Thinc model implementation. Includes checks for internal structure
+    and activations during training.
+
+    DOCS: https://nightly.spacy.io/api/cli#debug-model
+    """
+    if use_gpu >= 0:
+        msg.info("Using GPU")
+        require_gpu(use_gpu)
+    else:
+        msg.info("Using CPU")
+    print_settings = {  # consumed by debug_model() and _print_model()
+        "dimensions": dimensions,
+        "parameters": parameters,
+        "gradients": gradients,
+        "attributes": attributes,
+        "layers": [int(x.strip()) for x in layers.split(",")] if layers else [],  # "1, 3" -> [1, 3]; "" -> []
+        "print_before_training": P0,
+        "print_after_init": P1,
+        "print_after_training": P2,
+        "print_prediction": P3,
+    }
+    config_overrides = parse_config_overrides(ctx.args)  # extra CLI arguments; kept on ctx by the context_settings above
+    with show_validation_error(config_path):
+        config = util.load_config(config_path, overrides=config_overrides)
+        nlp, config = util.load_model_from_config(config)  # pass the loaded config so the overrides are not dropped
+    seed = config["training"]["seed"]
+    if seed is not None:
+        msg.info(f"Fixing random seed: {seed}")
+        fix_random_seed(seed)
+    pipe = nlp.get_pipe(component)
+    if hasattr(pipe, "model"):
+        model = pipe.model
+    else:
+        msg.fail(
+            f"The component '{component}' does not specify an object that holds a Model.",
+            exits=1,
+        )
+    debug_model(model, print_settings=print_settings)
+
+
+def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None):
+    """Initialize, update and run `model` on sample docs, printing it at the steps enabled in `print_settings`."""
+    if not isinstance(model, Model):
+        msg.fail(
+            f"Requires a Thinc Model to be analysed, but found {type(model)} instead.",
+            exits=1,
+        )
+    settings = {} if print_settings is None else print_settings
+
+    # STEP 0: show the model as it was passed in
+    msg.info(f"Analysing model with ID {model.id}")
+    if settings.get("print_before_training"):
+        msg.divider("STEP 0 - before training")
+        _print_model(model, settings)
+
+    # STEP 1: initialize against the dummy target, then show the model again
+    target = _get_output(model.ops.xp)
+    _set_output_dim(model=model, nO=target.shape[-1])
+    # Validation is switched off: the dummy target may not match the output layer's declared type
+    with data_validation(False):
+        model.initialize(X=_get_docs(), Y=target)
+    if settings.get("print_after_init"):
+        msg.divider("STEP 1 - after initialization")
+        _print_model(model, settings)
+
+    # STEP 2: three update steps with Adam and dropout, then show the model again
+    adam = Adam(0.001)
+    set_dropout_rate(model, 0.2)
+    for _ in range(3):
+        output, backprop = model.begin_update(_get_docs())
+        d_output = get_gradient(model, output)
+        backprop(d_output)
+        model.finish_update(adam)
+    if settings.get("print_after_training"):
+        msg.divider("STEP 2 - after training")
+        _print_model(model, settings)
+
+    # STEP 3: run one final prediction on the sample docs
+    final_prediction = model.predict(_get_docs())
+    if settings.get("print_prediction"):
+        msg.divider("STEP 3 - prediction")
+        msg.info(str(final_prediction))
+
+
+def get_gradient(model, Y):
+    # Difference between the model output `Y` and the dummy target built on the model's array backend.
+    return Y - _get_output(model.ops.xp)
+
+
+def _sentences():
+    """Return the fixed list of four English sample texts used as dummy input."""
+    first = "Apple is looking at buying U.K. startup for $1 billion"
+    second = "Autonomous cars shift insurance liability toward manufacturers"
+    third = "San Francisco considers banning sidewalk delivery robots"
+    fourth = "London is a big city in the United Kingdom."
+    return [first, second, third, fourth]
+
+
+def _get_docs(lang: str = "en"):
+    pipeline = util.get_lang_class(lang)()  # instantiate whatever class util resolves for `lang`
+    return [doc for doc in pipeline.pipe(_sentences())]
+
+
+def _get_output(xp):  # dummy float32 target 10, 11, ...: one value per sample sentence, built with array module `xp`
+    return xp.asarray([i + 10 for i in range(len(_sentences()))], dtype="float32")  # count texts; no need to tokenize them
+
+
+def _set_output_dim(model, nO):  # set dim "nO" to `nO` on `model` / its "output_layer" ref where has_dim("nO") is None
+ # the dim inference doesn't always work 100%, we need this hack like we have it in pipe.pyx
+ if model.has_dim("nO") is None:  # only acts when has_dim() returns None; True/False leave the dim untouched
+ model.set_dim("nO", nO)
+ if model.has_ref("output_layer"):  # NOTE(review): indentation is lost in this patch text; nesting under the check above cannot be confirmed here
+ if model.get_ref("output_layer").has_dim("nO") is None:  # same None test, applied to the referenced layer
+ model.get_ref("output_layer").set_dim("nO", nO)
+
+
+def _print_model(model, print_settings):  # report the nodes yielded by model.walk() through msg.info, with optional detail sections
+ layers = print_settings.get("layers", "")  # a falsy value disables the index filter below
+ parameters = print_settings.get("parameters", False)
+ dimensions = print_settings.get("dimensions", False)
+ gradients = print_settings.get("gradients", False)
+ attributes = print_settings.get("attributes", False)
+
+ for i, node in enumerate(model.walk()):  # i is the node's position in the walk order
+ if not layers or i in layers:  # keep every node, or only those whose position is listed in `layers`
+ msg.info(f"Layer {i}: model ID {node.id}: '{node.name}'")
+
+ if dimensions:  # NOTE(review): indentation is lost in this patch text; whether the sections below sit under the layer filter cannot be confirmed here
+ for name in node.dim_names:
+ if node.has_dim(name):
+ msg.info(f" - dim {name}: {node.get_dim(name)}")
+ else:
+ msg.info(f" - dim {name}: {node.has_dim(name)}")  # prints the falsy has_dim() result instead of a value
+
+ if parameters:
+ for name in node.param_names:
+ if node.has_param(name):
+ print_value = _print_matrix(node.get_param(name))  # shape plus a short sample of the values
+ msg.info(f" - param {name}: {print_value}")
+ else:
+ msg.info(f" - param {name}: {node.has_param(name)}")  # prints the falsy has_param() result
+ if gradients:
+ for name in node.param_names:  # gradients are looked up under the parameter names
+ if node.has_grad(name):
+ print_value = _print_matrix(node.get_grad(name))
+ msg.info(f" - grad {name}: {print_value}")
+ else:
+ msg.info(f" - grad {name}: {node.has_grad(name)}")  # prints the falsy has_grad() result
+ if attributes:
+ attrs = node.attrs
+ for name, value in attrs.items():  # one line per entry of node.attrs
+ msg.info(f" - attr {name}: {value}")
+
+
+def _print_matrix(value):
+    """Format an array as "<shape> - sample: <up to 5 leading values>"; None and bools are returned unchanged."""
+    if value is None or isinstance(value, bool):
+        return value
+    sample = value
+    # Follow the first entry of each leading axis; stop at an empty axis, where indexing [0] would raise.
+    while sample.ndim > 1 and sample.shape[0] > 0:
+        sample = sample[0]
+    sample = sample[0:5] if sample.ndim > 0 else sample  # a 0-d array cannot be sliced
+    return str(value.shape) + " - sample: " + str(sample)
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index 19f3e7860..036aeab17 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -1,36 +1,47 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import plac
+from typing import Optional, Sequence
import requests
-import os
-import subprocess
import sys
from wasabi import msg
+import typer
-from .link import link
-from ..util import get_package_path
+from ._util import app, Arg, Opt
from .. import about
+from ..util import is_package, get_base_version, run_command
+from ..errors import OLD_MODEL_SHORTCUTS
-@plac.annotations(
- model=("Model to download (shortcut or name)", "positional", None, str),
- direct=("Force direct download of name + version", "flag", "d", bool),
- pip_args=("Additional arguments to be passed to `pip install` on model install"),
+@app.command(
+ "download",
+ context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
-def download(model, direct=False, *pip_args):
+def download_cli(
+ # fmt: off
+ ctx: typer.Context,
+ model: str = Arg(..., help="Name of pipeline package to download"),
+ direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
+ # fmt: on
+):
"""
- Download compatible model from default download path using pip. Model
- can be shortcut, model name or, if --direct flag is set, full model name
- with version. For direct downloads, the compatibility check will be skipped.
+ Download compatible trained pipeline from the default download path using
+ pip. If --direct flag is set, the command expects the full package name with
+ version. For direct downloads, the compatibility check will be skipped. All
+ additional arguments provided to this command will be passed to `pip install`
+ on package installation.
+
+ DOCS: https://nightly.spacy.io/api/cli#download
+ AVAILABLE PACKAGES: https://spacy.io/models
"""
- if not require_package("spacy") and "--no-deps" not in pip_args:
+ download(model, direct, *ctx.args)
+
+
+def download(model: str, direct: bool = False, *pip_args) -> None:
+ if not is_package("spacy") and "--no-deps" not in pip_args:
msg.warn(
- "Skipping model package dependencies and setting `--no-deps`. "
+ "Skipping pipeline package dependencies and setting `--no-deps`. "
"You don't seem to have the spaCy package itself installed "
"(maybe because you've built from source?), so installing the "
- "model dependencies would cause spaCy to be downloaded, which "
- "probably isn't what you want. If the model package has other "
+ "package dependencies would cause spaCy to be downloaded, which "
+ "probably isn't what you want. If the pipeline package has other "
"dependencies, you'll have to install them manually."
)
pip_args = pip_args + ("--no-deps",)
@@ -39,97 +50,59 @@ def download(model, direct=False, *pip_args):
components = model.split("-")
model_name = "".join(components[:-1])
version = components[-1]
- dl = download_model(dl_tpl.format(m=model_name, v=version), pip_args)
+ download_model(dl_tpl.format(m=model_name, v=version), pip_args)
else:
- shortcuts = get_json(about.__shortcuts__, "available shortcuts")
- model_name = shortcuts.get(model, model)
+ model_name = model
+ if model in OLD_MODEL_SHORTCUTS:  # legacy shortcut (e.g. 'en'): warn, then resolve to the full package name
+ msg.warn(
+ f"As of spaCy v3.0, shortcuts like '{model}' are deprecated. Please "
+ f"use the full pipeline package name '{OLD_MODEL_SHORTCUTS[model]}' instead."
+ )
+ model_name = OLD_MODEL_SHORTCUTS[model]
compatibility = get_compatibility()
version = get_version(model_name, compatibility)
- dl = download_model(dl_tpl.format(m=model_name, v=version), pip_args)
- if dl != 0: # if download subprocess doesn't return 0, exit
- sys.exit(dl)
- msg.good(
- "Download and installation successful",
- "You can now load the model via spacy.load('{}')".format(model_name),
- )
- # Only create symlink if the model is installed via a shortcut like 'en'.
- # There's no real advantage over an additional symlink for en_core_web_sm
- # and if anything, it's more error prone and causes more confusion.
- if model in shortcuts:
- try:
- # Get package path here because link uses
- # pip.get_installed_distributions() to check if model is a
- # package, which fails if model was just installed via
- # subprocess
- package_path = get_package_path(model_name)
- link(model_name, model, force=True, model_path=package_path)
- except: # noqa: E722
- # Dirty, but since spacy.download and the auto-linking is
- # mostly a convenience wrapper, it's best to show a success
- # message and loading instructions, even if linking fails.
- msg.warn(
- "Download successful but linking failed",
- "Creating a shortcut link for '{}' didn't work (maybe you "
- "don't have admin permissions?), but you can still load "
- "the model via its full package name: "
- "nlp = spacy.load('{}')".format(model, model_name),
- )
- # If a model is downloaded and then loaded within the same process, our
- # is_package check currently fails, because pkg_resources.working_set
- # is not refreshed automatically (see #3923). We're trying to work
- # around this here be requiring the package explicitly.
- require_package(model_name)
+ download_model(dl_tpl.format(m=model_name, v=version), pip_args)
+ msg.good(
+ "Download and installation successful",
+ f"You can now load the package via spacy.load('{model_name}')",
+ )
-def require_package(name):
- try:
- import pkg_resources
-
- pkg_resources.working_set.require(name)
- return True
- except: # noqa: E722
- return False
-
-
-def get_json(url, desc):
- r = requests.get(url)
+def get_compatibility() -> dict:
+ version = get_base_version(about.__version__)
+ r = requests.get(about.__compatibility__)
if r.status_code != 200:
msg.fail(
- "Server error ({})".format(r.status_code),
- "Couldn't fetch {}. Please find a model for your spaCy "
- "installation (v{}), and download it manually. For more "
- "details, see the documentation: "
- "https://spacy.io/usage/models".format(desc, about.__version__),
+ f"Server error ({r.status_code})",
+ f"Couldn't fetch compatibility table. Please find a package for your spaCy "
+ f"installation (v{about.__version__}), and download it manually. "
+ f"For more details, see the documentation: "
+ f"https://nightly.spacy.io/usage/models",
exits=1,
)
- return r.json()
-
-
-def get_compatibility():
- version = about.__version__
- version = version.rsplit(".dev", 1)[0]
- comp_table = get_json(about.__compatibility__, "compatibility table")
+ comp_table = r.json()
comp = comp_table["spacy"]
if version not in comp:
- msg.fail("No compatible models found for v{} of spaCy".format(version), exits=1)
+ msg.fail(f"No compatible packages found for v{version} of spaCy", exits=1)
return comp[version]
-def get_version(model, comp):
- model = model.rsplit(".dev", 1)[0]
+def get_version(model: str, comp: dict) -> str:
+ model = get_base_version(model)
if model not in comp:
msg.fail(
- "No compatible model found for '{}' "
- "(spaCy v{}).".format(model, about.__version__),
+ f"No compatible package found for '{model}' (spaCy v{about.__version__})",
exits=1,
)
return comp[model][0]
-def download_model(filename, user_pip_args=None):
+def download_model(
+ filename: str, user_pip_args: Optional[Sequence[str]] = None
+) -> None:
download_url = about.__download_url__ + "/" + filename
pip_args = ["--no-cache-dir"]
if user_pip_args:
pip_args.extend(user_pip_args)
cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
- return subprocess.call(cmd, env=os.environ.copy())
+ run_command(cmd)
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index be994de73..c5cbab09a 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -1,76 +1,124 @@
-# coding: utf8
-from __future__ import unicode_literals, division, print_function
+from typing import Optional, List, Dict
+from wasabi import Printer
+from pathlib import Path
+import re
+import srsly
+from thinc.api import require_gpu, fix_random_seed
-import plac
-from timeit import default_timer as timer
-from wasabi import msg
-
-from ..gold import GoldCorpus
+from ..gold import Corpus
+from ..tokens import Doc
+from ._util import app, Arg, Opt
+from ..scorer import Scorer
from .. import util
from .. import displacy
-@plac.annotations(
- model=("Model name or path", "positional", None, str),
- data_path=("Location of JSON-formatted evaluation data", "positional", None, str),
- gold_preproc=("Use gold preprocessing", "flag", "G", bool),
- gpu_id=("Use GPU", "option", "g", int),
- displacy_path=("Directory to output rendered parses as HTML", "option", "dp", str),
- displacy_limit=("Limit of parses to render as HTML", "option", "dl", int),
- return_scores=("Return dict containing model scores", "flag", "R", bool),
-)
-def evaluate(
- model,
- data_path,
- gpu_id=-1,
- gold_preproc=False,
- displacy_path=None,
- displacy_limit=25,
- return_scores=False,
+@app.command("evaluate")
+def evaluate_cli(
+ # fmt: off
+ model: str = Arg(..., help="Model name or path"),
+ data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
+ output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False),
+ use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
+ gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),
+ displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),
+ displacy_limit: int = Opt(25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"),
+ # fmt: on
):
"""
- Evaluate a model. To render a sample of parses in a HTML file, set an
- output directory as the displacy_path argument.
+ Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation
+ data in the binary .spacy format. The --gold-preproc option sets up the
+ evaluation examples with gold-standard sentences and tokens for the
+ predictions. Gold preprocessing helps the annotations align to the
+ tokenization, and may result in sequences of more consistent length. However,
+ it may reduce runtime accuracy due to train/test skew. To render a sample of
+ dependency parses in a HTML file, set an output directory as the
+ displacy_path argument.
+
+ DOCS: https://nightly.spacy.io/api/cli#evaluate
"""
- util.fix_random_seed()
- if gpu_id >= 0:
- util.use_gpu(gpu_id)
- util.set_env_log(False)
+ evaluate(
+ model,
+ data_path,
+ output=output,
+ use_gpu=use_gpu,
+ gold_preproc=gold_preproc,
+ displacy_path=displacy_path,
+ displacy_limit=displacy_limit,
+ silent=False,
+ )
+
+
+def evaluate(
+ model: str,
+ data_path: Path,
+ output: Optional[Path] = None,
+ use_gpu: int = -1,
+ gold_preproc: bool = False,
+ displacy_path: Optional[Path] = None,
+ displacy_limit: int = 25,
+ silent: bool = True,
+) -> Dict[str, str]:  # returns the formatted metrics dict (`data`), not a Scorer object
+ msg = Printer(no_print=silent, pretty=not silent)
+ fix_random_seed()
+ if use_gpu >= 0:
+ require_gpu(use_gpu)
data_path = util.ensure_path(data_path)
+ output_path = util.ensure_path(output)
displacy_path = util.ensure_path(displacy_path)
if not data_path.exists():
msg.fail("Evaluation data not found", data_path, exits=1)
if displacy_path and not displacy_path.exists():
msg.fail("Visualization output directory not found", displacy_path, exits=1)
- corpus = GoldCorpus(data_path, data_path)
- if model.startswith("blank:"):
- nlp = util.get_lang_class(model.replace("blank:", ""))()
- else:
- nlp = util.load_model(model)
- dev_docs = list(corpus.dev_docs(nlp, gold_preproc=gold_preproc))
- begin = timer()
- scorer = nlp.evaluate(dev_docs, verbose=False)
- end = timer()
- nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
- results = {
- "Time": "%.2f s" % (end - begin),
- "Words": nwords,
- "Words/s": "%.0f" % (nwords / (end - begin)),
- "TOK": "%.2f" % scorer.token_acc,
- "POS": "%.2f" % scorer.tags_acc,
- "UAS": "%.2f" % scorer.uas,
- "LAS": "%.2f" % scorer.las,
- "NER P": "%.2f" % scorer.ents_p,
- "NER R": "%.2f" % scorer.ents_r,
- "NER F": "%.2f" % scorer.ents_f,
- "Textcat": "%.2f" % scorer.textcat_score,
+ corpus = Corpus(data_path, gold_preproc=gold_preproc)
+ nlp = util.load_model(model)
+ dev_dataset = list(corpus(nlp))
+ scores = nlp.evaluate(dev_dataset)
+ metrics = {
+ "TOK": "token_acc",
+ "TAG": "tag_acc",
+ "POS": "pos_acc",
+ "MORPH": "morph_acc",
+ "LEMMA": "lemma_acc",
+ "UAS": "dep_uas",
+ "LAS": "dep_las",
+ "NER P": "ents_p",
+ "NER R": "ents_r",
+ "NER F": "ents_f",
+ "TEXTCAT": "cats_score",
+ "SENT P": "sents_p",
+ "SENT R": "sents_r",
+ "SENT F": "sents_f",
+ "SPEED": "speed",
}
+ results = {}
+ for metric, key in metrics.items():
+ if key in scores:
+ if key == "cats_score":
+ metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
+ if key == "speed":
+ results[metric] = f"{scores[key]:.0f}"
+ else:
+ results[metric] = f"{scores[key]*100:.2f}"
+ data = {re.sub(r"[\s/]", "_", k.lower()): v for k, v in results.items()}
+
msg.table(results, title="Results")
+ if "ents_per_type" in scores:
+ if scores["ents_per_type"]:
+ print_ents_per_type(msg, scores["ents_per_type"])
+ if "cats_f_per_type" in scores:
+ if scores["cats_f_per_type"]:
+ print_textcats_f_per_cat(msg, scores["cats_f_per_type"])
+ if "cats_auc_per_type" in scores:
+ if scores["cats_auc_per_type"]:
+ print_textcats_auc_per_cat(msg, scores["cats_auc_per_type"])
+
if displacy_path:
- docs, golds = zip(*dev_docs)
- render_deps = "parser" in nlp.meta.get("pipeline", [])
- render_ents = "ner" in nlp.meta.get("pipeline", [])
+ factory_names = [nlp.get_pipe_meta(pipe).factory for pipe in nlp.pipe_names]
+ docs = [ex.predicted for ex in dev_dataset]
+ render_deps = "parser" in factory_names
+ render_ents = "ner" in factory_names
render_parses(
docs,
displacy_path,
@@ -79,12 +127,22 @@ def evaluate(
deps=render_deps,
ents=render_ents,
)
- msg.good("Generated {} parses as HTML".format(displacy_limit), displacy_path)
- if return_scores:
- return scorer.scores
+ msg.good(f"Generated {displacy_limit} parses as HTML", displacy_path)
+
+ if output_path is not None:
+ srsly.write_json(output_path, data)
+ msg.good(f"Saved results to {output_path}")
+ return data
-def render_parses(docs, output_path, model_name="", limit=250, deps=True, ents=True):
+def render_parses(
+ docs: List[Doc],
+ output_path: Path,
+ model_name: str = "",
+ limit: int = 250,
+ deps: bool = True,
+ ents: bool = True,
+):
docs[0].user_data["title"] = model_name
if ents:
html = displacy.render(docs[:limit], style="ent", page=True)
@@ -96,3 +154,40 @@ def render_parses(docs, output_path, model_name="", limit=250, deps=True, ents=T
)
with (output_path / "parses.html").open("w", encoding="utf8") as file_:
file_.write(html)
+
+
+def print_ents_per_type(msg: Printer, scores: Dict[str, Dict[str, float]]) -> None:
+ data = [
+ (k, f"{v['p']*100:.2f}", f"{v['r']*100:.2f}", f"{v['f']*100:.2f}")
+ for k, v in scores.items()
+ ]
+ msg.table(
+ data,
+ header=("", "P", "R", "F"),
+ aligns=("l", "r", "r", "r"),
+ title="NER (per type)",
+ )
+
+
+def print_textcats_f_per_cat(msg: Printer, scores: Dict[str, Dict[str, float]]) -> None:
+ data = [
+ (k, f"{v['p']*100:.2f}", f"{v['r']*100:.2f}", f"{v['f']*100:.2f}")
+ for k, v in scores.items()
+ ]
+ msg.table(
+ data,
+ header=("", "P", "R", "F"),
+ aligns=("l", "r", "r", "r"),
+ title="Textcat F (per label)",
+ )
+
+
+def print_textcats_auc_per_cat(
+ msg: Printer, scores: Dict[str, Dict[str, float]]
+) -> None:
+ msg.table(
+ [(k, f"{v:.2f}") for k, v in scores.items()],
+ header=("", "ROC AUC"),
+ aligns=("l", "r"),
+ title="Textcat ROC AUC (per label)",
+ )
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index 080d0dc77..2b87163c2 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -1,92 +1,113 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import plac
+from typing import Optional, Dict, Any, Union
import platform
from pathlib import Path
-from wasabi import msg
+from wasabi import Printer, MarkdownRenderer
import srsly
-from ..compat import path2str, basestring_, unicode_
+from ._util import app, Arg, Opt
from .. import util
from .. import about
-@plac.annotations(
- model=("Optional shortcut link of model", "positional", None, str),
- markdown=("Generate Markdown for GitHub issues", "flag", "md", str),
- silent=("Don't print anything (just return)", "flag", "s"),
-)
-def info(model=None, markdown=False, silent=False):
+@app.command("info")
+def info_cli(
+ # fmt: off
+ model: Optional[str] = Arg(None, help="Optional loadable spaCy pipeline"),
+ markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
+ silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
+ # fmt: on
+):
"""
- Print info about spaCy installation. If a model shortcut link is
- speficied as an argument, print model information. Flag --markdown
- prints details in Markdown for easy copy-pasting to GitHub issues.
+ Print info about spaCy installation. If a pipeline is specified as an argument,
+ print its meta information. Flag --markdown prints details in Markdown for easy
+ copy-pasting to GitHub issues.
+
+ DOCS: https://nightly.spacy.io/api/cli#info
"""
+ info(model, markdown=markdown, silent=silent)
+
+
+def info(
+ model: Optional[str] = None, *, markdown: bool = False, silent: bool = True
+) -> Union[str, dict]:
+ msg = Printer(no_print=silent, pretty=not silent)
if model:
- if util.is_package(model):
- model_path = util.get_package_path(model)
- else:
- model_path = util.get_data_path() / model
- meta_path = model_path / "meta.json"
- if not meta_path.is_file():
- msg.fail("Can't find model meta.json", meta_path, exits=1)
- meta = srsly.read_json(meta_path)
- if model_path.resolve() != model_path:
- meta["link"] = path2str(model_path)
- meta["source"] = path2str(model_path.resolve())
- else:
- meta["source"] = path2str(model_path)
+ title = f"Info about pipeline '{model}'"
+ data = info_model(model, silent=silent)
+ else:
+ title = "Info about spaCy"
+ data = info_spacy()
+ raw_data = {k.lower().replace(" ", "_"): v for k, v in data.items()}
+ if "Pipelines" in data and isinstance(data["Pipelines"], dict):
+ data["Pipelines"] = ", ".join(
+ f"{n} ({v})" for n, v in data["Pipelines"].items()
+ )
+ markdown_data = get_markdown(data, title=title)
+ if markdown:
if not silent:
- title = "Info about model '{}'".format(model)
- model_meta = {
- k: v for k, v in meta.items() if k not in ("accuracy", "speed")
- }
- if markdown:
- print_markdown(model_meta, title=title)
- else:
- msg.table(model_meta, title=title)
- return meta
- data = {
+ print(markdown_data)
+ return markdown_data
+ if not silent:
+ table_data = dict(data)
+ msg.table(table_data, title=title)
+ return raw_data
+
+
+def info_spacy() -> Dict[str, Any]:
+ """Generate info about the current spaCy installation.
+
+ RETURNS (dict): The spaCy info.
+ """
+ all_models = {}
+ for pkg_name in util.get_installed_models():
+ package = pkg_name.replace("-", "_")
+ all_models[package] = util.get_package_version(pkg_name)
+ return {
"spaCy version": about.__version__,
- "Location": path2str(Path(__file__).parent.parent),
+ "Location": str(Path(__file__).parent.parent),
"Platform": platform.platform(),
"Python version": platform.python_version(),
- "Models": list_models(),
+ "Pipelines": all_models,
}
- if not silent:
- title = "Info about spaCy"
- if markdown:
- print_markdown(data, title=title)
- else:
- msg.table(data, title=title)
- return data
-def list_models():
- def exclude_dir(dir_name):
- # exclude common cache directories and hidden directories
- exclude = ("cache", "pycache", "__pycache__")
- return dir_name in exclude or dir_name.startswith(".")
+def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
+ """Generate info about a specific model.
- data_path = util.get_data_path()
- if data_path:
- models = [f.parts[-1] for f in data_path.iterdir() if f.is_dir()]
- return ", ".join([m for m in models if not exclude_dir(m)])
- return "-"
+ model (str): Model name or path.
+ silent (bool): Don't print anything, just return.
+ RETURNS (dict): The model meta.
+ """
+ msg = Printer(no_print=silent, pretty=not silent)
+ if util.is_package(model):
+ model_path = util.get_package_path(model)
+ else:
+ model_path = Path(model)  # model arrives as str; wrap it so "/" and .resolve() below work
+ meta_path = model_path / "meta.json"
+ if not meta_path.is_file():
+ msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
+ meta = srsly.read_json(meta_path)
+ if model_path.resolve() != model_path:
+ meta["source"] = str(model_path.resolve())
+ else:
+ meta["source"] = str(model_path)
+ return {k: v for k, v in meta.items() if k not in ("accuracy", "speed")}
-def print_markdown(data, title=None):
- """Print data in GitHub-flavoured Markdown format for issues etc.
+def get_markdown(data: Dict[str, Any], title: Optional[str] = None) -> str:
+ """Get data in GitHub-flavoured Markdown format for issues etc.
data (dict or list of tuples): Label/value pairs.
- title (unicode or None): Title, will be rendered as headline 2.
+ title (str / None): Title, will be rendered as headline 2.
+ RETURNS (str): The Markdown string.
"""
- markdown = []
- for key, value in data.items():
- if isinstance(value, basestring_) and Path(value).exists():
- continue
- markdown.append("* **{}:** {}".format(key, unicode_(value)))
+ md = MarkdownRenderer()
if title:
- print("\n## {}".format(title))
- print("\n{}\n".format("\n".join(markdown)))
+ md.add(md.title(2, title))
+ items = []
+ for key, value in data.items():
+ if isinstance(value, str) and Path(value).exists():
+ continue
+ items.append(f"{md.bold(f'{key}:')} {value}")
+ md.add(md.list(items))
+ return f"\n{md.text}\n"
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
new file mode 100644
index 000000000..584ca7f64
--- /dev/null
+++ b/spacy/cli/init_config.py
@@ -0,0 +1,199 @@
+from typing import Optional, List, Tuple
+from enum import Enum
+from pathlib import Path
+from wasabi import Printer, diff_strings
+from thinc.api import Config
+import srsly
+import re
+
+from .. import util
+from ..language import DEFAULT_CONFIG_PRETRAIN_PATH
+from ..schemas import RecommendationSchema
+from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
+
+
+ROOT = Path(__file__).parent / "templates"
+TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
+RECOMMENDATIONS = srsly.read_yaml(ROOT / "quickstart_training_recommendations.yml")
+
+
+class Optimizations(str, Enum):
+ efficiency = "efficiency"
+ accuracy = "accuracy"
+
+
+@init_cli.command("config")
+def init_config_cli(
+ # fmt: off
+ output_file: Path = Arg(..., help="File to save config.cfg to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
+ lang: Optional[str] = Opt("en", "--lang", "-l", help="Two-letter code of the language to use"),
+ pipeline: Optional[str] = Opt("tagger,parser,ner", "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
+ optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
+ cpu: bool = Opt(False, "--cpu", "-C", help="Whether the model needs to run on CPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."),
+ # fmt: on
+):
+ """
+ Generate a starter config.cfg for training. Based on your requirements
+ specified via the CLI arguments, this command generates a config with the
+ optimal settings for your use case. This includes the choice of architecture,
+ pretrained weights and related hyperparameters.
+
+ DOCS: https://nightly.spacy.io/api/cli#init-config
+ """
+ if isinstance(optimize, Optimizations): # instance of enum from the CLI
+ optimize = optimize.value
+ pipeline = [p.strip() for p in pipeline.split(",")]
+ init_config(output_file, lang=lang, pipeline=pipeline, optimize=optimize, cpu=cpu)
+
+
+@init_cli.command("fill-config")
+def init_fill_config_cli(
+ # fmt: off
+ base_path: Path = Arg(..., help="Base config to fill", exists=True, dir_okay=False),
+ output_file: Path = Arg("-", help="File to save config.cfg to (or - for stdout)", allow_dash=True),
+ pretraining: bool = Opt(False, "--pretraining", "-p", help="Include config for pretraining (with 'spacy pretrain')"),
+ diff: bool = Opt(False, "--diff", "-D", help="Print a visual diff highlighting the changes")
+ # fmt: on
+):
+ """
+ Fill partial config.cfg with default values. Will add all missing settings
+ from the default config and will create all objects, check the registered
+ functions for their default values and update the base config. This command
+ can be used with a config generated via the training quickstart widget:
+ https://nightly.spacy.io/usage/training#quickstart
+
+ DOCS: https://nightly.spacy.io/api/cli#init-fill-config
+ """
+ fill_config(output_file, base_path, pretraining=pretraining, diff=diff)
+
+
+def fill_config(
+ output_file: Path,
+ base_path: Path,
+ *,
+ pretraining: bool = False,
+ diff: bool = False,
+ silent: bool = False,
+) -> Tuple[Config, Config]:
+ is_stdout = str(output_file) == "-"
+ no_print = is_stdout or silent
+ msg = Printer(no_print=no_print)
+ with show_validation_error(hint_fill=False):
+ config = util.load_config(base_path)
+ nlp, _ = util.load_model_from_config(config, auto_fill=True, validate=False)
+ # Load a second time with validation to be extra sure that the produced
+ # config result is a valid config
+ nlp, _ = util.load_model_from_config(nlp.config)
+ filled = nlp.config
+ if pretraining:
+ validate_config_for_pretrain(filled, msg)
+ pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH)
+ filled = pretrain_config.merge(filled)
+ before = config.to_str()
+ after = filled.to_str()
+ if before == after:
+ msg.warn("Nothing to auto-fill: base config is already complete")
+ else:
+ msg.good("Auto-filled config with all values")
+ if diff and not no_print:
+ if before == after:
+ msg.warn("No diff to show: nothing was auto-filled")
+ else:
+ msg.divider("START CONFIG DIFF")
+ print("")
+ print(diff_strings(before, after))
+ msg.divider("END CONFIG DIFF")
+ print("")
+ save_config(filled, output_file, is_stdout=is_stdout, silent=silent)
+ return config, filled
+
+
+def init_config(
+ output_file: Path, *, lang: str, pipeline: List[str], optimize: str, cpu: bool
+) -> None:
+ is_stdout = str(output_file) == "-"
+ msg = Printer(no_print=is_stdout)
+ try:
+ from jinja2 import Template
+ except ImportError:
+ msg.fail("This command requires jinja2", "pip install jinja2", exits=1)
+ with TEMPLATE_PATH.open("r") as f:
+ template = Template(f.read())
+ # Filter out duplicates since tok2vec and transformer are added by template
+ pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
+ reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, {})).dict()
+ variables = {
+ "lang": lang,
+ "components": pipeline,
+ "optimize": optimize,
+ "hardware": "cpu" if cpu else "gpu",
+ "transformer_data": reco["transformer"],
+ "word_vectors": reco["word_vectors"],
+ "has_letters": reco["has_letters"],
+ }
+ if variables["transformer_data"] and not has_spacy_transformers():
+ msg.warn(
+ "To generate a more effective transformer-based config (GPU-only), "
+ "install the spacy-transformers package and re-run this command. "
+ "The config generated now does not use transformers."
+ )
+ variables["transformer_data"] = None
+ base_template = template.render(variables).strip()
+ # Giving up on getting the newlines right in jinja for now
+ base_template = re.sub(r"\n\n\n+", "\n\n", base_template)
+ # Access variables declared in templates
+ template_vars = template.make_module(variables)
+ use_case = {
+ "Language": lang,
+ "Pipeline": ", ".join(pipeline),
+ "Optimize for": optimize,
+ "Hardware": variables["hardware"].upper(),
+ "Transformer": template_vars.transformer.get("name", False),
+ }
+ msg.info("Generated template specific for your use case")
+ for label, value in use_case.items():
+ msg.text(f"- {label}: {value}")
+ with show_validation_error(hint_fill=False):
+ config = util.load_config_from_str(base_template)
+ nlp, _ = util.load_model_from_config(config, auto_fill=True)
+ msg.good("Auto-filled config with all values")
+ save_config(nlp.config, output_file, is_stdout=is_stdout)
+
+
+def save_config(
+ config: Config, output_file: Path, is_stdout: bool = False, silent: bool = False
+) -> None:
+ no_print = is_stdout or silent
+ msg = Printer(no_print=no_print)
+ if is_stdout:
+ print(config.to_str())
+ else:
+ if not output_file.parent.exists():
+ output_file.parent.mkdir(parents=True)
+ config.to_disk(output_file, interpolate=False)
+ msg.good("Saved config", output_file)
+ msg.text("You can now add your data and train your pipeline:")
+ variables = ["--paths.train ./train.spacy", "--paths.dev ./dev.spacy"]
+ if not no_print:
+ print(f"{COMMAND} train {output_file.parts[-1]} {' '.join(variables)}")
+
+
+def has_spacy_transformers() -> bool:
+ try:
+ import spacy_transformers # noqa: F401
+
+ return True
+ except ImportError:
+ return False
+
+
+def validate_config_for_pretrain(config: Config, msg: Printer) -> None:
+ if "tok2vec" not in config["nlp"]["pipeline"]:
+ msg.warn(
+ "No tok2vec component found in the pipeline. If your tok2vec "
+ "component has a different name, you may need to adjust the "
+ "tok2vec_model reference in the [pretraining] block. If you don't "
+ "have a tok2vec component, make sure to add it to your [components] "
+ "and the pipeline specified in the [nlp] block, so you can pretrain "
+ "weights for it."
+ )
diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py
index 7fdd39932..5f06fd895 100644
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -1,7 +1,4 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import plac
+from typing import Optional, List, Dict, Any, Union, IO
import math
from tqdm import tqdm
import numpy
@@ -13,13 +10,14 @@ import gzip
import zipfile
import srsly
import warnings
-from wasabi import msg
+from wasabi import msg, Printer
+import typer
+from ._util import app, init_cli, Arg, Opt
from ..vectors import Vectors
from ..errors import Errors, Warnings
+from ..language import Language
from ..util import ensure_path, get_lang_class, load_model, OOV_RANK
-from ..lookups import Lookups
-
try:
import ftfy
@@ -30,49 +28,72 @@ except ImportError:
DEFAULT_OOV_PROB = -20
-@plac.annotations(
- lang=("Model language", "positional", None, str),
- output_dir=("Model output directory", "positional", None, Path),
- freqs_loc=("Location of words frequencies file", "option", "f", Path),
- jsonl_loc=("Location of JSONL-formatted attributes file", "option", "j", Path),
- clusters_loc=("Optional location of brown clusters data", "option", "c", str),
- vectors_loc=("Optional vectors file in Word2Vec format", "option", "v", str),
- truncate_vectors=(
- "Optional number of vectors to truncate to when reading in vectors file",
- "option",
- "t",
- int,
- ),
- prune_vectors=("Optional number of vectors to prune to", "option", "V", int),
- vectors_name=(
- "Optional name for the word vectors, e.g. en_core_web_lg.vectors",
- "option",
- "vn",
- str,
- ),
- model_name=("Optional name for the model meta", "option", "mn", str),
- omit_extra_lookups=("Don't include extra lookups in model", "flag", "OEL", bool),
- base_model=("Base model (for languages with custom tokenizers)", "option", "b", str),
+@init_cli.command("vocab")
+@app.command(
+ "init-model",
+ context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+ hidden=True, # hide this from main CLI help but still allow it to work with warning
)
-def init_model(
- lang,
- output_dir,
- freqs_loc=None,
- clusters_loc=None,
- jsonl_loc=None,
- vectors_loc=None,
- truncate_vectors=0,
- prune_vectors=-1,
- vectors_name=None,
- model_name=None,
- omit_extra_lookups=False,
- base_model=None,
+def init_model_cli(
+ # fmt: off
+ ctx: typer.Context, # This is only used to read additional arguments
+ lang: str = Arg(..., help="Pipeline language"),
+ output_dir: Path = Arg(..., help="Pipeline output directory"),
+ freqs_loc: Optional[Path] = Arg(None, help="Location of words frequencies file", exists=True),
+ clusters_loc: Optional[Path] = Opt(None, "--clusters-loc", "-c", help="Optional location of brown clusters data", exists=True),
+ jsonl_loc: Optional[Path] = Opt(None, "--jsonl-loc", "-j", help="Location of JSONL-formatted attributes file", exists=True),
+ vectors_loc: Optional[Path] = Opt(None, "--vectors-loc", "-v", help="Optional vectors file in Word2Vec format", exists=True),
+ prune_vectors: int = Opt(-1, "--prune-vectors", "-V", help="Optional number of vectors to prune to"),
+ truncate_vectors: int = Opt(0, "--truncate-vectors", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
+ vectors_name: Optional[str] = Opt(None, "--vectors-name", "-vn", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
+ model_name: Optional[str] = Opt(None, "--meta-name", "-mn", help="Optional name of the package for the pipeline meta"),
+ base_model: Optional[str] = Opt(None, "--base", "-b", help="Name of or path to base pipeline to start with (mostly relevant for pipelines with custom tokenizers)")
+ # fmt: on
):
"""
- Create a new model from raw data, like word frequencies, Brown clusters
- and word vectors. If vectors are provided in Word2Vec format, they can
- be either a .txt or zipped as a .zip or .tar.gz.
+ Create a new blank pipeline directory with vocab and vectors from raw data.
+ If vectors are provided in Word2Vec format, they can be either a .txt or
+ zipped as a .zip or .tar.gz.
+
+ DOCS: https://nightly.spacy.io/api/cli#init-vocab
"""
+ if ctx.command.name == "init-model":
+ msg.warn(
+ "The init-model command is now called 'init vocab'. You can run "
+ "'python -m spacy init --help' for an overview of the other "
+ "available initialization commands."
+ )
+ init_model(
+ lang,
+ output_dir,
+ freqs_loc=freqs_loc,
+ clusters_loc=clusters_loc,
+ jsonl_loc=jsonl_loc,
+ vectors_loc=vectors_loc,
+ prune_vectors=prune_vectors,
+ truncate_vectors=truncate_vectors,
+ vectors_name=vectors_name,
+ model_name=model_name,
+ base_model=base_model,
+ silent=False,
+ )
+
+
+def init_model(
+ lang: str,
+ output_dir: Path,
+ freqs_loc: Optional[Path] = None,
+ clusters_loc: Optional[Path] = None,
+ jsonl_loc: Optional[Path] = None,
+ vectors_loc: Optional[Path] = None,
+ prune_vectors: int = -1,
+ truncate_vectors: int = 0,
+ vectors_name: Optional[str] = None,
+ model_name: Optional[str] = None,
+ base_model: Optional[str] = None,
+ silent: bool = True,
+) -> Language:
+ msg = Printer(no_print=silent, pretty=not silent)
if jsonl_loc is not None:
if freqs_loc is not None or clusters_loc is not None:
settings = ["-j"]
@@ -95,27 +116,20 @@ def init_model(
freqs_loc = ensure_path(freqs_loc)
if freqs_loc is not None and not freqs_loc.exists():
msg.fail("Can't find words frequencies file", freqs_loc, exits=1)
- lex_attrs = read_attrs_from_deprecated(freqs_loc, clusters_loc)
+ lex_attrs = read_attrs_from_deprecated(msg, freqs_loc, clusters_loc)
- with msg.loading("Creating model..."):
+ with msg.loading("Creating blank pipeline..."):
nlp = create_model(lang, lex_attrs, name=model_name, base_model=base_model)
- # Create empty extra lexeme tables so the data from spacy-lookups-data
- # isn't loaded if these features are accessed
- if omit_extra_lookups:
- nlp.vocab.lookups_extra = Lookups()
- nlp.vocab.lookups_extra.add_table("lexeme_cluster")
- nlp.vocab.lookups_extra.add_table("lexeme_prob")
- nlp.vocab.lookups_extra.add_table("lexeme_settings")
-
- msg.good("Successfully created model")
+ msg.good("Successfully created blank pipeline")
if vectors_loc is not None:
- add_vectors(nlp, vectors_loc, truncate_vectors, prune_vectors, vectors_name)
+ add_vectors(
+ msg, nlp, vectors_loc, truncate_vectors, prune_vectors, vectors_name
+ )
vec_added = len(nlp.vocab.vectors)
lex_added = len(nlp.vocab)
msg.good(
- "Sucessfully compiled vocab",
- "{} entries, {} vectors".format(lex_added, vec_added),
+ "Sucessfully compiled vocab", f"{lex_added} entries, {vec_added} vectors",
)
if not output_dir.exists():
output_dir.mkdir()
@@ -123,7 +137,7 @@ def init_model(
return nlp
-def open_file(loc):
+def open_file(loc: Union[str, Path]) -> IO:
"""Handle .gz, .tar.gz or unzipped files"""
loc = ensure_path(loc)
if tarfile.is_tarfile(str(loc)):
@@ -139,7 +153,9 @@ def open_file(loc):
return loc.open("r", encoding="utf8")
-def read_attrs_from_deprecated(freqs_loc, clusters_loc):
+def read_attrs_from_deprecated(
+ msg: Printer, freqs_loc: Optional[Path], clusters_loc: Optional[Path]
+) -> List[Dict[str, Any]]:
if freqs_loc is not None:
with msg.loading("Counting frequencies..."):
probs, _ = read_freqs(freqs_loc)
@@ -167,7 +183,12 @@ def read_attrs_from_deprecated(freqs_loc, clusters_loc):
return lex_attrs
-def create_model(lang, lex_attrs, name=None, base_model=None):
+def create_model(
+ lang: str,
+ lex_attrs: List[Dict[str, Any]],
+ name: Optional[str] = None,
+ base_model: Optional[Union[str, Path]] = None,
+) -> Language:
if base_model:
nlp = load_model(base_model)
# keep the tokenizer but remove any existing pipeline components due to
@@ -194,7 +215,14 @@ def create_model(lang, lex_attrs, name=None, base_model=None):
return nlp
-def add_vectors(nlp, vectors_loc, truncate_vectors, prune_vectors, name=None):
+def add_vectors(
+ msg: Printer,
+ nlp: Language,
+ vectors_loc: Optional[Path],
+ truncate_vectors: int,
+ prune_vectors: int,
+ name: Optional[str] = None,
+) -> None:
vectors_loc = ensure_path(vectors_loc)
if vectors_loc and vectors_loc.parts[-1].endswith(".npz"):
nlp.vocab.vectors = Vectors(data=numpy.load(vectors_loc.open("rb")))
@@ -203,9 +231,11 @@ def add_vectors(nlp, vectors_loc, truncate_vectors, prune_vectors, name=None):
nlp.vocab.vectors.add(lex.orth, row=lex.rank)
else:
if vectors_loc:
- with msg.loading("Reading vectors from {}".format(vectors_loc)):
- vectors_data, vector_keys = read_vectors(vectors_loc, truncate_vectors)
- msg.good("Loaded vectors from {}".format(vectors_loc))
+ with msg.loading(f"Reading vectors from {vectors_loc}"):
+ vectors_data, vector_keys = read_vectors(
+ msg, vectors_loc, truncate_vectors
+ )
+ msg.good(f"Loaded vectors from {vectors_loc}")
else:
vectors_data, vector_keys = (None, None)
if vector_keys is not None:
@@ -215,7 +245,8 @@ def add_vectors(nlp, vectors_loc, truncate_vectors, prune_vectors, name=None):
if vectors_data is not None:
nlp.vocab.vectors = Vectors(data=vectors_data, keys=vector_keys)
if name is None:
- nlp.vocab.vectors.name = "%s_model.vectors" % nlp.meta["lang"]
+ # TODO: Is this correct? Does this matter?
+ nlp.vocab.vectors.name = f"{nlp.meta['lang']}_{nlp.meta['name']}.vectors"
else:
nlp.vocab.vectors.name = name
nlp.meta["vectors"]["name"] = nlp.vocab.vectors.name
@@ -223,7 +254,7 @@ def add_vectors(nlp, vectors_loc, truncate_vectors, prune_vectors, name=None):
nlp.vocab.prune_vectors(prune_vectors)
-def read_vectors(vectors_loc, truncate_vectors=0):
+def read_vectors(msg: Printer, vectors_loc: Path, truncate_vectors: int):
f = open_file(vectors_loc)
shape = tuple(int(size) for size in next(f).split())
if truncate_vectors >= 1:
@@ -243,7 +274,9 @@ def read_vectors(vectors_loc, truncate_vectors=0):
return vectors_data, vectors_keys
-def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
+def read_freqs(
+ freqs_loc: Path, max_length: int = 100, min_doc_freq: int = 5, min_freq: int = 50
+):
counts = PreshCounter()
total = 0
with freqs_loc.open() as f:
@@ -265,14 +298,14 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
word = literal_eval(key)
except SyntaxError:
# Take odd strings literally.
- word = literal_eval("'%s'" % key)
+ word = literal_eval(f"'{key}'")
smooth_count = counts.smoother(int(freq))
probs[word] = math.log(smooth_count) - log_total
oov_prob = math.log(counts.smoother(0)) - log_total
return probs, oov_prob
-def read_clusters(clusters_loc):
+def read_clusters(clusters_loc: Path) -> dict:
clusters = {}
if ftfy is None:
warnings.warn(Warnings.W004)
diff --git a/spacy/cli/link.py b/spacy/cli/link.py
deleted file mode 100644
index 8117829b5..000000000
--- a/spacy/cli/link.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import plac
-from pathlib import Path
-from wasabi import msg
-
-from ..compat import symlink_to, path2str
-from .. import util
-
-
-@plac.annotations(
- origin=("package name or local path to model", "positional", None, str),
- link_name=("name of shortuct link to create", "positional", None, str),
- force=("force overwriting of existing link", "flag", "f", bool),
-)
-def link(origin, link_name, force=False, model_path=None):
- """
- Create a symlink for models within the spacy/data directory. Accepts
- either the name of a pip package, or the local path to the model data
- directory. Linking models allows loading them via spacy.load(link_name).
- """
- if util.is_package(origin):
- model_path = util.get_package_path(origin)
- else:
- model_path = Path(origin) if model_path is None else Path(model_path)
- if not model_path.exists():
- msg.fail(
- "Can't locate model data",
- "The data should be located in {}".format(path2str(model_path)),
- exits=1,
- )
- data_path = util.get_data_path()
- if not data_path or not data_path.exists():
- spacy_loc = Path(__file__).parent.parent
- msg.fail(
- "Can't find the spaCy data path to create model symlink",
- "Make sure a directory `/data` exists within your spaCy "
- "installation and try again. The data directory should be located "
- "here:".format(path=spacy_loc),
- exits=1,
- )
- link_path = util.get_data_path() / link_name
- if link_path.is_symlink() and not force:
- msg.fail(
- "Link '{}' already exists".format(link_name),
- "To overwrite an existing link, use the --force flag",
- exits=1,
- )
- elif link_path.is_symlink(): # does a symlink exist?
- # NB: It's important to check for is_symlink here and not for exists,
- # because invalid/outdated symlinks would return False otherwise.
- link_path.unlink()
- elif link_path.exists(): # does it exist otherwise?
- # NB: Check this last because valid symlinks also "exist".
- msg.fail(
- "Can't overwrite symlink '{}'".format(link_name),
- "This can happen if your data directory contains a directory or "
- "file of the same name.",
- exits=1,
- )
- details = "%s --> %s" % (path2str(model_path), path2str(link_path))
- try:
- symlink_to(link_path, model_path)
- except: # noqa: E722
- # This is quite dirty, but just making sure other errors are caught.
- msg.fail(
- "Couldn't link model to '{}'".format(link_name),
- "Creating a symlink in spacy/data failed. Make sure you have the "
- "required permissions and try re-running the command as admin, or "
- "use a virtualenv. You can still import the model as a module and "
- "call its load() method, or create the symlink manually.",
- )
- msg.text(details)
- raise
- msg.good("Linking successful", details)
- msg.text("You can now load the model via spacy.load('{}')".format(link_name))
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 8ed92259c..c457b3e17 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -1,126 +1,172 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import plac
+from typing import Optional, Union, Any, Dict
import shutil
from pathlib import Path
-from wasabi import msg, get_raw_input
+from wasabi import Printer, get_raw_input
import srsly
+import sys
-from ..compat import path2str
+from ._util import app, Arg, Opt
+from ..schemas import validate, ModelMetaSchema
from .. import util
from .. import about
-@plac.annotations(
- input_dir=("Directory with model data", "positional", None, str),
- output_dir=("Output parent directory", "positional", None, str),
- meta_path=("Path to meta.json", "option", "m", str),
- create_meta=("Create meta.json, even if one exists", "flag", "c", bool),
- force=("Force overwriting existing model in output directory", "flag", "f", bool),
-)
-def package(input_dir, output_dir, meta_path=None, create_meta=False, force=False):
+@app.command("package")
+def package_cli(
+ # fmt: off
+ input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False),
+ output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False),
+ meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False),
+ create_meta: bool = Opt(False, "--create-meta", "-c", "-C", help="Create meta.json, even if one exists"),
+ version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"),
+ no_sdist: bool = Opt(False, "--no-sdist", "-NS", help="Don't build .tar.gz sdist, can be set if you want to run this step manually"),
+ force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"),
+ # fmt: on
+):
"""
- Generate Python package for model data, including meta and required
- installation files. A new directory will be created in the specified
- output directory, and model data will be copied over. If --create-meta is
- set and a meta.json already exists in the output directory, the existing
- values will be used as the defaults in the command-line prompt.
+ Generate an installable Python package for a pipeline. Includes binary data,
+ meta and required installation files. A new directory will be created in the
+ specified output directory, and the data will be copied over. If
+ --create-meta is set and a meta.json already exists in the output directory,
+ the existing values will be used as the defaults in the command-line prompt.
+ After packaging, "python setup.py sdist" is run in the package directory,
+ which will create a .tar.gz archive that can be installed via "pip install".
+
+ DOCS: https://nightly.spacy.io/api/cli#package
"""
+ # Thin CLI wrapper: forward everything to package(). The --no-sdist flag is
+ # inverted into create_sdist, and silent=False so messages are printed.
+ package(
+ input_dir,
+ output_dir,
+ meta_path=meta_path,
+ version=version,
+ create_meta=create_meta,
+ create_sdist=not no_sdist,
+ force=force,
+ silent=False,
+ )
+
+
+def package(
+    input_dir: Path,
+    output_dir: Path,
+    meta_path: Optional[Path] = None,
+    version: Optional[str] = None,
+    create_meta: bool = False,
+    create_sdist: bool = True,
+    force: bool = False,
+    silent: bool = True,
+) -> None:
+    """Create an installable package directory for a pipeline.
+
+    A directory named "{lang}_{name}-{version}" is created inside output_dir,
+    the pipeline data is copied into it and meta.json, setup.py, MANIFEST.in
+    and __init__.py are written. Any validation problem is reported through
+    the printer and terminates the process with exit code 1.
+
+    input_dir (Path): Directory with the pipeline data. Must exist.
+    output_dir (Path): Parent directory for the package. Must exist.
+    meta_path (Optional[Path]): meta.json to use. Defaults to the meta.json
+        inside the input directory. The file has to exist.
+    version (Optional[str]): If set, replaces the "version" value of the meta.
+    create_meta (bool): Prompt for the package settings via generate_meta,
+        using the loaded meta as defaults.
+    create_sdist (bool): Run "setup.py sdist" in the new package directory.
+    force (bool): Remove an already existing package directory first.
+    silent (bool): Don't print messages.
+    """
+    msg = Printer(no_print=silent, pretty=not silent)
     input_path = util.ensure_path(input_dir)
     output_path = util.ensure_path(output_dir)
     meta_path = util.ensure_path(meta_path)
     if not input_path or not input_path.exists():
-        msg.fail("Can't locate model data", input_path, exits=1)
+        msg.fail("Can't locate pipeline data", input_path, exits=1)
     if not output_path or not output_path.exists():
         msg.fail("Output directory not found", output_path, exits=1)
     if meta_path and not meta_path.exists():
-        msg.fail("Can't find model meta.json", meta_path, exits=1)
-
+        msg.fail("Can't find pipeline meta.json", meta_path, exits=1)
+    # From here on only the paths returned by util.ensure_path (and checked
+    # above) are used. Joining the raw arguments with "/" raises a TypeError
+    # when a caller passes a str instead of a Path.
     meta_path = meta_path or input_path / "meta.json"
-    if meta_path.is_file():
-        meta = srsly.read_json(meta_path)
-        if not create_meta:  # only print if user doesn't want to overwrite
-            msg.good("Loaded meta.json from file", meta_path)
-        else:
-            meta = generate_meta(input_dir, meta, msg)
-    for key in ("lang", "name", "version"):
-        if key not in meta or meta[key] == "":
-            msg.fail(
-                "No '{}' setting found in meta.json".format(key),
-                "This setting is required to build your package.",
-                exits=1,
-            )
+    if not meta_path.exists() or not meta_path.is_file():
+        msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
+    meta = srsly.read_json(meta_path)
+    meta = get_meta(input_path, meta)
+    if version is not None:
+        meta["version"] = version
+    if not create_meta:  # only print if user doesn't want to overwrite
+        msg.good("Loaded meta.json from file", meta_path)
+    else:
+        meta = generate_meta(meta, msg)
+    errors = validate(ModelMetaSchema, meta)
+    if errors:
+        msg.fail("Invalid pipeline meta.json")
+        print("\n".join(errors))
+        sys.exit(1)
     model_name = meta["lang"] + "_" + meta["name"]
     model_name_v = model_name + "-" + meta["version"]
     main_path = output_path / model_name_v
     package_path = main_path / model_name
-
     if package_path.exists():
         if force:
-            shutil.rmtree(path2str(package_path))
+            shutil.rmtree(str(package_path))
         else:
             msg.fail(
                 "Package directory already exists",
                 "Please delete the directory and try again, or use the "
-                "`--force` flag to overwrite existing "
-                "directories.".format(path=path2str(package_path)),
+                "`--force` flag to overwrite existing directories.",
                 exits=1,
             )
     Path.mkdir(package_path, parents=True)
-    shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
+    shutil.copytree(str(input_path), str(package_path / model_name_v))
     create_file(main_path / "meta.json", srsly.json_dumps(meta, indent=2))
     create_file(main_path / "setup.py", TEMPLATE_SETUP)
     create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
     create_file(package_path / "__init__.py", TEMPLATE_INIT)
-    msg.good("Successfully created package '{}'".format(model_name_v), main_path)
-    msg.text("To build the package, run `python setup.py sdist` in this directory.")
+    msg.good(f"Successfully created package '{model_name_v}'", main_path)
+    if create_sdist:
+        # Build the source distribution from inside the package directory
+        with util.working_dir(main_path):
+            util.run_command([sys.executable, "setup.py", "sdist"])
+        zip_file = main_path / "dist" / f"{model_name_v}.tar.gz"
+        msg.good("Successfully created zipped Python package", zip_file)
-def create_file(file_path, contents):
+def create_file(file_path: Path, contents: str) -> None:
+    """Write contents to file_path as UTF-8, replacing any previous content.
+
+    file_path (Path): The file to write. It is created if it doesn't exist.
+    contents (str): The text to write.
+    """
     file_path.touch()
-    file_path.open("w", encoding="utf-8").write(contents)
+    # write_text() opens the file in "w" mode and closes it again, so the
+    # handle isn't left open until garbage collection.
+    file_path.write_text(contents, encoding="utf-8")
-def generate_meta(model_path, existing_meta, msg):
- meta = existing_meta or {}
- settings = [
- ("lang", "Model language", meta.get("lang", "en")),
- ("name", "Model name", meta.get("name", "model")),
- ("version", "Model version", meta.get("version", "0.0.0")),
- ("spacy_version", "Required spaCy version", ">=%s,<3.0.0" % about.__version__),
- ("description", "Model description", meta.get("description", False)),
- ("author", "Author", meta.get("author", False)),
- ("email", "Author email", meta.get("email", False)),
- ("url", "Author website", meta.get("url", False)),
- ("license", "License", meta.get("license", "CC BY-SA 3.0")),
- ]
+def get_meta(
+ model_path: Union[str, Path], existing_meta: Dict[str, Any]
+) -> Dict[str, Any]:
+ """Assemble the meta for a packaged pipeline.
+
+ Starts from built-in defaults, overlays the values from existing_meta and
+ then sets "spacy_version", "vectors" and (if about.__title__ is not
+ "spacy") "parent_package" from the running library and the pipeline
+ loaded from model_path. existing_meta itself is not modified.
+
+ model_path (Union[str, Path]): Path the pipeline is loaded from.
+ existing_meta (Dict[str, Any]): Values that take priority over the defaults.
+ RETURNS (Dict[str, Any]): The combined meta.
+ """
+ meta = {
+ "lang": "en",
+ "name": "pipeline",
+ "version": "0.0.0",
+ "description": "",
+ "author": "",
+ "email": "",
+ "url": "",
+ "license": "MIT",
+ }
+ meta.update(existing_meta)
nlp = util.load_model_from_path(Path(model_path))
- meta["pipeline"] = nlp.pipe_names
+ # Assigned after update(), so this replaces any "spacy_version" that came
+ # from existing_meta. The same holds for "vectors" below.
+ meta["spacy_version"] = util.get_model_version_range(about.__version__)
meta["vectors"] = {
"width": nlp.vocab.vectors_length,
"vectors": len(nlp.vocab.vectors),
"keys": nlp.vocab.vectors.n_keys,
"name": nlp.vocab.vectors.name,
}
- msg.divider("Generating meta.json")
- msg.text(
- "Enter the package settings for your model. The following information "
- "will be read from your model data: pipeline, vectors."
- )
- for setting, desc, default in settings:
- response = get_raw_input(desc, default)
- meta[setting] = default if response == "" and default else response
if about.__title__ != "spacy":
meta["parent_package"] = about.__title__
return meta
+def generate_meta(existing_meta: Dict[str, Any], msg: Printer) -> Dict[str, Any]:
+ """Prompt on the command line for the package settings.
+
+ The current values in existing_meta (or built-in fallbacks) are offered as
+ defaults. Note that a non-empty existing_meta is updated in place and is
+ the object that gets returned.
+
+ existing_meta (Dict[str, Any]): Meta used for the prompt defaults.
+ msg (Printer): Printer used for the divider and the intro text.
+ RETURNS (Dict[str, Any]): The meta including the entered settings.
+ """
+ meta = existing_meta or {}
+ settings = [
+ ("lang", "Pipeline language", meta.get("lang", "en")),
+ ("name", "Pipeline name", meta.get("name", "pipeline")),
+ ("version", "Package version", meta.get("version", "0.0.0")),
+ ("description", "Package description", meta.get("description", None)),
+ ("author", "Author", meta.get("author", None)),
+ ("email", "Author email", meta.get("email", None)),
+ ("url", "Author website", meta.get("url", None)),
+ ("license", "License", meta.get("license", "MIT")),
+ ]
+ msg.divider("Generating meta.json")
+ msg.text(
+ "Enter the package settings for your pipeline. The following information "
+ "will be read from your pipeline data: pipeline, vectors."
+ )
+ for setting, desc, default in settings:
+ response = get_raw_input(desc, default)
+ # Fall back to the default only for an empty response with a truthy
+ # default; otherwise the response is stored as entered.
+ # NOTE(review): presumably get_raw_input returns the entered text as a
+ # str, so a setting without default can end up as "" - confirm.
+ meta[setting] = default if response == "" and default else response
+ return meta
+
+
TEMPLATE_SETUP = """
#!/usr/bin/env python
-# coding: utf8
-from __future__ import unicode_literals
-
import io
import json
from os import path, walk
@@ -166,16 +212,17 @@ def setup_package():
setup(
name=model_name,
- description=meta['description'],
- author=meta['author'],
- author_email=meta['email'],
- url=meta['url'],
+ description=meta.get('description'),
+ author=meta.get('author'),
+ author_email=meta.get('email'),
+ url=meta.get('url'),
version=meta['version'],
- license=meta['license'],
+ license=meta.get('license'),
packages=[model_name],
package_data={model_name: list_files(model_dir)},
install_requires=list_requirements(meta),
zip_safe=False,
+ entry_points={'spacy_models': ['{m} = {m}'.format(m=model_name)]}
)
@@ -186,13 +233,11 @@ if __name__ == '__main__':
TEMPLATE_MANIFEST = """
include meta.json
+include config.cfg
""".strip()
TEMPLATE_INIT = """
-# coding: utf8
-from __future__ import unicode_literals
-
from pathlib import Path
from spacy.util import load_model_from_init_py, get_model_meta
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index e949f76cf..828e5f08e 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -1,217 +1,143 @@
-# coding: utf8
-from __future__ import print_function, unicode_literals
-
-import plac
+from typing import Optional, Dict, Any
import random
import numpy
import time
import re
from collections import Counter
from pathlib import Path
-from thinc.v2v import Affine, Maxout
-from thinc.misc import LayerNorm as LN
-from thinc.neural.util import prefer_gpu
+from thinc.api import use_pytorch_for_gpu_memory, require_gpu
+from thinc.api import set_dropout_rate, to_categorical, fix_random_seed
+from thinc.api import CosineDistance, L2Distance
from wasabi import msg
import srsly
+from functools import partial
+import typer
+from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
+from ._util import import_code
from ..errors import Errors
+from ..ml.models.multi_task import build_cloze_multi_task_model
+from ..ml.models.multi_task import build_cloze_characters_multi_task_model
from ..tokens import Doc
from ..attrs import ID, HEAD
-from .._ml import Tok2Vec, flatten, chain, create_default_optimizer
-from .._ml import masked_language_model, get_cossim_loss, get_characters_loss
-from .._ml import MultiSoftmax
from .. import util
-from .train import _load_pretrained_tok2vec
-@plac.annotations(
- texts_loc=(
- "Path to JSONL file with raw texts to learn from, with text provided as the key 'text' or tokens as the "
- "key 'tokens'",
- "positional",
- None,
- str,
- ),
- vectors_model=("Name or path to spaCy model with vectors to learn from"),
- output_dir=("Directory to write models to on each epoch", "positional", None, str),
- width=("Width of CNN layers", "option", "cw", int),
- conv_depth=("Depth of CNN layers", "option", "cd", int),
- cnn_window=("Window size for CNN layers", "option", "cW", int),
- cnn_pieces=("Maxout size for CNN layers. 1 for Mish", "option", "cP", int),
- use_chars=("Whether to use character-based embedding", "flag", "chr", bool),
- sa_depth=("Depth of self-attention layers", "option", "sa", int),
- bilstm_depth=("Depth of BiLSTM layers (requires PyTorch)", "option", "lstm", int),
- embed_rows=("Number of embedding rows", "option", "er", int),
- loss_func=(
- "Loss function to use for the objective. Either 'characters', 'L2' or 'cosine'",
- "option",
- "L",
- str,
- ),
- use_vectors=("Whether to use the static vectors as input features", "flag", "uv"),
- dropout=("Dropout rate", "option", "d", float),
- batch_size=("Number of words per training batch", "option", "bs", int),
- max_length=(
- "Max words per example. Longer examples are discarded",
- "option",
- "xw",
- int,
- ),
- min_length=(
- "Min words per example. Shorter examples are discarded",
- "option",
- "nw",
- int,
- ),
- seed=("Seed for random number generators", "option", "s", int),
- n_iter=("Number of iterations to pretrain", "option", "i", int),
- n_save_every=("Save model every X batches.", "option", "se", int),
- init_tok2vec=(
- "Path to pretrained weights for the token-to-vector parts of the models. See 'spacy pretrain'. Experimental.",
- "option",
- "t2v",
- Path,
- ),
- epoch_start=(
- "The epoch to start counting at. Only relevant when using '--init-tok2vec' and the given weight file has been "
- "renamed. Prevents unintended overwriting of existing weight files.",
- "option",
- "es",
- int,
- ),
+@app.command(
+ "pretrain",
+ context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
-def pretrain(
- texts_loc,
- vectors_model,
- output_dir,
- width=96,
- conv_depth=4,
- cnn_pieces=3,
- sa_depth=0,
- cnn_window=1,
- bilstm_depth=0,
- use_chars=False,
- embed_rows=2000,
- loss_func="cosine",
- use_vectors=False,
- dropout=0.2,
- n_iter=1000,
- batch_size=3000,
- max_length=500,
- min_length=5,
- seed=0,
- n_save_every=None,
- init_tok2vec=None,
- epoch_start=None,
+def pretrain_cli(
+ # fmt: off
+ ctx: typer.Context, # This is only used to read additional arguments
+ texts_loc: Path = Arg(..., help="Path to JSONL file with raw texts to learn from, with text provided as the key 'text' or tokens as the key 'tokens'", exists=True),
+ output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"),
+ config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False),
+ code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
+ epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
+ use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
+ # fmt: on
):
"""
Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,
- using an approximate language-modelling objective. Specifically, we load
- pretrained vectors, and train a component like a CNN, BiLSTM, etc to predict
- vectors which match the pretrained ones. The weights are saved to a directory
- after each epoch. You can then pass a path to one of these pretrained weights
- files to the 'spacy train' command.
+ using an approximate language-modelling objective. Two objective types
+ are available, vector-based and character-based.
+
+ In the vector-based objective, we load word vectors that have been trained
+ using a word2vec-style distributional similarity algorithm, and train a
+ component like a CNN, BiLSTM, etc to predict vectors which match the
+ pretrained ones. The weights are saved to a directory after each epoch. You
+ can then pass a path to one of these pretrained weights files to the
+ 'spacy train' command.
This technique may be especially helpful if you have little labelled data.
However, it's still quite experimental, so your mileage may vary.
To load the weights back in during 'spacy train', you need to ensure
- all settings are the same between pretraining and training. The API and
- errors around this need some improvement.
+ all settings are the same between pretraining and training. Ideally,
+ this is done by using the same config file for both commands.
+
+ DOCS: https://nightly.spacy.io/api/cli#pretrain
"""
- config = dict(locals())
- for key in config:
- if isinstance(config[key], Path):
- config[key] = str(config[key])
- util.fix_random_seed(seed)
+ # The command accepts arguments that aren't declared above (see
+ # context_settings); they arrive in ctx.args and are passed on to pretrain()
+ # as config_overrides after going through parse_config_overrides.
+ overrides = parse_config_overrides(ctx.args)
+ # NOTE(review): presumably imports the file given via --code-path so its
+ # registered functions exist before the config is loaded - confirm in ._util.
+ import_code(code_path)
+ pretrain(
+ texts_loc,
+ output_dir,
+ config_path,
+ config_overrides=overrides,
+ resume_path=resume_path,
+ epoch_resume=epoch_resume,
+ use_gpu=use_gpu,
+ )
- has_gpu = prefer_gpu()
- msg.info("Using GPU" if has_gpu else "Not using GPU")
- output_dir = Path(output_dir)
- if output_dir.exists() and [p for p in output_dir.iterdir()]:
- msg.warn(
- "Output directory is not empty",
- "It is better to use an empty directory or refer to a new output path, "
- "then the new directory will be created for you.",
- )
+def pretrain(
+ texts_loc: Path,
+ output_dir: Path,
+ config_path: Path,
+ config_overrides: Dict[str, Any] = {},
+ resume_path: Optional[Path] = None,
+ epoch_resume: Optional[int] = None,
+ use_gpu: int = -1,
+):
+ verify_cli_args(texts_loc, output_dir, config_path, resume_path, epoch_resume)
+ if use_gpu >= 0:
+ msg.info("Using GPU")
+ require_gpu(use_gpu)
+ else:
+ msg.info("Using CPU")
+ msg.info(f"Loading config from: {config_path}")
+ with show_validation_error(config_path):
+ config = util.load_config(config_path, overrides=config_overrides)
+ nlp, config = util.load_model_from_config(config)
+ pretrain_config = config["pretraining"]
+ if not pretrain_config:
+ # TODO: What's the solution here? How do we handle optional blocks?
+ msg.fail("The [pretraining] block in your config is empty", exits=1)
if not output_dir.exists():
output_dir.mkdir()
- msg.good("Created output directory: {}".format(output_dir))
- srsly.write_json(output_dir / "config.json", config)
- msg.good("Saved settings to config.json")
-
- # Load texts from file or stdin
+ msg.good(f"Created output directory: {output_dir}")
+ seed = pretrain_config["seed"]
+ if seed is not None:
+ fix_random_seed(seed)
+ if use_gpu >= 0 and pretrain_config["use_pytorch_for_gpu_memory"]:
+ use_pytorch_for_gpu_memory()
+ config.to_disk(output_dir / "config.cfg")
+ msg.good("Saved config file in the output directory")
if texts_loc != "-": # reading from a file
- texts_loc = Path(texts_loc)
- if not texts_loc.exists():
- msg.fail("Input text file doesn't exist", texts_loc, exits=1)
with msg.loading("Loading input texts..."):
texts = list(srsly.read_jsonl(texts_loc))
- if not texts:
- msg.fail("Input file is empty", texts_loc, exits=1)
- msg.good("Loaded input texts")
random.shuffle(texts)
else: # reading from stdin
- msg.text("Reading input text from stdin...")
+ msg.info("Reading input text from stdin...")
texts = srsly.read_jsonl("-")
- with msg.loading("Loading model '{}'...".format(vectors_model)):
- nlp = util.load_model(vectors_model)
- msg.good("Loaded model '{}'".format(vectors_model))
- pretrained_vectors = None if not use_vectors else nlp.vocab.vectors.name
- model = create_pretraining_model(
- nlp,
- Tok2Vec(
- width,
- embed_rows,
- conv_depth=conv_depth,
- pretrained_vectors=pretrained_vectors,
- bilstm_depth=bilstm_depth, # Requires PyTorch. Experimental.
- subword_features=not use_chars, # Set to False for Chinese etc
- cnn_maxout_pieces=cnn_pieces, # If set to 1, use Mish activation.
- ),
- objective=loss_func
- )
- # Load in pretrained weights
- if init_tok2vec is not None:
- components = _load_pretrained_tok2vec(nlp, init_tok2vec)
- msg.text("Loaded pretrained tok2vec for: {}".format(components))
- # Parse the epoch number from the given weight file
- model_name = re.search(r"model\d+\.bin", str(init_tok2vec))
- if model_name:
- # Default weight file name so read epoch_start from it by cutting off 'model' and '.bin'
- epoch_start = int(model_name.group(0)[5:][:-4]) + 1
- else:
- if not epoch_start:
- msg.fail(
- "You have to use the '--epoch-start' argument when using a renamed weight file for "
- "'--init-tok2vec'",
- exits=True,
- )
- elif epoch_start < 0:
- msg.fail(
- "The argument '--epoch-start' has to be greater or equal to 0. '%d' is invalid"
- % epoch_start,
- exits=True,
- )
- else:
- # Without '--init-tok2vec' the '--epoch-start' argument is ignored
- epoch_start = 0
+ tok2vec_path = pretrain_config["tok2vec_model"]
+ tok2vec = config
+ for subpath in tok2vec_path.split("."):
+ tok2vec = tok2vec.get(subpath)
+ model = create_pretraining_model(nlp, tok2vec, pretrain_config)
+ optimizer = pretrain_config["optimizer"]
+
+ # Load in pretrained weights to resume from
+ if resume_path is not None:
+ _resume_model(model, resume_path, epoch_resume)
+ else:
+ # Without '--resume-path' the '--epoch-resume' argument is ignored
+ epoch_resume = 0
- optimizer = create_default_optimizer(model.ops)
tracker = ProgressTracker(frequency=10000)
- msg.divider("Pre-training tok2vec layer - starting at epoch %d" % epoch_start)
+ msg.divider(f"Pre-training tok2vec layer - starting at epoch {epoch_resume}")
row_settings = {"widths": (3, 10, 10, 6, 4), "aligns": ("r", "r", "r", "r", "r")}
msg.row(("#", "# Words", "Total Loss", "Loss", "w/s"), **row_settings)
def _save_model(epoch, is_temp=False):
is_temp_str = ".temp" if is_temp else ""
with model.use_params(optimizer.averages):
- with (output_dir / ("model%d%s.bin" % (epoch, is_temp_str))).open(
- "wb"
- ) as file_:
- file_.write(model.tok2vec.to_bytes())
+ with (output_dir / f"model{epoch}{is_temp_str}.bin").open("wb") as file_:
+ file_.write(model.get_ref("tok2vec").to_bytes())
log = {
"nr_word": tracker.nr_word,
"loss": tracker.loss,
@@ -222,26 +148,26 @@ def pretrain(
file_.write(srsly.json_dumps(log) + "\n")
skip_counter = 0
- for epoch in range(epoch_start, n_iter + epoch_start):
- for batch_id, batch in enumerate(
- util.minibatch_by_words(((text, None) for text in texts), size=batch_size)
- ):
+ objective = create_objective(pretrain_config["objective"])
+ for epoch in range(epoch_resume, pretrain_config["max_epochs"]):
+ batches = util.minibatch_by_words(texts, size=pretrain_config["batch_size"])
+ for batch_id, batch in enumerate(batches):
docs, count = make_docs(
nlp,
- [text for (text, _) in batch],
- max_length=max_length,
- min_length=min_length,
+ batch,
+ max_length=pretrain_config["max_length"],
+ min_length=pretrain_config["min_length"],
)
skip_counter += count
- loss = make_update(
- model, docs, optimizer, objective=loss_func, drop=dropout
- )
+ loss = make_update(model, docs, optimizer, objective)
progress = tracker.update(epoch, loss, docs)
if progress:
msg.row(progress, **row_settings)
if texts_loc == "-" and tracker.words_per_epoch[epoch] >= 10 ** 7:
break
- if n_save_every and (batch_id % n_save_every == 0):
+ if pretrain_config["n_save_every"] and (
+ batch_id % pretrain_config["n_save_every"] == 0
+ ):
_save_model(epoch, is_temp=True)
_save_model(epoch)
tracker.epoch_loss = 0.0
@@ -249,24 +175,36 @@ def pretrain(
# Reshuffle the texts if texts were loaded from a file
random.shuffle(texts)
if skip_counter > 0:
- msg.warn("Skipped {count} empty values".format(count=str(skip_counter)))
+ msg.warn(f"Skipped {skip_counter} empty values")
msg.good("Successfully finished pretrain")
-def make_update(model, docs, optimizer, drop=0.0, objective="L2"):
+def _resume_model(model, resume_path, epoch_resume):
+    """Load tok2vec weights from a file and determine the epoch to resume at.
+
+    model: The pretraining model. Its "tok2vec" reference receives the weights.
+    resume_path: Location of the weights file (opened in binary mode).
+    epoch_resume: Epoch to resume from, used when the epoch can't be parsed
+        from the file name.
+    RETURNS (int): The epoch to resume from. The caller has to use this return
+        value: rebinding the argument inside this function is not visible
+        outside of it.
+    """
+    msg.info(f"Resume training tok2vec from: {resume_path}")
+    with resume_path.open("rb") as file_:
+        weights_data = file_.read()
+    model.get_ref("tok2vec").from_bytes(weights_data)
+    # Default weight files are named model{epoch}.bin, so the epoch that was
+    # completed can be read from the file name. Resume at the following one.
+    model_name = re.search(r"model(\d+)\.bin", str(resume_path))
+    if model_name:
+        epoch_resume = int(model_name.group(1)) + 1
+    msg.info(f"Resuming from epoch: {epoch_resume}")
+    return epoch_resume
+
+
+def make_update(model, docs, optimizer, objective_func):
"""Perform an update over a single batch of documents.
docs (iterable): A batch of `Doc` objects.
- drop (float): The dropout rate.
optimizer (callable): An optimizer.
RETURNS loss: A float for the loss.
"""
- predictions, backprop = model.begin_update(docs, drop=drop)
- if objective == "characters":
- loss, gradients = get_characters_loss(model.ops, docs, predictions)
- else:
- loss, gradients = get_vectors_loss(model.ops, docs, predictions, objective)
- backprop(gradients, sgd=optimizer)
+ predictions, backprop = model.begin_update(docs)
+ loss, gradients = objective_func(model.ops, docs, predictions)
+ backprop(gradients)
+ model.finish_update(optimizer)
# Don't want to return a cupy object here
# The gradients are modified in-place by the BERT MLM,
# so we get an accurate loss
@@ -298,18 +236,43 @@ def make_docs(nlp, batch, min_length, max_length):
heads = numpy.asarray(heads, dtype="uint64")
heads = heads.reshape((len(doc), 1))
doc = doc.from_array([HEAD], heads)
- if len(doc) >= min_length and len(doc) < max_length:
+ if min_length <= len(doc) < max_length:
docs.append(doc)
return docs, skip_count
-def get_vectors_loss(ops, docs, prediction, objective="L2"):
- """Compute a mean-squared error loss between the documents' vectors and
- the prediction.
+def create_objective(config):
+ """Create the objective for pretraining.
- Note that this is ripe for customization! We could compute the vectors
- in some other word, e.g. with an LSTM language model, or use some other
- type of objective.
+ We'd like to replace this with a registry function but it's tricky because
+ we're also making a model choice based on this. For now we hard-code support
+ for two types (characters, vectors). For characters you can specify
+ n_characters, for vectors you can specify the loss.
+
+ Bleh.
+ """
+ objective_type = config["type"]
+ if objective_type == "characters":
+ return partial(get_characters_loss, nr_char=config["n_characters"])
+ elif objective_type == "vectors":
+ if config["loss"] == "cosine":
+ return partial(
+ get_vectors_loss,
+ distance=CosineDistance(normalize=True, ignore_zeros=True),
+ )
+ elif config["loss"] == "L2":
+ return partial(
+ get_vectors_loss, distance=L2Distance(normalize=True, ignore_zeros=True)
+ )
+ else:
+ raise ValueError("Unexpected loss type", config["loss"])
+ else:
+ raise ValueError("Unexpected objective_type", objective_type)
+
+
+def get_vectors_loss(ops, docs, prediction, distance):
+ """Compute a loss based on a distance between the documents' vectors and
+ the prediction.
"""
# The simplest way to implement this would be to vstack the
# token.vector values, but that's a bit inefficient, especially on GPU.
@@ -317,47 +280,51 @@ def get_vectors_loss(ops, docs, prediction, objective="L2"):
# and look them up all at once. This prevents data copying.
ids = ops.flatten([doc.to_array(ID).ravel() for doc in docs])
target = docs[0].vocab.vectors.data[ids]
- if objective == "L2":
- d_target = prediction - target
- loss = (d_target ** 2).sum()
- elif objective == "cosine":
- loss, d_target = get_cossim_loss(prediction, target)
- else:
- raise ValueError(Errors.E142.format(loss_func=objective))
+ d_target, loss = distance(prediction, target)
return loss, d_target
-def create_pretraining_model(nlp, tok2vec, objective="cosine", nr_char=10):
+def get_characters_loss(ops, docs, prediction, nr_char):
+    """Compute a squared-error loss between the prediction and the one-hot
+    encoded UTF-8 bytes (nr_char per token) taken from the docs.
+
+    RETURNS (tuple): The summed squared error and the gradient, i.e. the
+        difference divided by the number of rows in the prediction.
+    """
+    per_doc_ids = [doc.to_utf8_array(nr_char=nr_char) for doc in docs]
+    flat_ids = numpy.vstack(per_doc_ids).reshape((-1,))
+    # One 256-way one-hot vector per byte, then nr_char of them per row
+    one_hot = ops.asarray(to_categorical(flat_ids, n_classes=256), dtype="f")
+    expected = one_hot.reshape((-1, 256 * nr_char))
+    error = prediction - expected
+    n_rows = float(prediction.shape[0])
+    return (error ** 2).sum(), error / n_rows
+
+
+def create_pretraining_model(nlp, tok2vec, pretrain_config):
"""Define a network for the pretraining. We simply add an output layer onto
the tok2vec input model. The tok2vec input model needs to be a model that
takes a batch of Doc objects (as a list), and returns a list of arrays.
Each array in the output needs to have one row per token in the doc.
+ The actual tok2vec layer is stored as a reference, and only this bit will be
+ serialized to file and read back in when calling the 'train' command.
"""
- if objective == "characters":
- out_sizes = [256] * nr_char
- output_layer = chain(
- LN(Maxout(300, pieces=3)),
- MultiSoftmax(out_sizes, 300)
+ # TODO
+ maxout_pieces = 3
+ hidden_size = 300
+ if pretrain_config["objective"]["type"] == "vectors":
+ model = build_cloze_multi_task_model(
+ nlp.vocab, tok2vec, hidden_size=hidden_size, maxout_pieces=maxout_pieces
)
- else:
- output_size = nlp.vocab.vectors.data.shape[1]
- output_layer = chain(
- LN(Maxout(300, pieces=3)), Affine(output_size, drop_factor=0.0)
+ elif pretrain_config["objective"]["type"] == "characters":
+ model = build_cloze_characters_multi_task_model(
+ nlp.vocab,
+ tok2vec,
+ hidden_size=hidden_size,
+ maxout_pieces=maxout_pieces,
+ nr_char=pretrain_config["objective"]["n_characters"],
)
- # This is annoying, but the parser etc have the flatten step after
- # the tok2vec. To load the weights in cleanly, we need to match
- # the shape of the models' components exactly. So what we cann
- # "tok2vec" has to be the same set of processes as what the components do.
- tok2vec = chain(tok2vec, flatten)
- model = chain(tok2vec, output_layer)
- model = masked_language_model(nlp.vocab, model)
- model.tok2vec = tok2vec
- model.output_layer = output_layer
- model.begin_training([nlp.make_doc("Give it a doc to infer shapes")])
+ model.initialize(X=[nlp.make_doc("Give it a doc to infer shapes")])
+ set_dropout_rate(model, pretrain_config["dropout"])
return model
-class ProgressTracker(object):
+class ProgressTracker:
def __init__(self, frequency=1000000):
self.loss = 0.0
self.prev_loss = 0.0
@@ -403,3 +370,43 @@ def _smart_round(figure, width=10, max_decimal=4):
n_decimal = min(n_decimal, max_decimal)
format_str = "%." + str(n_decimal) + "f"
return format_str % figure
+
+
+def verify_cli_args(texts_loc, output_dir, config_path, resume_path, epoch_resume):
+    """Validate the pretrain arguments and exit with an error message if any
+    of them can't be used.
+
+    texts_loc: Path to the JSONL input texts, or "-" to read from stdin.
+    output_dir: Directory the weights are written to.
+    config_path: Path to the config file. Has to exist.
+    resume_path: Optional path to a weights file to resume from.
+    epoch_resume: Optional epoch to resume from. Required if resume_path does
+        not follow the default model{epoch}.bin naming.
+    """
+    if not config_path or not config_path.exists():
+        msg.fail("Config file not found", config_path, exits=1)
+    # any() stops at the first entry instead of listing the whole directory
+    if output_dir.exists() and any(output_dir.iterdir()):
+        if resume_path:
+            msg.warn(
+                "Output directory is not empty.",
+                "If you're resuming a run in this directory, the old weights "
+                "for the consecutive epochs will be overwritten with the new ones.",
+            )
+        else:
+            msg.warn(
+                "Output directory is not empty. ",
+                "It is better to use an empty directory or refer to a new output path, "
+                "then the new directory will be created for you.",
+            )
+    if texts_loc != "-":  # reading from a file
+        texts_loc = Path(texts_loc)
+        if not texts_loc.exists():
+            msg.fail("Input text file doesn't exist", texts_loc, exits=1)
+
+        # Only peek at the first record to find out whether there is any data
+        for text in srsly.read_jsonl(texts_loc):
+            break
+        else:
+            msg.fail("Input file is empty", texts_loc, exits=1)
+
+    if resume_path is not None:
+        model_name = re.search(r"model\d+\.bin", str(resume_path))
+        # Compare against None explicitly: 0 is a valid epoch to resume from
+        # and must not be treated as "not set".
+        if not model_name and epoch_resume is None:
+            msg.fail(
+                "You have to use the --epoch-resume setting when using a renamed weight file for --resume-path",
+                exits=1,
+            )
+        elif not model_name and epoch_resume < 0:
+            msg.fail(
+                f"The argument --epoch-resume has to be greater or equal to 0. {epoch_resume} is invalid",
+                exits=1,
+            )
diff --git a/spacy/cli/profile.py b/spacy/cli/profile.py
index 4ee72fc23..43226730d 100644
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@@ -1,7 +1,4 @@
-# coding: utf8
-from __future__ import unicode_literals, division, print_function
-
-import plac
+from typing import Optional, Sequence, Union, Iterator
import tqdm
from pathlib import Path
import srsly
@@ -9,36 +6,65 @@ import cProfile
import pstats
import sys
import itertools
-import thinc.extra.datasets
-from wasabi import msg
+from wasabi import msg, Printer
+import typer
+from ._util import app, debug_cli, Arg, Opt, NAME
+from ..language import Language
from ..util import load_model
-@plac.annotations(
- model=("Model to load", "positional", None, str),
- inputs=("Location of input file. '-' for stdin.", "positional", None, str),
- n_texts=("Maximum number of texts to use if available", "option", "n", int),
-)
-def profile(model, inputs=None, n_texts=10000):
+@debug_cli.command("profile")
+@app.command("profile", hidden=True)
+def profile_cli(
+ # fmt: off
+ ctx: typer.Context, # This is only used to read current calling context
+ model: str = Arg(..., help="Trained pipeline to load"),
+ inputs: Optional[Path] = Arg(None, help="Location of input file. '-' for stdin.", exists=True, allow_dash=True),
+ n_texts: int = Opt(10000, "--n-texts", "-n", help="Maximum number of texts to use if available"),
+ # fmt: on
+):
"""
- Profile a spaCy pipeline, to find out which functions take the most time.
+ Profile which functions take the most time in a spaCy pipeline.
Input should be formatted as one JSON object per line with a key "text".
It can either be provided as a JSONL file, or be read from sys.stdin.
If no input file is specified, the IMDB dataset is loaded via Thinc.
+
+ DOCS: https://nightly.spacy.io/api/cli#debug-profile
"""
+ if ctx.parent.command.name == NAME: # called as top-level command
+ msg.warn(
+ "The profile command is now available via the 'debug profile' "
+ "subcommand. You can run python -m spacy debug --help for an "
+ "overview of the other available debugging commands."
+ )
+ profile(model, inputs=inputs, n_texts=n_texts)
+
+
+def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) -> None:
+
if inputs is not None:
inputs = _read_inputs(inputs, msg)
if inputs is None:
+ try:
+ import ml_datasets
+ except ImportError:
+ msg.fail(
+ "This command, when run without an input file, "
+ "requires the ml_datasets library to be installed: "
+ "pip install ml_datasets",
+ exits=1,
+ )
+
n_inputs = 25000
with msg.loading("Loading IMDB dataset via Thinc..."):
- imdb_train, _ = thinc.extra.datasets.imdb()
+ imdb_train, _ = ml_datasets.imdb()
inputs, _ = zip(*imdb_train)
- msg.info("Loaded IMDB dataset and using {} examples".format(n_inputs))
+ msg.info(f"Loaded IMDB dataset and using {n_inputs} examples")
inputs = inputs[:n_inputs]
- with msg.loading("Loading model '{}'...".format(model)):
+ with msg.loading(f"Loading pipeline '{model}'..."):
nlp = load_model(model)
- msg.good("Loaded model '{}'".format(model))
+ msg.good(f"Loaded pipeline '{model}'")
texts = list(itertools.islice(inputs, n_texts))
cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(), "Profile.prof")
s = pstats.Stats("Profile.prof")
@@ -46,12 +72,12 @@ def profile(model, inputs=None, n_texts=10000):
s.strip_dirs().sort_stats("time").print_stats()
-def parse_texts(nlp, texts):
+def parse_texts(nlp: Language, texts: Sequence[str]) -> None:
for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16):
pass
-def _read_inputs(loc, msg):
+def _read_inputs(loc: Union[Path, str], msg: Printer) -> Iterator[str]:
if loc == "-":
msg.info("Reading input from sys.stdin")
file_ = sys.stdin
@@ -60,7 +86,7 @@ def _read_inputs(loc, msg):
input_path = Path(loc)
if not input_path.exists() or not input_path.is_file():
msg.fail("Not a valid input data file", loc, exits=1)
- msg.info("Using data from {}".format(input_path.parts[-1]))
+ msg.info(f"Using data from {input_path.parts[-1]}")
file_ = input_path.open()
for line in file_:
data = srsly.json_loads(line)
diff --git a/bin/__init__.py b/spacy/cli/project/__init__.py
similarity index 100%
rename from bin/__init__.py
rename to spacy/cli/project/__init__.py
diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py
new file mode 100644
index 000000000..2b623675d
--- /dev/null
+++ b/spacy/cli/project/assets.py
@@ -0,0 +1,139 @@
+from typing import Optional
+from pathlib import Path
+from wasabi import msg
+import re
+import shutil
+import requests
+
+from ...util import ensure_path, working_dir
+from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
+from .._util import download_file, git_sparse_checkout
+
+
+@project_cli.command("assets")
+def project_assets_cli(
+    # fmt: off
+    project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False),
+    # fmt: on
+):
+    """Fetch project assets like datasets and pretrained weights. Assets are
+    defined in the "assets" section of the project.yml. If a checksum is
+    provided in the project.yml, the file is only downloaded if no local file
+    with the same checksum exists.
+
+    DOCS: https://nightly.spacy.io/api/cli#project-assets
+    """
+    # Thin command-line entry point: all of the work happens in project_assets
+    project_assets(project_dir)
+
+
+def project_assets(project_dir: Path) -> None:
+    """Fetch the assets defined in the "assets" section of a project's config.
+    Assets are either checked out from a Git repo, downloaded from a URL or,
+    if neither is given, expected to be provided by the user.
+
+    project_dir (Path): Path to project directory.
+    """
+    project_path = ensure_path(project_dir)
+    config = load_project_config(project_path)
+    assets = config.get("assets", [])
+    if not assets:
+        msg.warn(f"No assets specified in {PROJECT_FILE}", exits=0)
+    msg.info(f"Fetching {len(assets)} asset(s)")
+    for asset in assets:
+        dest = Path(asset["dest"])
+        # Asset destinations are relative to the project, so resolve them
+        # against the project directory instead of the current working dir.
+        dest_path = project_path / dest
+        checksum = asset.get("checksum")
+        if "git" in asset:
+            if dest_path.exists():
+                # If there's already a file, check for checksum
+                if checksum and checksum == get_checksum(dest_path):
+                    msg.good(f"Skipping download with matching checksum: {dest}")
+                    continue
+                # shutil.rmtree only works on directories, but a Git asset can
+                # also be a single file.
+                if dest_path.is_dir():
+                    shutil.rmtree(dest_path)
+                else:
+                    dest_path.unlink()
+            git_sparse_checkout(
+                asset["git"]["repo"],
+                asset["git"]["path"],
+                dest_path,
+                branch=asset["git"].get("branch"),
+            )
+        else:
+            url = asset.get("url")
+            if not url:
+                # project.yml defines asset without URL that the user has to place
+                check_private_asset(dest_path, checksum)
+                continue
+            # fetch_asset joins dest with the project path itself
+            fetch_asset(project_path, url, dest, checksum)
+
+
+def check_private_asset(dest: Path, checksum: Optional[str] = None) -> None:
+    """Check and validate assets without a URL (private assets that the user
+    has to provide themselves) and give feedback about the checksum.
+
+    dest (Path): Destination path of the asset.
+    checksum (Optional[str]): Optional checksum of the expected file.
+    """
+    if not Path(dest).exists():
+        err = f"No URL provided for asset. You need to add this file yourself: {dest}"
+        msg.warn(err)
+    elif not checksum:
+        # Nothing to compare against, so an existing file is all we can ask
+        # for. Don't report it as a checksum mismatch.
+        msg.good(f"Asset already exists: {dest}")
+    elif checksum == get_checksum(dest):
+        msg.good(f"Asset exists with matching checksum: {dest}")
+    else:
+        msg.fail(f"Asset available but with incorrect checksum: {dest}")
+
+
+def fetch_asset(
+    project_path: Path, url: str, dest: Path, checksum: Optional[str] = None
+) -> Optional[Path]:
+    """Fetch an asset from a given URL or path. If a checksum is provided and a
+    local file exists, it's only re-downloaded if the checksum doesn't match.
+
+    project_path (Path): Path to project directory.
+    url (str): URL or path to asset.
+    dest (Path): Destination of the asset, relative to the project directory.
+    checksum (Optional[str]): Optional expected checksum of local file.
+    RETURNS (Optional[Path]): The path to the fetched asset or None if fetching
+        the asset failed.
+    """
+    dest_path = (project_path / dest).resolve()
+    if dest_path.exists() and checksum:
+        # If there's already a file, check for checksum
+        if checksum == get_checksum(dest_path):
+            msg.good(f"Skipping download with matching checksum: {dest}")
+            return dest_path
+    # We might as well support the user here and create parent directories in
+    # case the asset dir isn't listed as a dir to create in the project.yml
+    dest_path.parent.mkdir(parents=True, exist_ok=True)
+    with working_dir(project_path):
+        url = convert_asset_url(url)
+        try:
+            download_file(url, dest_path)
+            msg.good(f"Downloaded asset {dest}")
+        except requests.exceptions.RequestException as e:
+            if Path(url).is_file():
+                # If it's a local file, copy to destination
+                shutil.copy(url, str(dest_path))
+                msg.good(f"Copied local asset {dest}")
+            else:
+                msg.fail(f"Download failed: {dest}", e)
+                return None
+    if checksum and checksum != get_checksum(dest_path):
+        msg.fail(f"Checksum doesn't match value defined in {PROJECT_FILE}: {dest}")
+    # The asset is in place (even if the checksum check above complained)
+    return dest_path
+
+
+def convert_asset_url(url: str) -> str:
+    """Rewrite a regular GitHub page URL to the matching raw content URL and
+    warn about it. Every other URL is returned unchanged.
+
+    url (str): The asset URL.
+    RETURNS (str): The converted URL.
+    """
+    is_github = re.match(r"(http(s?)):\/\/github.com", url)
+    if not is_github or "releases/download" in url:
+        # Release downloads already point at a real file
+        return url
+    # A regular GitHub URL serves the HTML page, so it's likely a mistake
+    raw_url = url.replace("github.com", "raw.githubusercontent.com")
+    raw_url = re.sub(r"/(tree|blob)/", "/", raw_url)
+    msg.warn(
+        "Downloading from a regular GitHub URL. This will only download "
+        "the source of the page, not the actual file. Converting the URL "
+        "to a raw URL.",
+        raw_url,
+    )
+    return raw_url
diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py
new file mode 100644
index 000000000..a419feb0f
--- /dev/null
+++ b/spacy/cli/project/clone.py
@@ -0,0 +1,86 @@
+from typing import Optional
+from pathlib import Path
+from wasabi import msg
+import subprocess
+import re
+
+from ... import about
+from ...util import ensure_path
+from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
+from .._util import git_sparse_checkout
+
+
+@project_cli.command("clone")
+def project_clone_cli(
+    # fmt: off
+    name: str = Arg(..., help="The name of the template to clone"),
+    dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False),
+    repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to clone from"),
+    # fmt: on
+):
+    """Clone a project template from a repository. Calls into "git" and will
+    only download the files from the given subdirectory. The GitHub repo
+    defaults to the official spaCy template repo, but can be customized
+    (including using a private repo).
+
+    DOCS: https://nightly.spacy.io/api/cli#project-clone
+    """
+    # Without an explicit destination, clone into a new directory named after
+    # the template inside the current working directory.
+    if dest is None:
+        dest = Path.cwd() / name
+    project_clone(name, dest, repo=repo)
+
+
+def project_clone(name: str, dest: Path, *, repo: str = about.__projects__) -> None:
+    """Clone one project template directory out of a Git repository and tell
+    the user how to continue.
+
+    name (str): Name of subdirectory to clone.
+    dest (Path): Destination path of cloned project.
+    repo (str): URL of Git repo containing project templates.
+    """
+    dest = ensure_path(dest)
+    check_clone(name, dest, repo)
+    target_dir = dest.resolve()
+    # Shorter display name for messages: drop the GitHub host prefix
+    short_repo = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
+    try:
+        git_sparse_checkout(repo, name, dest)
+    except subprocess.CalledProcessError:
+        msg.fail(f"Could not clone '{name}' from repo '{short_repo}'", exits=1)
+    msg.good(f"Cloned '{name}' from {short_repo}", target_dir)
+    if (target_dir / PROJECT_FILE).exists():
+        msg.good("Your project is now ready!")
+    else:
+        msg.warn(f"No {PROJECT_FILE} found in directory")
+    print(f"To fetch the assets, run:\n{COMMAND} project assets {dest}")
+
+
+def check_clone(name: str, dest: Path, repo: str) -> None:
+    """Check and validate that the destination path can be used to clone. Will
+    check that Git is available and that the destination path is suitable.
+    Prints an error and exits if one of the checks fails.
+
+    name (str): Name of the directory to clone from the repo.
+    dest (Path): Local destination of cloned directory.
+    repo (str): URL of the repo to clone from.
+    """
+    try:
+        subprocess.run(["git", "--version"], stdout=subprocess.DEVNULL)
+    except Exception:
+        # msg.fail only takes a title and a text as positional messages, so
+        # the follow-up command has to be part of the text to be printed.
+        msg.fail(
+            "Cloning spaCy project templates requires Git and the 'git' command. ",
+            f"To clone a project without Git, copy the files from the '{name}' "
+            f"directory in the {repo} to {dest} manually and then run: "
+            f"{COMMAND} project init {dest}",
+            exits=1,
+        )
+    if not dest:
+        msg.fail(f"Not a valid directory to clone project: {dest}", exits=1)
+    if dest.exists():
+        # Directory already exists (not allowed, clone needs to create it)
+        msg.fail(f"Can't clone project, directory already exists: {dest}", exits=1)
+    if not dest.parent.exists():
+        # We're not creating parents, parent dir should exist
+        msg.fail(
+            f"Can't clone project, parent directory doesn't exist: {dest.parent}. "
+            "Create the necessary folder(s) first before continuing.",
+            exits=1,
+        )
diff --git a/spacy/cli/project/document.py b/spacy/cli/project/document.py
new file mode 100644
index 000000000..d0265029a
--- /dev/null
+++ b/spacy/cli/project/document.py
@@ -0,0 +1,119 @@
+from pathlib import Path
+from wasabi import msg, MarkdownRenderer
+
+from ...util import working_dir
+from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
+
+
+DOCS_URL = "https://nightly.spacy.io"
+INTRO = f"""> ⚠️ This project template uses the new [**spaCy v3.0**]({DOCS_URL}), which
+> is currently available as a nightly pre-release. You can install it from pip as `spacy-nightly`:
+> `pip install spacy-nightly`. Make sure to use a fresh virtual environment."""
+INTRO_PROJECT = f"""The [`{PROJECT_FILE}`]({PROJECT_FILE}) defines the data assets required by the
+project, as well as the available commands and workflows. For details, see the
+[spaCy projects documentation]({DOCS_URL}/usage/projects)."""
+INTRO_COMMANDS = f"""The following commands are defined by the project. They
+can be executed using [`spacy project run [name]`]({DOCS_URL}/api/cli#project-run).
+Commands are only re-run if their inputs have changed."""
+INTRO_WORKFLOWS = f"""The following workflows are defined by the project. They
+can be executed using [`spacy project run [name]`]({DOCS_URL}/api/cli#project-run)
+and will run the specified commands in order. Commands are only re-run if their
+inputs have changed."""
+INTRO_ASSETS = f"""The following assets are defined by the project. They can
+be fetched by running [`spacy project assets`]({DOCS_URL}/api/cli#project-assets)
+in the project directory."""
+# These markers are added to the Markdown and can be used to update the file in
+# place if it already exists. Only the auto-generated part will be replaced.
+# They are HTML comments, so they stay hidden in the rendered Markdown. They
+# must never be empty: an empty string is contained in every string, which
+# would make every existing file count as "ignored". The opening "<" is kept
+# separate so that tools stripping HTML comments can't empty the literals.
+MARKER_START = "<" + "!-- SPACY PROJECT: AUTO-GENERATED DOCS START (do not remove) -->"
+MARKER_END = "<" + "!-- SPACY PROJECT: AUTO-GENERATED DOCS END (do not remove) -->"
+# If this marker is used in an existing README, it's ignored and not replaced
+MARKER_IGNORE = "<" + "!-- SPACY PROJECT: IGNORE -->"
+
+
+@project_cli.command("document")
+def project_document_cli(
+    # fmt: off
+    project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False),
+    output_file: Path = Opt("-", "--output", "-o", help="Path to output Markdown file for output. Defaults to - for standard output"),
+    no_emoji: bool = Opt(False, "--no-emoji", "-NE", help="Don't use emoji")
+    # fmt: on
+):
+    """
+    Auto-generate a README.md for a project. If the content is saved to a file,
+    hidden markers are added so you can add custom content before or after the
+    auto-generated section and only the auto-generated docs will be replaced
+    when you re-run the command.
+
+    DOCS: https://nightly.spacy.io/api/cli#project-document
+    """
+    # Thin command-line entry point: all of the work happens in project_document
+    project_document(project_dir, output_file, no_emoji=no_emoji)
+
+
+def project_document(
+    project_dir: Path, output_file: Path, *, no_emoji: bool = False
+) -> None:
+    """Generate Markdown documentation for a project from its config and print
+    it or write it to a file.
+
+    project_dir (Path): The project directory containing the project config.
+    output_file (Path): File to write to, or "-" to print to standard output.
+        In an existing file, only the section between the start and end
+        markers is replaced, and a file with the ignore marker is left alone.
+    no_emoji (bool): Don't use emoji in the generated titles.
+    """
+    is_stdout = str(output_file) == "-"
+    config = load_project_config(project_dir)
+    md = MarkdownRenderer(no_emoji=no_emoji)
+    md.add(MARKER_START)
+    title = config.get("title")
+    description = config.get("description")
+    md.add(md.title(1, f"spaCy Project{f': {title}' if title else ''}", "🪐"))
+    md.add(INTRO)
+    if description:
+        md.add(description)
+    md.add(md.title(2, PROJECT_FILE, "📋"))
+    md.add(INTRO_PROJECT)
+    # Commands
+    cmds = config.get("commands", [])
+    data = [(md.code(cmd["name"]), cmd.get("help", "")) for cmd in cmds]
+    if data:
+        md.add(md.title(3, "Commands", "⏯"))
+        md.add(INTRO_COMMANDS)
+        md.add(md.table(data, ["Command", "Description"]))
+    # Workflows
+    wfs = config.get("workflows", {}).items()
+    data = [(md.code(n), " → ".join(md.code(w) for w in stp)) for n, stp in wfs]
+    if data:
+        md.add(md.title(3, "Workflows", "⏭"))
+        md.add(INTRO_WORKFLOWS)
+        md.add(md.table(data, ["Workflow", "Steps"]))
+    # Assets
+    assets = config.get("assets", [])
+    data = []
+    for a in assets:
+        source = "Git" if a.get("git") else "URL" if a.get("url") else "Local"
+        dest_path = a["dest"]
+        dest = md.code(dest_path)
+        if source == "Local":
+            # Only link assets if they're in the repo
+            with working_dir(project_dir) as p:
+                if (p / dest_path).exists():
+                    dest = md.link(dest, dest_path)
+        data.append((dest, source, a.get("description", "")))
+    if data:
+        md.add(md.title(3, "Assets", "🗂"))
+        md.add(INTRO_ASSETS)
+        md.add(md.table(data, ["File", "Source", "Description"]))
+    md.add(MARKER_END)
+    # Output result
+    if is_stdout:
+        print(md.text)
+    else:
+        content = md.text
+        if output_file.exists():
+            with output_file.open("r", encoding="utf8") as f:
+                existing = f.read()
+            if MARKER_IGNORE in existing:
+                msg.warn("Found ignore marker in existing file: skipping", output_file)
+                return
+            if MARKER_START in existing and MARKER_END in existing:
+                msg.info("Found existing file: only replacing auto-generated docs")
+                before = existing.split(MARKER_START)[0]
+                after = existing.split(MARKER_END)[1]
+                content = f"{before}{content}{after}"
+            else:
+                msg.warn("Replacing existing file")
+        # Write with the same encoding the file is read with. The content
+        # contains emoji, which the platform default encoding may not support.
+        with output_file.open("w", encoding="utf8") as f:
+            f.write(content)
+        msg.good("Saved project documentation", output_file)
diff --git a/spacy/cli/project/dvc.py b/spacy/cli/project/dvc.py
new file mode 100644
index 000000000..541253234
--- /dev/null
+++ b/spacy/cli/project/dvc.py
@@ -0,0 +1,204 @@
+"""This module contains helpers and subcommands for integrating spaCy projects
+with Data Version Control (DVC). https://dvc.org"""
+from typing import Dict, Any, List, Optional, Iterable
+import subprocess
+from pathlib import Path
+from wasabi import msg
+
+from .._util import PROJECT_FILE, load_project_config, get_hash, project_cli
+from .._util import Arg, Opt, NAME, COMMAND
+from ...util import working_dir, split_command, join_command, run_command
+from ...util import SimpleFrozenList
+
+
+DVC_CONFIG = "dvc.yaml"
+DVC_DIR = ".dvc"
+UPDATE_COMMAND = "dvc"
+DVC_CONFIG_COMMENT = f"""# This file is auto-generated by spaCy based on your {PROJECT_FILE}. If you've
+# edited your {PROJECT_FILE}, you can regenerate this file by running:
+# {COMMAND} project {UPDATE_COMMAND}"""
+
+
+@project_cli.command(UPDATE_COMMAND)
+def project_update_dvc_cli(
+    # fmt: off
+    project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
+    workflow: Optional[str] = Arg(None, help=f"Name of workflow defined in {PROJECT_FILE}. Defaults to first workflow if not set."),
+    verbose: bool = Opt(False, "--verbose", "-V", help="Print more info"),
+    force: bool = Opt(False, "--force", "-F", help="Force update DVC config"),
+    # fmt: on
+):
+    """Auto-generate Data Version Control (DVC) config. A DVC
+    project can only define one pipeline, so you need to specify one workflow
+    defined in the project.yml. If no workflow is specified, the first defined
+    workflow is used. The DVC config will only be updated if the project.yml
+    changed.
+
+    DOCS: https://nightly.spacy.io/api/cli#project-dvc
+    """
+    # Thin command-line entry point: all of the work happens in
+    # project_update_dvc
+    project_update_dvc(project_dir, workflow, verbose=verbose, force=force)
+
+
+def project_update_dvc(
+    project_dir: Path,
+    workflow: Optional[str] = None,
+    *,
+    verbose: bool = False,
+    force: bool = False,
+) -> None:
+    """Regenerate the DVC config for one workflow of a project and report
+    whether anything was written. A DVC project can only define one pipeline,
+    so exactly one workflow from the project.yml is used.
+
+    project_dir (Path): The project directory.
+    workflow (Optional[str]): Optional name of workflow defined in project.yml.
+        If not set, the first workflow will be used.
+    verbose (bool): Print more info.
+    force (bool): Force update DVC config.
+    """
+    project_config = load_project_config(project_dir)
+    was_updated = update_dvc_config(
+        project_dir, project_config, workflow, verbose=verbose, force=force
+    )
+    hint = "To execute the workflow with DVC, run: dvc repro"
+    if not was_updated:
+        msg.info(f"No changes found in {PROJECT_FILE}, no update needed", hint)
+        return
+    msg.good(f"Updated DVC config from {PROJECT_FILE}", hint)
+
+
+def update_dvc_config(
+    path: Path,
+    config: Dict[str, Any],
+    workflow: Optional[str] = None,
+    verbose: bool = False,
+    silent: bool = False,
+    force: bool = False,
+) -> bool:
+    """Re-run the DVC commands in dry mode and update dvc.yaml file in the
+    project directory. The file is auto-generated based on the config. The
+    first line of the auto-generated file specifies the hash of the config
+    dict, so if any of the config values change, the DVC config is regenerated.
+
+    path (Path): The path to the project directory.
+    config (Dict[str, Any]): The loaded project.yml.
+    workflow (Optional[str]): Name of the workflow to generate the DVC config
+        for. If not set, the first workflow defined in the config is used.
+    verbose (bool): Whether to print additional info (via DVC).
+    silent (bool): Don't output anything (via DVC).
+    force (bool): Force update, even if hashes match.
+    RETURNS (bool): Whether the DVC config file was updated.
+    """
+    ensure_dvc(path)
+    workflows = config.get("workflows", {})
+    workflow_names = list(workflows.keys())
+    # Exits with an error if there are no workflows or the name is unknown
+    check_workflows(workflow_names, workflow)
+    if not workflow:
+        workflow = workflow_names[0]
+    config_hash = get_hash(config)
+    path = path.resolve()
+    dvc_config_path = path / DVC_CONFIG
+    if dvc_config_path.exists():
+        # Check if the file was generated using the current config, if not, redo
+        with dvc_config_path.open("r", encoding="utf8") as f:
+            ref_hash = f.readline().strip().replace("# ", "")
+        if ref_hash == config_hash and not force:
+            return False  # Nothing has changed in project.yml, don't need to update
+        # Remove the outdated file so the commands below start from scratch
+        dvc_config_path.unlink()
+    dvc_commands = []
+    config_commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
+    for name in workflows[workflow]:
+        command = config_commands[name]
+        deps = command.get("deps", [])
+        outputs = command.get("outputs", [])
+        outputs_no_cache = command.get("outputs_no_cache", [])
+        # Commands without any dependencies or outputs are left out
+        if not deps and not outputs and not outputs_no_cache:
+            continue
+        # Default to the working dir as the project path since dvc.yaml is auto-generated
+        # and we don't want arbitrary paths in there
+        project_cmd = ["python", "-m", NAME, "project", "run", name]
+        # Flatten the (flag, path) pairs into one flat list of arguments each
+        deps_cmd = [c for cl in [["-d", p] for p in deps] for c in cl]
+        outputs_cmd = [c for cl in [["-o", p] for p in outputs] for c in cl]
+        outputs_nc_cmd = [c for cl in [["-O", p] for p in outputs_no_cache] for c in cl]
+        dvc_cmd = ["run", "-n", name, "-w", str(path), "--no-exec"]
+        if command.get("no_skip"):
+            dvc_cmd.append("--always-changed")
+        full_cmd = [*dvc_cmd, *deps_cmd, *outputs_cmd, *outputs_nc_cmd, *project_cmd]
+        dvc_commands.append(join_command(full_cmd))
+    with working_dir(path):
+        dvc_flags = {"--verbose": verbose, "--quiet": silent}
+        run_dvc_commands(dvc_commands, flags=dvc_flags)
+    # Prepend the config hash and the "auto-generated" notice to the file.
+    # NOTE(review): this expects the DVC commands above to have created the
+    # file. If every command was skipped, no command ran - confirm that case.
+    with dvc_config_path.open("r+", encoding="utf8") as f:
+        content = f.read()
+        f.seek(0, 0)
+        f.write(f"# {config_hash}\n{DVC_CONFIG_COMMENT}\n{content}")
+    return True
+
+
+def run_dvc_commands(
+    commands: Iterable[str] = SimpleFrozenList(), flags: Dict[str, bool] = {},
+) -> None:
+    """Run each of the given DVC commands in a subprocess, one after another.
+
+    commands (List[str]): The string commands without the leading "dvc".
+    flags (Dict[str, bool]): Conditional flags to be added to command. Makes it
+        easier to pass flags like --quiet that depend on a variable or
+        command-line setting while avoiding lots of nested conditionals.
+    """
+    # Only the flags that are set to True are appended to every command
+    active_flags = [flag for flag, is_active in flags.items() if is_active]
+    for command in commands:
+        run_command(["dvc", *split_command(command), *active_flags])
+
+
+def check_workflows(workflows: List[str], workflow: Optional[str] = None) -> None:
+    """Make sure a DVC config can be generated: at least one workflow has to
+    be defined and a requested workflow has to be one of them. Warns if no
+    workflow was requested and the first one is used.
+
+    workflows (List[str]): Names of the available workflows.
+    workflow (Optional[str]): The name of the workflow to convert.
+    """
+    if not workflows:
+        no_workflows_err = (
+            f"No workflows defined in {PROJECT_FILE}. To generate a DVC config, "
+            "define at least one list of commands."
+        )
+        msg.fail(no_workflows_err, exits=1)
+    is_unknown = workflow is not None and workflow not in workflows
+    if is_unknown:
+        unknown_err = (
+            f"Workflow '{workflow}' not defined in {PROJECT_FILE}. "
+            f"Available workflows: {', '.join(workflows)}"
+        )
+        msg.fail(unknown_err, exits=1)
+    if not workflow:
+        msg.warn(
+            "No workflow specified for DVC pipeline. Using the first workflow "
+            f"defined in {PROJECT_FILE}: '{workflows[0]}'"
+        )
+
+
+def ensure_dvc(project_dir: Path) -> None:
+    """Ensure that the "dvc" command is available and that the current project
+    directory is an initialized DVC project. Prints an error and exits if not.
+
+    project_dir (Path): The project directory to check.
+    """
+    try:
+        subprocess.run(["dvc", "--version"], stdout=subprocess.DEVNULL)
+    except Exception:
+        msg.fail(
+            "To use spaCy projects with DVC (Data Version Control), DVC needs "
+            "to be installed and the 'dvc' command needs to be available",
+            "You can install the Python package from pip (pip install dvc) or "
+            "conda (conda install -c conda-forge dvc). For more details, see the "
+            "documentation: https://dvc.org/doc/install",
+            exits=1,
+        )
+    # Use the module-level constant rather than repeating the directory name
+    if not (project_dir / DVC_DIR).exists():
+        msg.fail(
+            "Project not initialized as a DVC project",
+            "To initialize a DVC project, you can run 'dvc init' in the project "
+            "directory. For more details, see the documentation: "
+            "https://dvc.org/doc/command-reference/init",
+            exits=1,
+        )
diff --git a/spacy/cli/project/pull.py b/spacy/cli/project/pull.py
new file mode 100644
index 000000000..655e2f459
--- /dev/null
+++ b/spacy/cli/project/pull.py
@@ -0,0 +1,44 @@
+from pathlib import Path
+from wasabi import msg
+from .remote_storage import RemoteStorage
+from .remote_storage import get_command_hash
+from .._util import project_cli, Arg
+from .._util import load_project_config
+from .run import update_lockfile
+
+
+# CLI entry point for "project pull". NOTE(review): the docstring below is
+# presumably surfaced as the command's help text by the CLI framework, so
+# treat it as user-facing output.
+@project_cli.command("pull")
+def project_pull_cli(
+ # fmt: off
+ remote: str = Arg("default", help="Name or path of remote storage"),
+ project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
+ # fmt: on
+):
+ """Retrieve available precomputed outputs from a remote storage.
+ You can alias remotes in your project.yml by mapping them to storage paths.
+ A storage can be anything that the smart-open library can upload to, e.g.
+ AWS, Google Cloud Storage, SSH, local directories etc.
+
+ DOCS: https://nightly.spacy.io/api/cli#project-pull
+ """
+ # project_pull() yields (url, output_path) pairs. The url is whatever
+ # RemoteStorage.pull() returned, which is None when the output already
+ # exists locally or no matching object was found, so only real downloads
+ # are reported here.
+ for url, output_path in project_pull(project_dir, remote):
+ if url is not None:
+ msg.good(f"Pulled {output_path} from {url}")
+
+def project_pull(project_dir: Path, remote: str, *, verbose: bool = False):
+    """Pull the declared outputs of the project's commands from a remote
+    storage. Commands with at least one missing dependency are skipped. If all
+    outputs of a command exist locally after pulling, its lockfile entry is
+    updated.
+
+    project_dir (Path): The project directory.
+    remote (str): Either a key of the "remotes" mapping in the project config
+        (which is then replaced by the mapped storage path) or a storage
+        path that is used as-is.
+    verbose (bool): Currently unused; kept for interface compatibility.
+    YIELDS (tuple): (url, output_path) for every output of every processed
+        command. The url is the return value of RemoteStorage.pull and is
+        None if nothing was downloaded for that output.
+    """
+    config = load_project_config(project_dir)
+    remotes = config.get("remotes", {})
+    if remote in remotes:
+        remote = remotes[remote]
+    storage = RemoteStorage(project_dir, remote)
+    for cmd in config.get("commands", []):
+        deps = [project_dir / dep for dep in cmd.get("deps", [])]
+        if any(not dep.exists() for dep in deps):
+            continue
+        cmd_hash = get_command_hash("", "", deps, cmd["script"])
+        outputs = cmd.get("outputs", [])
+        for output_path in outputs:
+            url = storage.pull(output_path, command_hash=cmd_hash)
+            yield url, output_path
+
+        # The outputs are plain strings relative to the project directory, so
+        # they have to be resolved to paths before their existence is checked.
+        if outputs and all((project_dir / out).exists() for out in outputs):
+            update_lockfile(project_dir, cmd)
diff --git a/spacy/cli/project/push.py b/spacy/cli/project/push.py
new file mode 100644
index 000000000..fcee2231a
--- /dev/null
+++ b/spacy/cli/project/push.py
@@ -0,0 +1,54 @@
+from pathlib import Path
+from wasabi import msg
+from .remote_storage import RemoteStorage
+from .remote_storage import get_content_hash, get_command_hash
+from .._util import load_project_config
+from .._util import project_cli, Arg
+
+
+# CLI entry point for "project push". NOTE(review): the docstring below is
+# presumably surfaced as the command's help text by the CLI framework, so
+# treat it as user-facing output.
+@project_cli.command("push")
+def project_push_cli(
+ # fmt: off
+ remote: str = Arg("default", help="Name or path of remote storage"),
+ project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
+ # fmt: on
+):
+ """Persist outputs to a remote storage. You can alias remotes in your
+ project.yml by mapping them to storage paths. A storage can be anything that
+ the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH,
+ local directories etc.
+
+ DOCS: https://nightly.spacy.io/api/cli#project-push
+ """
+ # project_push() yields (output_path, url) pairs. The url is whatever
+ # RemoteStorage.push() returned, which is None when an object already
+ # exists at the target URL and nothing was uploaded.
+ for output_path, url in project_push(project_dir, remote):
+ if url is None:
+ msg.info(f"Skipping {output_path}")
+ else:
+ msg.good(f"Pushed {output_path} to {url}")
+
+
+def project_push(project_dir: Path, remote: str):
+    """Upload the existing outputs of the project's commands to a remote
+    storage. Remotes can be aliased in the project.yml by mapping a name to a
+    storage path. A storage can be anything that the smart-open library can
+    upload to, e.g. gcs, aws, ssh, local directories etc.
+
+    Commands with a missing dependency and outputs that don't exist locally
+    are skipped. Yields (output_path, url) for every output handed to the
+    storage.
+    """
+    config = load_project_config(project_dir)
+    remotes = config.get("remotes", {})
+    if remote in remotes:
+        remote = remotes[remote]
+    storage = RemoteStorage(project_dir, remote)
+    for cmd in config.get("commands", []):
+        deps = [project_dir / dep for dep in cmd.get("deps", [])]
+        if not all(dep.exists() for dep in deps):
+            continue
+        cmd_hash = get_command_hash("", "", deps, cmd["script"])
+        for output_path in cmd.get("outputs", []):
+            output_loc = project_dir / output_path
+            if not output_loc.exists():
+                continue
+            content_hash = get_content_hash(output_loc)
+            url = storage.push(
+                output_path, command_hash=cmd_hash, content_hash=content_hash
+            )
+            yield output_path, url
diff --git a/spacy/cli/project/remote_storage.py b/spacy/cli/project/remote_storage.py
new file mode 100644
index 000000000..e7e7cbbe8
--- /dev/null
+++ b/spacy/cli/project/remote_storage.py
@@ -0,0 +1,169 @@
+from typing import Optional, List, Dict, TYPE_CHECKING
+import os
+import site
+import hashlib
+import urllib.parse
+import tarfile
+from pathlib import Path
+
+from .._util import get_hash, get_checksum, download_file, ensure_pathy
+from ...util import make_tempdir
+
+if TYPE_CHECKING:
+ from pathy import Pathy # noqa: F401
+
+
+class RemoteStorage:
+    """Push and pull outputs to and from a remote file storage.
+
+    Remotes can be anything that `smart-open` can support: AWS, GCS, file system,
+    ssh, etc.
+
+    Objects are stored as tar archives addressed as
+    <storage url>/<url-encoded project path>/<command hash>/<content hash>.
+    """
+
+    # Number of bytes copied per write when uploading an archive.
+    _CHUNK_SIZE = 1024 * 1024
+
+    def __init__(self, project_root: Path, url: str, *, compression="gz"):
+        """Create a storage handle.
+
+        project_root (Path): Directory that pushed/pulled paths are relative to.
+        url (str): Location of the storage, converted with ensure_pathy.
+        compression (str): tarfile compression suffix (e.g. "gz"). A falsy
+            value means the archives are not compressed.
+        """
+        self.root = project_root
+        self.url = ensure_pathy(url)
+        self.compression = compression
+
+    def push(
+        self, path: Path, command_hash: str, content_hash: str
+    ) -> Optional["Pathy"]:
+        """Compress a file or directory within a project and upload it to a remote
+        storage. If an object exists at the full URL, nothing is done.
+
+        Within the remote storage, files are addressed by their project path
+        (url encoded) and two user-supplied hashes, representing their creation
+        context and their file contents. If the URL already exists, the data is
+        not uploaded. Paths are archived and compressed prior to upload.
+
+        path (Path): Path of the file or directory, relative to the project root.
+        command_hash (str): Hash representing the creation context.
+        content_hash (str): Hash representing the file contents.
+        RETURNS (Optional[Pathy]): The URL the archive was uploaded to, or None
+            if an object already existed at that URL.
+        RAISES (IOError): If the path doesn't exist within the project.
+        """
+        loc = self.root / path
+        if not loc.exists():
+            raise IOError(f"Cannot push {loc}: does not exist.")
+        url = self.make_url(path, command_hash, content_hash)
+        if url.exists():
+            return None
+        tmp: Path
+        with make_tempdir() as tmp:
+            tar_loc = tmp / self.encode_name(str(path))
+            mode_string = f"w:{self.compression}" if self.compression else "w"
+            with tarfile.open(tar_loc, mode=mode_string) as tar_file:
+                # The archive member is named by its project-relative path, so
+                # that pull() can extract it relative to the project root.
+                tar_file.add(str(loc), arcname=str(path))
+            with tar_loc.open(mode="rb") as input_file:
+                with url.open(mode="wb") as output_file:
+                    # Copy in chunks so that large archives are never held in
+                    # memory as a whole.
+                    for chunk in iter(lambda: input_file.read(self._CHUNK_SIZE), b""):
+                        output_file.write(chunk)
+        return url
+
+    def pull(
+        self,
+        path: Path,
+        *,
+        command_hash: Optional[str] = None,
+        content_hash: Optional[str] = None,
+    ) -> Optional["Pathy"]:
+        """Retrieve a file from the remote cache. If the file already exists,
+        nothing is done.
+
+        If the command_hash and/or content_hash are specified, only matching
+        results are considered.
+
+        path (Path): Path of the file or directory, relative to the project root.
+        command_hash (Optional[str]): Hash representing the creation context.
+        content_hash (Optional[str]): Hash representing the file contents.
+        RETURNS (Optional[Pathy]): The URL the archive was downloaded from, or
+            None if the destination already exists or no match was found.
+        """
+        dest = self.root / path
+        if dest.exists():
+            return None
+        url = self.find(path, command_hash=command_hash, content_hash=content_hash)
+        if url is None:
+            return url
+        # Make sure the destination's parent directory exists
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        tmp: Path
+        with make_tempdir() as tmp:
+            tar_loc = tmp / url.parts[-1]
+            download_file(url, tar_loc)
+            mode_string = f"r:{self.compression}" if self.compression else "r"
+            with tarfile.open(tar_loc, mode=mode_string) as tar_file:
+                # This requires that the path is added correctly, relative
+                # to root. This is how we set things up in push()
+                # NOTE(review): extractall() trusts the member names stored in
+                # the archive. An archive that wasn't created by push() could
+                # write outside of self.root, so only pull from storages you
+                # trust.
+                tar_file.extractall(self.root)
+        return url
+
+    def find(
+        self,
+        path: Path,
+        *,
+        command_hash: Optional[str] = None,
+        content_hash: Optional[str] = None,
+    ) -> Optional["Pathy"]:
+        """Find a matching version of a file within the storage, or `None` if
+        no match can be found. If both the creation and content hash are
+        specified, only an exact match is returned. Otherwise, the last of the
+        matching entries listed by the storage is returned.
+
+        path (Path): Path of the file or directory, relative to the project root.
+        command_hash (Optional[str]): Hash representing the creation context.
+        content_hash (Optional[str]): Hash representing the file contents.
+        RETURNS (Optional[Pathy]): The URL of the matching archive, if any.
+        """
+        name = self.encode_name(str(path))
+        if command_hash is not None and content_hash is not None:
+            url = self.make_url(path, command_hash, content_hash)
+            urls = [url] if url.exists() else []
+        elif command_hash is not None:
+            urls = self._list_dir(self.url / name / command_hash)
+        else:
+            # Archives live one level below the command hash directories (see
+            # make_url), so descend into each of them to collect the archives.
+            urls = []
+            for command_dir in self._list_dir(self.url / name):
+                urls.extend(self._list_dir(command_dir))
+            if content_hash is not None:
+                urls = [url for url in urls if url.parts[-1] == content_hash]
+        # NOTE(review): the listing order is whatever iterdir() produces, so
+        # "last" is not guaranteed to be the most recently pushed archive.
+        return urls[-1] if urls else None
+
+    @staticmethod
+    def _list_dir(url) -> list:
+        """List the entries below a storage location. A location that doesn't
+        exist (nothing was pushed yet) or isn't a directory has no entries.
+        """
+        try:
+            return list(url.iterdir())
+        except (FileNotFoundError, NotADirectoryError):
+            return []
+
+    def make_url(self, path: Path, command_hash: str, content_hash: str) -> "Pathy":
+        """Construct a URL from a subpath, a creation hash and a content hash."""
+        return self.url / self.encode_name(str(path)) / command_hash / content_hash
+
+    def encode_name(self, name: str) -> str:
+        """Encode a subpath into a URL-safe name."""
+        return urllib.parse.quote_plus(name)
+
+
+def get_content_hash(loc: Path) -> str:
+ """Return the hash identifying the contents of a path, as computed by
+ get_checksum.
+
+ loc (Path): The path to hash.
+ RETURNS (str): The checksum of the path.
+ """
+ return get_checksum(loc)
+
+
+def get_command_hash(
+    site_hash: str, env_hash: str, deps: List[Path], cmd: List[str]
+) -> str:
+    """Compute a hash that identifies one execution of a command. The hash
+    covers the given site hash (installed packages), the given environment
+    hash, the checksums of the dependencies in sorted order, and the command
+    strings themselves.
+    """
+    parts = [site_hash, env_hash]
+    for dep in sorted(deps):
+        parts.append(get_checksum(dep))
+    for step in cmd:
+        parts.append(step)
+    digest = hashlib.md5("".join(parts).encode("utf8"))
+    return digest.hexdigest()
+
+
+def get_site_hash() -> str:
+    """Hash the current Python environment's site-packages contents, including
+    the name and version of the libraries. The names are taken from the
+    "*.dist-info" entries of the site directories, which is roughly what
+    `pip freeze` would list.
+
+    RETURNS (str): MD5 hex digest of the sorted, concatenated package names.
+    """
+    site_dirs = list(site.getsitepackages())
+    if site.ENABLE_USER_SITE:
+        # getusersitepackages() returns a single path string, so it has to be
+        # appended: extend() would add every character as its own "directory".
+        site_dirs.append(site.getusersitepackages())
+    packages = set()
+    for site_dir in site_dirs:
+        site_dir = Path(site_dir)
+        # The reported site directories aren't guaranteed to exist (the user
+        # site directory often doesn't), so skip the ones that are missing.
+        if not site_dir.is_dir():
+            continue
+        for subpath in site_dir.iterdir():
+            if subpath.name.endswith("dist-info"):
+                packages.add(subpath.name.replace(".dist-info", ""))
+    package_bytes = "".join(sorted(packages)).encode("utf8")
+    return hashlib.md5(package_bytes).hexdigest()
+
+
+def get_env_hash(env: Dict[str, str]) -> str:
+    """Hash the environment variables that will be passed into the commands.
+
+    A value of the form $ENV_VAR is a reference to the current os.environ and
+    is replaced by os.environ[ENV_VAR] (or an empty string if that variable
+    isn't set). All other values are used literally.
+    """
+    resolved = {
+        key: os.environ.get(value[1:], "") if value.startswith("$") else value
+        for key, value in env.items()
+    }
+    return get_hash(resolved)
diff --git a/spacy/cli/project/run.py b/spacy/cli/project/run.py
new file mode 100644
index 000000000..eb7b8cc5b
--- /dev/null
+++ b/spacy/cli/project/run.py
@@ -0,0 +1,249 @@
+from typing import Optional, List, Dict, Sequence, Any, Iterable
+from pathlib import Path
+from wasabi import msg
+import sys
+import srsly
+
+from ...util import working_dir, run_command, split_command, is_cwd, join_command
+from ...util import SimpleFrozenList
+from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
+from .._util import get_checksum, project_cli, Arg, Opt, COMMAND
+
+
+# CLI entry point for "project run". NOTE(review): the docstring below is
+# presumably surfaced as the command's help text by the CLI framework, so
+# treat it as user-facing output.
+@project_cli.command("run")
+def project_run_cli(
+ # fmt: off
+ subcommand: str = Arg(None, help=f"Name of command defined in the {PROJECT_FILE}"),
+ project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
+ force: bool = Opt(False, "--force", "-F", help="Force re-running steps, even if nothing changed"),
+ dry: bool = Opt(False, "--dry", "-D", help="Perform a dry run and don't execute scripts"),
+ show_help: bool = Opt(False, "--help", help="Show help message and available subcommands")
+ # fmt: on
+):
+ """Run a named command or workflow defined in the project.yml. If a workflow
+ name is specified, all commands in the workflow are run, in order. If
+ commands define dependencies and/or outputs, they will only be re-run if
+ state has changed.
+
+ DOCS: https://nightly.spacy.io/api/cli#project-run
+ """
+ # "--help" is declared as a regular option here so that the help can be
+ # generated from the project file: with --help or without a subcommand,
+ # only the help is printed and nothing is run.
+ if show_help or not subcommand:
+ print_run_help(project_dir, subcommand)
+ else:
+ project_run(project_dir, subcommand, force=force, dry=dry)
+
+
+def project_run(
+ project_dir: Path, subcommand: str, *, force: bool = False, dry: bool = False
+) -> None:
+ """Run a named command or workflow defined in the project.yml. A workflow
+ is executed by running each of its commands in order. A single command is
+ skipped if check_rerun() reports that nothing changed, unless force is
+ set. Otherwise its script is executed and, outside of dry runs, its
+ lockfile entry is updated.
+
+ project_dir (Path): Path to project directory.
+ subcommand (str): Name of command or workflow to run.
+ force (bool): Force re-running, even if nothing changed.
+ dry (bool): Perform a dry run and don't execute commands.
+ """
+ config = load_project_config(project_dir)
+ commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
+ workflows = config.get("workflows", {})
+ validate_subcommand(commands.keys(), workflows.keys(), subcommand)
+ if subcommand in workflows:
+ msg.info(f"Running workflow '{subcommand}'")
+ # Each workflow step goes through project_run again, so it is validated
+ # and checked for changes like a directly requested command.
+ for cmd in workflows[subcommand]:
+ project_run(project_dir, cmd, force=force, dry=dry)
+ else:
+ cmd = commands[subcommand]
+ for dep in cmd.get("deps", []):
+ if not (project_dir / dep).exists():
+ err = f"Missing dependency specified by command '{subcommand}': {dep}"
+ # A missing dependency exits with code 1, except in a dry run, where
+ # it is only reported.
+ err_kwargs = {"exits": 1} if not dry else {}
+ msg.fail(err, **err_kwargs)
+ with working_dir(project_dir) as current_dir:
+ rerun = check_rerun(current_dir, cmd)
+ if not rerun and not force:
+ msg.info(f"Skipping '{cmd['name']}': nothing changed")
+ else:
+ msg.divider(subcommand)
+ run_commands(cmd["script"], dry=dry)
+ # A dry run executes nothing, so the lockfile is left untouched.
+ if not dry:
+ update_lockfile(current_dir, cmd)
+
+
+def print_run_help(project_dir: Path, subcommand: Optional[str] = None) -> None:
+ """Simulate a CLI help prompt using the info available in the project.yml.
+
+ project_dir (Path): The project directory.
+ subcommand (Optional[str]): The subcommand or None. If a subcommand is
+ provided, the subcommand help is shown. Otherwise, the top-level help
+ and a list of available commands is printed.
+ """
+ config = load_project_config(project_dir)
+ config_commands = config.get("commands", [])
+ commands = {cmd["name"]: cmd for cmd in config_commands}
+ workflows = config.get("workflows", {})
+ # The project location is left out of the usage strings if the project
+ # directory is the current working directory.
+ project_loc = "" if is_cwd(project_dir) else project_dir
+ if subcommand:
+ validate_subcommand(commands.keys(), workflows.keys(), subcommand)
+ print(f"Usage: {COMMAND} project run {subcommand} {project_loc}")
+ # A name defined as a command takes precedence over a workflow of the
+ # same name.
+ if subcommand in commands:
+ help_text = commands[subcommand].get("help")
+ if help_text:
+ print(f"\n{help_text}\n")
+ elif subcommand in workflows:
+ steps = workflows[subcommand]
+ print(f"\nWorkflow consisting of {len(steps)} commands:")
+ # NOTE(review): commands[step] raises a KeyError if a workflow lists a
+ # step that isn't defined as a command.
+ steps_data = [
+ (f"{i + 1}. {step}", commands[step].get("help", ""))
+ for i, step in enumerate(steps)
+ ]
+ msg.table(steps_data)
+ help_cmd = f"{COMMAND} project run [COMMAND] {project_loc} --help"
+ print(f"For command details, run: {help_cmd}")
+ else:
+ print("")
+ title = config.get("title")
+ if title:
+ print(f"{title}\n")
+ if config_commands:
+ print(f"Available commands in {PROJECT_FILE}")
+ print(f"Usage: {COMMAND} project run [COMMAND] {project_loc}")
+ msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands])
+ if workflows:
+ print(f"Available workflows in {PROJECT_FILE}")
+ print(f"Usage: {COMMAND} project run [WORKFLOW] {project_loc}")
+ msg.table([(name, " -> ".join(steps)) for name, steps in workflows.items()])
+
+
+def run_commands(
+    commands: Iterable[str] = SimpleFrozenList(),
+    silent: bool = False,
+    dry: bool = False,
+) -> None:
+    """Execute the given commands one after another, each in a subprocess.
+
+    commands (List[str]): The string commands.
+    silent (bool): Don't print the commands.
+    dry (bool): Perform a dry run and don't execute anything.
+    """
+    for raw_command in commands:
+        parts = split_command(raw_command)
+        program = parts[0] if len(parts) else None
+        # Commands in a project config often just say "python" or "pip". Both
+        # are redirected to the interpreter that is running spaCy, so that the
+        # same Python and the pip of the same environment are used. This also
+        # keeps configs portable between a user who writes "python3" and one
+        # who doesn't have that shortcut. (It's an open question whether this
+        # rewriting is needed or a good idea.)
+        if program in ("python", "python3"):
+            parts[0] = sys.executable
+        elif program in ("pip", "pip3"):
+            parts = [sys.executable, "-m", "pip", *parts[1:]]
+        if not silent:
+            print(f"Running command: {join_command(parts)}")
+        if dry:
+            continue
+        run_command(parts)
+
+
+def validate_subcommand(
+    commands: Sequence[str], workflows: Sequence[str], subcommand: str
+) -> None:
+    """Make sure that a subcommand names a defined command or workflow.
+    Reports the problem via msg.fail with exits=1 otherwise.
+
+    commands (Sequence[str]): The available commands.
+    workflows (Sequence[str]): The available workflows.
+    subcommand (str): The subcommand.
+    """
+    if not commands and not workflows:
+        msg.fail(f"No commands or workflows defined in {PROJECT_FILE}", exits=1)
+    if subcommand in commands or subcommand in workflows:
+        return
+    # Unknown name: list whatever is available as a hint.
+    hints = []
+    if commands:
+        hints.append(f"Available commands: {', '.join(commands)}")
+    if workflows:
+        hints.append(f"Available workflows: {', '.join(workflows)}")
+    not_found = f"Can't find command or workflow '{subcommand}' in {PROJECT_FILE}"
+    msg.fail(not_found, ". ".join(hints), exits=1)
+
+
+def check_rerun(project_dir: Path, command: Dict[str, Any]) -> bool:
+    """Decide whether a command has to be run again, based on the lockfile.
+
+    project_dir (Path): The current project directory.
+    command (Dict[str, Any]): The command, as defined in the project.yml.
+    RETURNS (bool): Whether to re-run the command.
+    """
+    lock_path = project_dir / PROJECT_LOCK
+    if lock_path.exists():
+        data = srsly.read_yaml(lock_path)
+        name = command["name"]
+        if name in data:
+            entry = data[name]
+            # Only a recorded entry with outputs can lead to skipping the
+            # command. The command is unchanged if the entry that would be
+            # generated for it now hashes to the same value as the recorded
+            # one, i.e. inputs/outputs, checksums and script are all the same.
+            if entry.get("outs", []):
+                return get_hash(get_lock_entry(project_dir, command)) != get_hash(entry)
+    # No lockfile, no entry for this command, or no recorded outputs
+    # (such commands would otherwise always be skipped): run it.
+    return True
+
+
+def update_lockfile(project_dir: Path, command: Dict[str, Any]) -> None:
+    """Record a command in the lockfile after it was run. The lockfile is
+    created if it doesn't exist yet. The entry stored under the command's name
+    describes its script and its dependencies/outputs.
+
+    project_dir (Path): The current project directory.
+    command (Dict[str, Any]): The command, as defined in the project.yml.
+    """
+    lock_path = project_dir / PROJECT_LOCK
+    if lock_path.exists():
+        data = srsly.read_yaml(lock_path)
+    else:
+        srsly.write_yaml(lock_path, {})
+        data = {}
+    name = command["name"]
+    data[name] = get_lock_entry(project_dir, command)
+    srsly.write_yaml(lock_path, data)
+
+
+def get_lock_entry(project_dir: Path, command: Dict[str, Any]) -> Dict[str, Any]:
+    """Build the lockfile entry for a command. The entry holds the command,
+    its script (command steps) and the dependencies and outputs with their
+    paths and file hashes, if available. The format is based on the dvc.lock
+    files, to keep things consistent.
+
+    project_dir (Path): The current project directory.
+    command (Dict[str, Any]): The command, as defined in the project.yml.
+    RETURNS (Dict[str, Any]): The lockfile entry.
+    """
+    deps, outs, outs_nc = [
+        get_fileinfo(project_dir, command.get(key, []))
+        for key in ("deps", "outputs", "outputs_no_cache")
+    ]
+    entry = {}
+    entry["cmd"] = f"{COMMAND} run {command['name']}"
+    entry["script"] = command["script"]
+    entry["deps"] = deps
+    # Cached and non-cached outputs are recorded in a single list
+    entry["outs"] = [*outs, *outs_nc]
+    return entry
+
+
+def get_fileinfo(project_dir: Path, paths: List[str]) -> List[Dict[str, str]]:
+    """Describe a list of paths (dependencies, outputs) for the lockfile. Each
+    path is recorded together with its checksum, or None if it doesn't exist.
+
+    project_dir (Path): The current project directory.
+    paths (List[str]): The file paths.
+    RETURNS (List[Dict[str, str]]): The lockfile entry for a file.
+    """
+
+    def describe(path: str) -> Dict[str, str]:
+        file_path = project_dir / path
+        checksum = get_checksum(file_path) if file_path.exists() else None
+        return {"path": path, "md5": checksum}
+
+    return [describe(path) for path in paths]
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
new file mode 100644
index 000000000..43c852d13
--- /dev/null
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -0,0 +1,237 @@
+{# This is a template for training configs used for the quickstart widget in
+the docs and the init config command. It encodes various best practices and
+can help generate the best possible configuration, given a user's requirements. #}
+{%- set use_transformer = (transformer_data and hardware != "cpu") -%}
+{%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
+[paths]
+train = ""
+dev = ""
+
+[system]
+use_pytorch_for_gpu_memory = {{ "true" if use_transformer else "false" }}
+
+[nlp]
+lang = "{{ lang }}"
+{%- set full_pipeline = ["transformer" if use_transformer else "tok2vec"] + components %}
+pipeline = {{ full_pipeline|pprint()|replace("'", '"')|safe }}
+tokenizer = {"@tokenizers": "spacy.Tokenizer.v1"}
+
+[components]
+
+{# TRANSFORMER PIPELINE #}
+{%- if use_transformer -%}
+[components.transformer]
+factory = "transformer"
+
+[components.transformer.model]
+@architectures = "spacy-transformers.TransformerModel.v1"
+name = "{{ transformer["name"] }}"
+tokenizer_config = {"use_fast": true}
+
+[components.transformer.model.get_spans]
+@span_getters = "spacy-transformers.strided_spans.v1"
+window = 128
+stride = 96
+
+{% if "tagger" in components %}
+[components.tagger]
+factory = "tagger"
+
+[components.tagger.model]
+@architectures = "spacy.Tagger.v1"
+nO = null
+
+[components.tagger.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.tagger.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
+{%- endif %}
+
+{% if "parser" in components -%}
+[components.parser]
+factory = "parser"
+
+[components.parser.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 8
+hidden_width = 128
+maxout_pieces = 3
+use_upper = false
+nO = null
+
+[components.parser.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.parser.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
+{%- endif %}
+
+{% if "ner" in components -%}
+[components.ner]
+factory = "ner"
+
+[components.ner.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 3
+hidden_width = 64
+maxout_pieces = 2
+use_upper = false
+nO = null
+
+[components.ner.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.ner.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
+{% endif -%}
+
+{# NON-TRANSFORMER PIPELINE #}
+{% else -%}
+
+{%- if hardware == "gpu" -%}
+# There are no recommended transformer weights available for language '{{ lang }}'
+# yet, so the pipeline described here is not transformer-based.
+{%- endif %}
+
+[components.tok2vec]
+factory = "tok2vec"
+
+[components.tok2vec.model]
+@architectures = "spacy.Tok2Vec.v1"
+
+[components.tok2vec.model.embed]
+@architectures = "spacy.MultiHashEmbed.v1"
+width = ${components.tok2vec.model.encode.width}
+rows = {{ 2000 if optimize == "efficiency" else 7000 }}
+also_embed_subwords = {{ "true" if has_letters else "false" }}
+also_use_static_vectors = {{ "true" if optimize == "accuracy" else "false" }}
+
+[components.tok2vec.model.encode]
+@architectures = "spacy.MaxoutWindowEncoder.v1"
+width = {{ 96 if optimize == "efficiency" else 256 }}
+depth = {{ 4 if optimize == "efficiency" else 8 }}
+window_size = 1
+maxout_pieces = 3
+
+{% if "tagger" in components %}
+[components.tagger]
+factory = "tagger"
+
+[components.tagger.model]
+@architectures = "spacy.Tagger.v1"
+nO = null
+
+[components.tagger.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+{%- endif %}
+
+{% if "parser" in components -%}
+[components.parser]
+factory = "parser"
+
+[components.parser.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 8
+hidden_width = 128
+maxout_pieces = 3
+use_upper = true
+nO = null
+
+[components.parser.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+{%- endif %}
+
+{% if "ner" in components %}
+[components.ner]
+factory = "ner"
+
+[components.ner.model]
+@architectures = "spacy.TransitionBasedParser.v1"
+nr_feature_tokens = 6
+hidden_width = 64
+maxout_pieces = 2
+use_upper = true
+nO = null
+
+[components.ner.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+{% endif %}
+{% endif %}
+
+{% for pipe in components %}
+{% if pipe not in ["tagger", "parser", "ner"] %}
+{# Other components defined by the user: we just assume they're factories #}
+[components.{{ pipe }}]
+factory = "{{ pipe }}"
+{% endif %}
+{% endfor %}
+
+[training]
+{% if use_transformer or optimize == "efficiency" or not word_vectors -%}
+vectors = null
+{% else -%}
+vectors = "{{ word_vectors }}"
+{% endif -%}
+{% if use_transformer -%}
+accumulate_gradient = {{ transformer["size_factor"] }}
+{% endif %}
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+
+[training.optimizer.learn_rate]
+@schedules = "warmup_linear.v1"
+warmup_steps = 250
+total_steps = 20000
+initial_rate = 5e-5
+
+[training.train_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+max_length = {{ 500 if hardware == "gpu" else 2000 }}
+
+[training.dev_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+max_length = 0
+
+{% if use_transformer %}
+[training.batcher]
+@batchers = "spacy.batch_by_padded.v1"
+discard_oversize = true
+size = 2000
+buffer = 256
+{%- else %}
+[training.batcher]
+@batchers = "spacy.batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+{% endif %}
+
+[training.score_weights]
+{%- if "tagger" in components %}
+tag_acc = {{ (1.0 / components|length)|round(2) }}
+{%- endif -%}
+{%- if "parser" in components %}
+dep_uas = 0.0
+dep_las = {{ (1.0 / components|length)|round(2) }}
+sents_f = 0.0
+{%- endif %}
+{%- if "ner" in components %}
+ents_f = {{ (1.0 / components|length)|round(2) }}
+ents_p = 0.0
+ents_r = 0.0
+{%- endif -%}
diff --git a/spacy/cli/templates/quickstart_training_recommendations.yml b/spacy/cli/templates/quickstart_training_recommendations.yml
new file mode 100644
index 000000000..206e69954
--- /dev/null
+++ b/spacy/cli/templates/quickstart_training_recommendations.yml
@@ -0,0 +1,103 @@
+# Recommended settings and available resources for each language, if available.
+# Not all languages have recommended word vectors or transformers and for some,
+# the recommended transformer for efficiency and accuracy may be the same.
+en:
+ word_vectors: en_vectors_web_lg
+ transformer:
+ efficiency:
+ name: roberta-base
+ size_factor: 3
+ accuracy:
+ name: roberta-base
+ size_factor: 3
+de:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: bert-base-german-cased
+ size_factor: 3
+ accuracy:
+ name: bert-base-german-cased
+ size_factor: 3
+fr:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: camembert-base
+ size_factor: 3
+ accuracy:
+ name: camembert-base
+ size_factor: 3
+es:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: mrm8488/RuPERTa-base
+ size_factor: 3
+ accuracy:
+ name: mrm8488/RuPERTa-base
+ size_factor: 3
+sv:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: KB/bert-base-swedish-cased
+ size_factor: 3
+ accuracy:
+ name: KB/bert-base-swedish-cased
+ size_factor: 3
+fi:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: TurkuNLP/bert-base-finnish-cased-v1
+ size_factor: 3
+ accuracy:
+ name: TurkuNLP/bert-base-finnish-cased-v1
+ size_factor: 3
+el:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: nlpaueb/bert-base-greek-uncased-v1
+ size_factor: 3
+ accuracy:
+ name: nlpaueb/bert-base-greek-uncased-v1
+ size_factor: 3
+tr:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: dbmdz/bert-base-turkish-cased
+ size_factor: 3
+ accuracy:
+ name: dbmdz/bert-base-turkish-cased
+ size_factor: 3
+zh:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: bert-base-chinese
+ size_factor: 3
+ accuracy:
+ name: bert-base-chinese
+ size_factor: 3
+ has_letters: false
+ar:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: asafaya/bert-base-arabic
+ size_factor: 3
+ accuracy:
+ name: asafaya/bert-base-arabic
+ size_factor: 3
+pl:
+ word_vectors: null
+ transformer:
+ efficiency:
+ name: dkleczek/bert-base-polish-cased-v1
+ size_factor: 3
+ accuracy:
+ name: dkleczek/bert-base-polish-cased-v1
+ size_factor: 3
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 0614c7519..6be47fa39 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -1,773 +1,425 @@
-# coding: utf8
-from __future__ import unicode_literals, division, print_function
-
-import plac
-import os
+from typing import Optional, Dict, Any, Tuple, Union, Callable, List
+import srsly
import tqdm
from pathlib import Path
-from thinc.neural._classes.model import Model
-from timeit import default_timer as timer
-import shutil
-import srsly
from wasabi import msg
-import contextlib
+import thinc
+import thinc.schedules
+from thinc.api import use_pytorch_for_gpu_memory, require_gpu, fix_random_seed
+from thinc.api import Config, Optimizer
import random
+import typer
+import logging
-from .._ml import create_default_optimizer
-from ..util import use_gpu as set_gpu
-from ..errors import Errors
-from ..gold import GoldCorpus
-from ..compat import path2str
-from ..lookups import Lookups
+from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
+from ._util import import_code, get_sourced_components
+from ..language import Language
from .. import util
-from .. import about
+from ..gold.example import Example
+from ..errors import Errors
-@plac.annotations(
- # fmt: off
- lang=("Model language", "positional", None, str),
- output_path=("Output directory to store model in", "positional", None, Path),
- train_path=("Location of JSON-formatted training data", "positional", None, Path),
- dev_path=("Location of JSON-formatted development data", "positional", None, Path),
- raw_text=("Path to jsonl file with unlabelled text documents.", "option", "rt", Path),
- base_model=("Name of model to update (optional)", "option", "b", str),
- pipeline=("Comma-separated names of pipeline components", "option", "p", str),
- replace_components=("Replace components from base model", "flag", "R", bool),
- vectors=("Model to load vectors from", "option", "v", str),
- width=("Width of CNN layers of Tok2Vec component", "option", "cw", int),
- conv_depth=("Depth of CNN layers of Tok2Vec component", "option", "cd", int),
- cnn_window=("Window size for CNN layers of Tok2Vec component", "option", "cW", int),
- cnn_pieces=("Maxout size for CNN layers of Tok2Vec component. 1 for Mish", "option", "cP", int),
- use_chars=("Whether to use character-based embedding of Tok2Vec component", "flag", "chr", bool),
- bilstm_depth=("Depth of BiLSTM layers of Tok2Vec component (requires PyTorch)", "option", "lstm", int),
- embed_rows=("Number of embedding rows of Tok2Vec component", "option", "er", int),
- n_iter=("Number of iterations", "option", "n", int),
- n_early_stopping=("Maximum number of training epochs without dev accuracy improvement", "option", "ne", int),
- n_examples=("Number of examples", "option", "ns", int),
- use_gpu=("Use GPU", "option", "g", int),
- version=("Model version", "option", "V", str),
- meta_path=("Optional path to meta.json to use as base.", "option", "m", Path),
- init_tok2vec=("Path to pretrained weights for the token-to-vector parts of the models. See 'spacy pretrain'. Experimental.", "option", "t2v", Path),
- parser_multitasks=("Side objectives for parser CNN, e.g. 'dep' or 'dep,tag'", "option", "pt", str),
- entity_multitasks=("Side objectives for NER CNN, e.g. 'dep' or 'dep,tag'", "option", "et", str),
- noise_level=("Amount of corruption for data augmentation", "option", "nl", float),
- orth_variant_level=("Amount of orthography variation for data augmentation", "option", "ovl", float),
- eval_beam_widths=("Beam widths to evaluate, e.g. 4,8", "option", "bw", str),
- gold_preproc=("Use gold preprocessing", "flag", "G", bool),
- learn_tokens=("Make parser learn gold-standard tokenization", "flag", "T", bool),
- textcat_multilabel=("Textcat classes aren't mutually exclusive (multilabel)", "flag", "TML", bool),
- textcat_arch=("Textcat model architecture", "option", "ta", str),
- textcat_positive_label=("Textcat positive label for binary classes with two labels", "option", "tpl", str),
- tag_map_path=("Location of JSON-formatted tag map", "option", "tm", Path),
- omit_extra_lookups=("Don't include extra lookups in model", "flag", "OEL", bool),
- verbose=("Display more information for debug", "flag", "VV", bool),
- debug=("Run data diagnostics before training", "flag", "D", bool),
- # fmt: on
+@app.command(
+ "train", context_settings={"allow_extra_args": True, "ignore_unknown_options": True}
)
-def train(
- lang,
- output_path,
- train_path,
- dev_path,
- raw_text=None,
- base_model=None,
- pipeline="tagger,parser,ner",
- replace_components=False,
- vectors=None,
- width=96,
- conv_depth=4,
- cnn_window=1,
- cnn_pieces=3,
- use_chars=False,
- bilstm_depth=0,
- embed_rows=2000,
- n_iter=30,
- n_early_stopping=None,
- n_examples=0,
- use_gpu=-1,
- version="0.0.0",
- meta_path=None,
- init_tok2vec=None,
- parser_multitasks="",
- entity_multitasks="",
- noise_level=0.0,
- orth_variant_level=0.0,
- eval_beam_widths="",
- gold_preproc=False,
- learn_tokens=False,
- textcat_multilabel=False,
- textcat_arch="bow",
- textcat_positive_label=None,
- tag_map_path=None,
- omit_extra_lookups=False,
- verbose=False,
- debug=False,
+def train_cli(
+ # fmt: off
+ ctx: typer.Context, # This is only used to read additional arguments
+ config_path: Path = Arg(..., help="Path to config file", exists=True),
+ output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"),
+ code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
+ verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
+ use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
+ resume: bool = Opt(False, "--resume", "-R", help="Resume training"),
+ # fmt: on
):
"""
- Train or update a spaCy model. Requires data to be formatted in spaCy's
- JSON format. To convert data from other formats, use the `spacy convert`
- command.
+ Train or update a spaCy pipeline. Requires data in spaCy's binary format. To
+ convert data from other formats, use the `spacy convert` command. The
+ config file includes all settings and hyperparameters used during training.
+ To override settings in the config, e.g. settings that point to local
+ paths or that you want to experiment with, you can override them as
+ command line options. For instance, --training.batch_size 128 overrides
+ the value of "batch_size" in the block "[training]". The --code-path argument
+ lets you pass in a Python file that's imported before training. It can be
+ used to register custom functions and architectures that can then be
+ referenced in the config.
+
+ DOCS: https://nightly.spacy.io/api/cli#train
"""
- util.fix_random_seed()
- util.set_env_log(verbose)
-
- # Make sure all files and paths exists if they are needed
- train_path = util.ensure_path(train_path)
- dev_path = util.ensure_path(dev_path)
- meta_path = util.ensure_path(meta_path)
- output_path = util.ensure_path(output_path)
- if raw_text is not None:
- raw_text = list(srsly.read_jsonl(raw_text))
- if not train_path or not train_path.exists():
- msg.fail("Training data not found", train_path, exits=1)
- if not dev_path or not dev_path.exists():
- msg.fail("Development data not found", dev_path, exits=1)
- if meta_path is not None and not meta_path.exists():
- msg.fail("Can't find model meta.json", meta_path, exits=1)
- meta = srsly.read_json(meta_path) if meta_path else {}
- if output_path.exists() and [p for p in output_path.iterdir() if p.is_dir()]:
- msg.warn(
- "Output directory is not empty",
- "This can lead to unintended side effects when saving the model. "
- "Please use an empty directory or a different path instead. If "
- "the specified output path doesn't exist, the directory will be "
- "created for you.",
- )
- if not output_path.exists():
- output_path.mkdir()
- msg.good("Created output directory: {}".format(output_path))
-
- tag_map = {}
- if tag_map_path is not None:
- tag_map = srsly.read_json(tag_map_path)
- # Take dropout and batch size as generators of values -- dropout
- # starts high and decays sharply, to force the optimizer to explore.
- # Batch size starts at 1 and grows, so that we make updates quickly
- # at the beginning of training.
- dropout_rates = util.decaying(
- util.env_opt("dropout_from", 0.2),
- util.env_opt("dropout_to", 0.2),
- util.env_opt("dropout_decay", 0.0),
- )
- batch_sizes = util.compounding(
- util.env_opt("batch_from", 100.0),
- util.env_opt("batch_to", 1000.0),
- util.env_opt("batch_compound", 1.001),
+ util.logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
+ verify_cli_args(config_path, output_path)
+ overrides = parse_config_overrides(ctx.args)
+ import_code(code_path)
+ train(
+ config_path,
+ output_path=output_path,
+ config_overrides=overrides,
+ use_gpu=use_gpu,
+ resume_training=resume,
)
- if not eval_beam_widths:
- eval_beam_widths = [1]
- else:
- eval_beam_widths = [int(bw) for bw in eval_beam_widths.split(",")]
- if 1 not in eval_beam_widths:
- eval_beam_widths.append(1)
- eval_beam_widths.sort()
- has_beam_widths = eval_beam_widths != [1]
- # Set up the base model and pipeline. If a base model is specified, load
- # the model and make sure the pipeline matches the pipeline setting. If
- # training starts from a blank model, intitalize the language class.
- pipeline = [p.strip() for p in pipeline.split(",")]
- disabled_pipes = None
- pipes_added = False
- msg.text("Training pipeline: {}".format(pipeline))
+def train(
+ config_path: Path,
+ output_path: Optional[Path] = None,
+ config_overrides: Dict[str, Any] = {},
+ use_gpu: int = -1,
+ resume_training: bool = False,
+) -> None:
if use_gpu >= 0:
- activated_gpu = None
- try:
- activated_gpu = set_gpu(use_gpu)
- except Exception as e:
- msg.warn("Exception: {}".format(e))
- if activated_gpu is not None:
- msg.text("Using GPU: {}".format(use_gpu))
- else:
- msg.warn("Unable to activate GPU: {}".format(use_gpu))
- msg.text("Using CPU only")
- use_gpu = -1
- base_components = []
- if base_model:
- msg.text("Starting with base model '{}'".format(base_model))
- nlp = util.load_model(base_model)
- if nlp.lang != lang:
- msg.fail(
- "Model language ('{}') doesn't match language specified as "
- "`lang` argument ('{}') ".format(nlp.lang, lang),
- exits=1,
- )
- for pipe in pipeline:
- pipe_cfg = {}
- if pipe == "parser":
- pipe_cfg = {"learn_tokens": learn_tokens}
- elif pipe == "textcat":
- pipe_cfg = {
- "exclusive_classes": not textcat_multilabel,
- "architecture": textcat_arch,
- "positive_label": textcat_positive_label,
- }
- if pipe not in nlp.pipe_names:
- msg.text("Adding component to base model '{}'".format(pipe))
- nlp.add_pipe(nlp.create_pipe(pipe, config=pipe_cfg))
- pipes_added = True
- elif replace_components:
- msg.text("Replacing component from base model '{}'".format(pipe))
- nlp.replace_pipe(pipe, nlp.create_pipe(pipe, config=pipe_cfg))
- pipes_added = True
- else:
- if pipe == "textcat":
- textcat_cfg = nlp.get_pipe("textcat").cfg
- base_cfg = {
- "exclusive_classes": textcat_cfg["exclusive_classes"],
- "architecture": textcat_cfg["architecture"],
- "positive_label": textcat_cfg["positive_label"],
- }
- if base_cfg != pipe_cfg:
- msg.fail(
- "The base textcat model configuration does"
- "not match the provided training options. "
- "Existing cfg: {}, provided cfg: {}".format(
- base_cfg, pipe_cfg
- ),
- exits=1,
- )
- msg.text("Extending component from base model '{}'".format(pipe))
- base_components.append(pipe)
- disabled_pipes = nlp.disable_pipes(
- [p for p in nlp.pipe_names if p not in pipeline]
- )
+ msg.info(f"Using GPU: {use_gpu}")
+ require_gpu(use_gpu)
else:
- msg.text("Starting with blank model '{}'".format(lang))
- lang_cls = util.get_lang_class(lang)
- nlp = lang_cls()
- for pipe in pipeline:
- if pipe == "parser":
- pipe_cfg = {"learn_tokens": learn_tokens}
- elif pipe == "textcat":
- pipe_cfg = {
- "exclusive_classes": not textcat_multilabel,
- "architecture": textcat_arch,
- "positive_label": textcat_positive_label,
- }
- else:
- pipe_cfg = {}
- nlp.add_pipe(nlp.create_pipe(pipe, config=pipe_cfg))
-
- # Replace tag map with provided mapping
- nlp.vocab.morphology.load_tag_map(tag_map)
-
- # Create empty extra lexeme tables so the data from spacy-lookups-data
- # isn't loaded if these features are accessed
- if omit_extra_lookups:
- nlp.vocab.lookups_extra = Lookups()
- nlp.vocab.lookups_extra.add_table("lexeme_cluster")
- nlp.vocab.lookups_extra.add_table("lexeme_prob")
- nlp.vocab.lookups_extra.add_table("lexeme_settings")
-
- if vectors:
- msg.text("Loading vector from model '{}'".format(vectors))
- _load_vectors(nlp, vectors)
-
- # Multitask objectives
- multitask_options = [("parser", parser_multitasks), ("ner", entity_multitasks)]
- for pipe_name, multitasks in multitask_options:
- if multitasks:
- if pipe_name not in pipeline:
- msg.fail(
- "Can't use multitask objective without '{}' in the "
- "pipeline".format(pipe_name)
- )
- pipe = nlp.get_pipe(pipe_name)
- for objective in multitasks.split(","):
- pipe.add_multitask_objective(objective)
-
- # Prepare training corpus
- msg.text("Counting training words (limit={})".format(n_examples))
- corpus = GoldCorpus(train_path, dev_path, limit=n_examples)
- n_train_words = corpus.count_train()
-
- if base_model and not pipes_added:
- # Start with an existing model, use default optimizer
- optimizer = nlp.resume_training(device=use_gpu)
- else:
- # Start with a blank model, call begin_training
- cfg = {"device": use_gpu}
- cfg["conv_depth"] = conv_depth
- cfg["token_vector_width"] = width
- cfg["bilstm_depth"] = bilstm_depth
- cfg["cnn_maxout_pieces"] = cnn_pieces
- cfg["embed_size"] = embed_rows
- cfg["conv_window"] = cnn_window
- cfg["subword_features"] = not use_chars
- optimizer = nlp.begin_training(lambda: corpus.train_tuples, **cfg)
-
- nlp._optimizer = None
-
- # Load in pretrained weights
- if init_tok2vec is not None:
- components = _load_pretrained_tok2vec(nlp, init_tok2vec, base_components)
- msg.text("Loaded pretrained tok2vec for: {}".format(components))
-
- # Verify textcat config
- if "textcat" in pipeline:
- textcat_labels = nlp.get_pipe("textcat").cfg.get("labels", [])
- if textcat_positive_label and textcat_positive_label not in textcat_labels:
- msg.fail(
- "The textcat_positive_label (tpl) '{}' does not match any "
- "label in the training data.".format(textcat_positive_label),
- exits=1,
- )
- if textcat_positive_label and len(textcat_labels) != 2:
- msg.fail(
- "A textcat_positive_label (tpl) '{}' was provided for training "
- "data that does not appear to be a binary classification "
- "problem with two labels.".format(textcat_positive_label),
- exits=1,
- )
- train_docs = corpus.train_docs(
- nlp,
- noise_level=noise_level,
- gold_preproc=gold_preproc,
- max_length=0,
- ignore_misaligned=True,
+ msg.info("Using CPU")
+ msg.info(f"Loading config and nlp from: {config_path}")
+ with show_validation_error(config_path):
+ config = util.load_config(
+ config_path, overrides=config_overrides, interpolate=True
)
- train_labels = set()
- if textcat_multilabel:
- multilabel_found = False
- for text, gold in train_docs:
- train_labels.update(gold.cats.keys())
- if list(gold.cats.values()).count(1.0) != 1:
- multilabel_found = True
- if not multilabel_found and not base_model:
- msg.warn(
- "The textcat training instances look like they have "
- "mutually-exclusive classes. Remove the flag "
- "'--textcat-multilabel' to train a classifier with "
- "mutually-exclusive classes."
- )
- if not textcat_multilabel:
- for text, gold in train_docs:
- train_labels.update(gold.cats.keys())
- if list(gold.cats.values()).count(1.0) != 1 and not base_model:
- msg.warn(
- "Some textcat training instances do not have exactly "
- "one positive label. Modifying training options to "
- "include the flag '--textcat-multilabel' for classes "
- "that are not mutually exclusive."
- )
- nlp.get_pipe("textcat").cfg["exclusive_classes"] = False
- textcat_multilabel = True
- break
- if base_model and set(textcat_labels) != train_labels:
+ if config.get("training", {}).get("seed") is not None:
+ fix_random_seed(config["training"]["seed"])
+ if config.get("system", {}).get("use_pytorch_for_gpu_memory"):
+ # It feels kind of weird to not have a default for this.
+ use_pytorch_for_gpu_memory()
+ # Use original config here before it's resolved to functions
+ sourced_components = get_sourced_components(config)
+ with show_validation_error(config_path):
+ nlp, config = util.load_model_from_config(config)
+ if config["training"]["vectors"] is not None:
+ util.load_vectors_into_model(nlp, config["training"]["vectors"])
+ verify_config(nlp)
+ raw_text, tag_map, morph_rules, weights_data = load_from_paths(config)
+ T_cfg = config["training"]
+ optimizer = T_cfg["optimizer"]
+ train_corpus = T_cfg["train_corpus"]
+ dev_corpus = T_cfg["dev_corpus"]
+ batcher = T_cfg["batcher"]
+ train_logger = T_cfg["logger"]
+ # Components that shouldn't be updated during training
+ frozen_components = T_cfg["frozen_components"]
+ # Sourced components that require resume_training
+ resume_components = [p for p in sourced_components if p not in frozen_components]
+ msg.info(f"Pipeline: {nlp.pipe_names}")
+ if resume_components:
+ with nlp.select_pipes(enable=resume_components):
+ msg.info(f"Resuming training for: {resume_components}")
+ nlp.resume_training(sgd=optimizer)
+ with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
+ nlp.begin_training(lambda: train_corpus(nlp), sgd=optimizer)
+
+ if tag_map:
+ # Replace tag map with provided mapping
+ nlp.vocab.morphology.load_tag_map(tag_map)
+ if morph_rules:
+ # Load morph rules
+ nlp.vocab.morphology.load_morph_exceptions(morph_rules)
+
+ # Load pretrained tok2vec weights - cf. CLI command 'pretrain'
+ if weights_data is not None:
+ tok2vec_path = config["pretraining"].get("tok2vec_model", None)
+ if tok2vec_path is None:
msg.fail(
- "Cannot extend textcat model using data with different "
- "labels. Base model labels: {}, training data labels: "
- "{}.".format(textcat_labels, list(train_labels)),
+ f"To pretrained tok2vec weights, the config needs to specify which "
+ f"tok2vec layer to load in the setting [pretraining.tok2vec_model].",
exits=1,
)
- if textcat_multilabel:
- msg.text(
- "Textcat evaluation score: ROC AUC score macro-averaged across "
- "the labels '{}'".format(", ".join(textcat_labels))
- )
- elif textcat_positive_label and len(textcat_labels) == 2:
- msg.text(
- "Textcat evaluation score: F1-score for the "
- "label '{}'".format(textcat_positive_label)
- )
- elif len(textcat_labels) > 1:
- if len(textcat_labels) == 2:
- msg.warn(
- "If the textcat component is a binary classifier with "
- "exclusive classes, provide '--textcat-positive-label' for "
- "an evaluation on the positive class."
- )
- msg.text(
- "Textcat evaluation score: F1-score macro-averaged across "
- "the labels '{}'".format(", ".join(textcat_labels))
- )
- else:
- msg.fail(
- "Unsupported textcat configuration. Use `spacy debug-data` "
- "for more information."
- )
+ tok2vec = config
+ for subpath in tok2vec_path.split("."):
+ tok2vec = tok2vec.get(subpath)
+ if not tok2vec:
+ err = f"Could not locate the tok2vec model at {tok2vec_path}"
+ msg.fail(err, exits=1)
+ tok2vec.from_bytes(weights_data)
+
+ # Create iterator, which yields out info after each optimization step.
+ msg.info("Start training")
+ score_weights = T_cfg["score_weights"]
+ training_step_iterator = train_while_improving(
+ nlp,
+ optimizer,
+ create_train_batches(train_corpus(nlp), batcher, T_cfg["max_epochs"]),
+ create_evaluation_callback(nlp, dev_corpus, score_weights),
+ dropout=T_cfg["dropout"],
+ accumulate_gradient=T_cfg["accumulate_gradient"],
+ patience=T_cfg["patience"],
+ max_steps=T_cfg["max_steps"],
+ eval_frequency=T_cfg["eval_frequency"],
+ raw_text=None,
+ exclude=frozen_components,
+ )
+ msg.info(f"Training. Initial learn rate: {optimizer.learn_rate}")
+ print_row, finalize_logger = train_logger(nlp)
- # fmt: off
- row_head, output_stats = _configure_training_output(pipeline, use_gpu, has_beam_widths)
- row_widths = [len(w) for w in row_head]
- row_settings = {"widths": row_widths, "aligns": tuple(["r" for i in row_head]), "spacing": 2}
- # fmt: on
- print("")
- msg.row(row_head, **row_settings)
- msg.row(["-" * width for width in row_settings["widths"]], **row_settings)
try:
- iter_since_best = 0
- best_score = 0.0
- for i in range(n_iter):
- train_docs = corpus.train_docs(
- nlp,
- noise_level=noise_level,
- orth_variant_level=orth_variant_level,
- gold_preproc=gold_preproc,
- max_length=0,
- ignore_misaligned=True,
+ progress = tqdm.tqdm(total=T_cfg["eval_frequency"], leave=False)
+ progress.set_description(f"Epoch 1")
+ for batch, info, is_best_checkpoint in training_step_iterator:
+ progress.update(1)
+ if is_best_checkpoint is not None:
+ progress.close()
+ print_row(info)
+ if is_best_checkpoint and output_path is not None:
+ update_meta(T_cfg, nlp, info)
+ with nlp.use_params(optimizer.averages):
+ nlp.to_disk(output_path / "model-best")
+ progress = tqdm.tqdm(total=T_cfg["eval_frequency"], leave=False)
+ progress.set_description(f"Epoch {info['epoch']}")
+ except Exception as e:
+ finalize_logger()
+ if output_path is not None:
+ # We don't want to swallow the traceback if we don't have a
+ # specific error.
+ msg.warn(
+ f"Aborting and saving the final best model. "
+ f"Encountered exception: {str(e)}"
+ )
+ nlp.to_disk(output_path / "model-final")
+ raise e
+ finally:
+ finalize_logger()
+ if output_path is not None:
+ final_model_path = output_path / "model-final"
+ if optimizer.averages:
+ with nlp.use_params(optimizer.averages):
+ nlp.to_disk(final_model_path)
+ else:
+ nlp.to_disk(final_model_path)
+ msg.good(f"Saved pipeline to output directory {final_model_path}")
+
+
+def create_train_batches(iterator, batcher, max_epochs: int):
+ epoch = 0
+ examples = list(iterator)
+ if not examples:
+ # Raise error if no data
+ raise ValueError(Errors.E986)
+ while max_epochs < 1 or epoch != max_epochs:
+ random.shuffle(examples)
+ for batch in batcher(examples):
+ yield epoch, batch
+ epoch += 1
+
+
+def create_evaluation_callback(
+ nlp: Language, dev_corpus: Callable, weights: Dict[str, float]
+) -> Callable[[], Tuple[float, Dict[str, float]]]:
+ def evaluate() -> Tuple[float, Dict[str, float]]:
+ dev_examples = list(dev_corpus(nlp))
+ scores = nlp.evaluate(dev_examples)
+ # Calculate a weighted sum based on score_weights for the main score
+ try:
+ weighted_score = sum(
+ scores.get(s, 0.0) * weights.get(s, 0.0) for s in weights
+ )
+ except KeyError as e:
+ keys = list(scores.keys())
+ err = Errors.E983.format(dict="score_weights", key=str(e), keys=keys)
+ raise KeyError(err) from None
+ return weighted_score, scores
+
+ return evaluate
+
+
+def train_while_improving(
+ nlp: Language,
+ optimizer: Optimizer,
+ train_data,
+ evaluate,
+ *,
+ dropout: float,
+ eval_frequency: int,
+ accumulate_gradient: int,
+ patience: int,
+ max_steps: int,
+ raw_text: List[Dict[str, str]],
+ exclude: List[str],
+):
+ """Train until an evaluation stops improving. Works as a generator,
+ with each iteration yielding a tuple `(batch, info, is_best_checkpoint)`,
+ where info is a dict, and is_best_checkpoint is in [True, False, None] --
+ None indicating that the iteration was not evaluated as a checkpoint.
+ The evaluation is conducted by calling the evaluate callback.
+
+ Positional arguments:
+ nlp: The spaCy pipeline to evaluate.
+ optimizer: The optimizer callable.
+ train_data (Iterable[Batch]): A generator of batches, with the training
+ data. Each batch should be a Sized[Tuple[Input, Annot]]. The training
+ data iterable needs to take care of iterating over the epochs and
+ shuffling.
+ evaluate (Callable[[], Tuple[float, Any]]): A callback to perform evaluation.
+ The callback should take no arguments and return a tuple
+ `(main_score, other_scores)`. The main_score should be a float where
+ higher is better. other_scores can be any object.
+
+ Every iteration, the function yields out a tuple with:
+
+ * batch: A list of Example objects.
+ * info: A dict with various information about the last update (see below).
+ * is_best_checkpoint: A value in None, False, True, indicating whether this
+ was the best evaluation so far. You should use this to save the model
+ checkpoints during training. If None, evaluation was not conducted on
+ that iteration. False means evaluation was conducted, but a previous
+ evaluation was better.
+
+ The info dict provides the following information:
+
+ epoch (int): How many passes over the data have been completed.
+ step (int): How many steps have been completed.
+ score (float): The main score from the last evaluation.
+ other_scores: The other scores from the last evaluation.
+ losses: The accumulated losses throughout training.
+ checkpoints: A list of previous results, where each result is a
+ (score, step, epoch) tuple.
+ """
+ if isinstance(dropout, float):
+ dropouts = thinc.schedules.constant(dropout)
+ else:
+ dropouts = dropout
+ results = []
+ losses = {}
+ if raw_text:
+ random.shuffle(raw_text)
+ raw_examples = [
+ Example.from_dict(nlp.make_doc(rt["text"]), {}) for rt in raw_text
+ ]
+ raw_batches = util.minibatch(raw_examples, size=8)
+
+ for step, (epoch, batch) in enumerate(train_data):
+ dropout = next(dropouts)
+ for subbatch in subdivide_batch(batch, accumulate_gradient):
+ nlp.update(
+ subbatch, drop=dropout, losses=losses, sgd=False, exclude=exclude
)
if raw_text:
- random.shuffle(raw_text)
- raw_batches = util.minibatch(
- (nlp.make_doc(rt["text"]) for rt in raw_text), size=8
- )
- words_seen = 0
- with tqdm.tqdm(total=n_train_words, leave=False) as pbar:
- losses = {}
- for batch in util.minibatch_by_words(train_docs, size=batch_sizes):
- if not batch:
- continue
- docs, golds = zip(*batch)
- try:
- nlp.update(
- docs,
- golds,
- sgd=optimizer,
- drop=next(dropout_rates),
- losses=losses,
- )
- except ValueError as e:
- err = "Error during training"
- if init_tok2vec:
- err += " Did you provide the same parameters during 'train' as during 'pretrain'?"
- msg.fail(err, "Original error message: {}".format(e), exits=1)
- if raw_text:
- # If raw text is available, perform 'rehearsal' updates,
- # which use unlabelled data to reduce overfitting.
- raw_batch = list(next(raw_batches))
- nlp.rehearse(raw_batch, sgd=optimizer, losses=losses)
- if not int(os.environ.get("LOG_FRIENDLY", 0)):
- pbar.update(sum(len(doc) for doc in docs))
- words_seen += sum(len(doc) for doc in docs)
- with nlp.use_params(optimizer.averages):
- util.set_env_log(False)
- epoch_model_path = output_path / ("model%d" % i)
- nlp.to_disk(epoch_model_path)
- nlp_loaded = util.load_model_from_path(epoch_model_path)
- for beam_width in eval_beam_widths:
- for name, component in nlp_loaded.pipeline:
- if hasattr(component, "cfg"):
- component.cfg["beam_width"] = beam_width
- dev_docs = list(
- corpus.dev_docs(
- nlp_loaded,
- gold_preproc=gold_preproc,
- ignore_misaligned=True,
- )
- )
- nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
- start_time = timer()
- scorer = nlp_loaded.evaluate(dev_docs, verbose=verbose)
- end_time = timer()
- if use_gpu < 0:
- gpu_wps = None
- cpu_wps = nwords / (end_time - start_time)
- else:
- gpu_wps = nwords / (end_time - start_time)
- # Only evaluate on CPU in the first iteration (for
- # timing) if GPU is enabled
- if i == 0:
- with Model.use_device("cpu"):
- nlp_loaded = util.load_model_from_path(epoch_model_path)
- for name, component in nlp_loaded.pipeline:
- if hasattr(component, "cfg"):
- component.cfg["beam_width"] = beam_width
- dev_docs = list(
- corpus.dev_docs(
- nlp_loaded,
- gold_preproc=gold_preproc,
- ignore_misaligned=True,
- )
- )
- start_time = timer()
- scorer = nlp_loaded.evaluate(dev_docs, verbose=verbose)
- end_time = timer()
- cpu_wps = nwords / (end_time - start_time)
- acc_loc = output_path / ("model%d" % i) / "accuracy.json"
- srsly.write_json(acc_loc, scorer.scores)
-
- # Update model meta.json
- meta["lang"] = nlp.lang
- meta["pipeline"] = nlp.pipe_names
- meta["spacy_version"] = ">=%s" % about.__version__
- if beam_width == 1:
- meta["speed"] = {
- "nwords": nwords,
- "cpu": cpu_wps,
- "gpu": gpu_wps,
- }
- meta.setdefault("accuracy", {})
- for component in nlp.pipe_names:
- for metric in _get_metrics(component):
- meta["accuracy"][metric] = scorer.scores[metric]
- else:
- meta.setdefault("beam_accuracy", {})
- meta.setdefault("beam_speed", {})
- for component in nlp.pipe_names:
- for metric in _get_metrics(component):
- meta["beam_accuracy"][metric] = scorer.scores[metric]
- meta["beam_speed"][beam_width] = {
- "nwords": nwords,
- "cpu": cpu_wps,
- "gpu": gpu_wps,
- }
- meta["vectors"] = {
- "width": nlp.vocab.vectors_length,
- "vectors": len(nlp.vocab.vectors),
- "keys": nlp.vocab.vectors.n_keys,
- "name": nlp.vocab.vectors.name,
- }
- meta.setdefault("name", "model%d" % i)
- meta.setdefault("version", version)
- meta["labels"] = nlp.meta["labels"]
- meta_loc = output_path / ("model%d" % i) / "meta.json"
- srsly.write_json(meta_loc, meta)
- util.set_env_log(verbose)
-
- progress = _get_progress(
- i,
- losses,
- scorer.scores,
- output_stats,
- beam_width=beam_width if has_beam_widths else None,
- cpu_wps=cpu_wps,
- gpu_wps=gpu_wps,
- )
- if i == 0 and "textcat" in pipeline:
- textcats_per_cat = scorer.scores.get("textcats_per_cat", {})
- for cat, cat_score in textcats_per_cat.items():
- if cat_score.get("roc_auc_score", 0) < 0:
- msg.warn(
- "Textcat ROC AUC score is undefined due to "
- "only one value in label '{}'.".format(cat)
- )
- msg.row(progress, **row_settings)
- # Early stopping
- if n_early_stopping is not None:
- current_score = _score_for_model(meta)
- if current_score < best_score:
- iter_since_best += 1
- else:
- iter_since_best = 0
- best_score = current_score
- if iter_since_best >= n_early_stopping:
- iter_current = i + 1
- msg.text(
- "Early stopping, best iteration "
- "is: {}".format(iter_current - iter_since_best)
- )
- msg.text(
- "Best score = {}; Final iteration "
- "score = {}".format(best_score, current_score)
- )
- break
- except Exception as e:
- msg.warn(
- "Aborting and saving the final best model. "
- "Encountered exception: {}".format(e),
- exits=1,
- )
- finally:
- best_pipes = nlp.pipe_names
- if disabled_pipes:
- disabled_pipes.restore()
- with nlp.use_params(optimizer.averages):
- final_model_path = output_path / "model-final"
- nlp.to_disk(final_model_path)
- srsly.write_json(final_model_path / "meta.json", meta)
-
- meta_loc = output_path / "model-final" / "meta.json"
- final_meta = srsly.read_json(meta_loc)
- final_meta.setdefault("accuracy", {})
- final_meta["accuracy"].update(meta.get("accuracy", {}))
- final_meta.setdefault("speed", {})
- final_meta["speed"].setdefault("cpu", None)
- final_meta["speed"].setdefault("gpu", None)
- meta.setdefault("speed", {})
- meta["speed"].setdefault("cpu", None)
- meta["speed"].setdefault("gpu", None)
- # combine cpu and gpu speeds with the base model speeds
- if final_meta["speed"]["cpu"] and meta["speed"]["cpu"]:
- speed = _get_total_speed(
- [final_meta["speed"]["cpu"], meta["speed"]["cpu"]]
- )
- final_meta["speed"]["cpu"] = speed
- if final_meta["speed"]["gpu"] and meta["speed"]["gpu"]:
- speed = _get_total_speed(
- [final_meta["speed"]["gpu"], meta["speed"]["gpu"]]
- )
- final_meta["speed"]["gpu"] = speed
- # if there were no speeds to update, overwrite with meta
+ # If raw text is available, perform 'rehearsal' updates,
+ # which use unlabelled data to reduce overfitting.
+ raw_batch = list(next(raw_batches))
+ nlp.rehearse(raw_batch, sgd=optimizer, losses=losses, exclude=exclude)
+ # TODO: refactor this so we don't have to run it separately in here
+ for name, proc in nlp.pipeline:
if (
- final_meta["speed"]["cpu"] is None
- and final_meta["speed"]["gpu"] is None
+ name not in exclude
+ and hasattr(proc, "model")
+ and proc.model not in (True, False, None)
):
- final_meta["speed"].update(meta["speed"])
- # note: beam speeds are not combined with the base model
- if has_beam_widths:
- final_meta.setdefault("beam_accuracy", {})
- final_meta["beam_accuracy"].update(meta.get("beam_accuracy", {}))
- final_meta.setdefault("beam_speed", {})
- final_meta["beam_speed"].update(meta.get("beam_speed", {}))
- srsly.write_json(meta_loc, final_meta)
- msg.good("Saved model to output directory", final_model_path)
- with msg.loading("Creating best model..."):
- best_model_path = _collate_best_model(final_meta, output_path, best_pipes)
- msg.good("Created best model", best_model_path)
+ proc.model.finish_update(optimizer)
+ optimizer.step_schedules()
+ if not (step % eval_frequency):
+ if optimizer.averages:
+ with nlp.use_params(optimizer.averages):
+ score, other_scores = evaluate()
+ else:
+ score, other_scores = evaluate()
+ results.append((score, step))
+ is_best_checkpoint = score == max(results)[0]
+ else:
+ score, other_scores = (None, None)
+ is_best_checkpoint = None
+ info = {
+ "epoch": epoch,
+ "step": step,
+ "score": score,
+ "other_scores": other_scores,
+ "losses": losses,
+ "checkpoints": results,
+ }
+ yield batch, info, is_best_checkpoint
+ if is_best_checkpoint is not None:
+ losses = {}
+ # Stop if no improvement in `patience` updates (if specified)
+ best_score, best_step = max(results)
+ if patience and (step - best_step) >= patience:
+ break
+ # Stop if we've exhausted our max steps (if specified)
+ if max_steps and step >= max_steps:
+ break
-def _score_for_model(meta):
- """ Returns mean score between tasks in pipeline that can be used for early stopping. """
- mean_acc = list()
- pipes = meta["pipeline"]
- acc = meta["accuracy"]
- if "tagger" in pipes:
- mean_acc.append(acc["tags_acc"])
- if "parser" in pipes:
- mean_acc.append((acc["uas"] + acc["las"]) / 2)
- if "ner" in pipes:
- mean_acc.append((acc["ents_p"] + acc["ents_r"] + acc["ents_f"]) / 3)
- if "textcat" in pipes:
- mean_acc.append(acc["textcat_score"])
- return sum(mean_acc) / len(mean_acc)
+def subdivide_batch(batch, accumulate_gradient):
+    """Split a batch into smaller sub-batches for gradient accumulation.
+
+    The examples are ordered by the length of their `predicted` attribute.
+    Up to `accumulate_gradient` slices of `len(batch) // accumulate_gradient`
+    examples are yielded first; whatever is left over is yielded as one
+    final sub-batch. Empty slices are never yielded.
+
+    batch (Iterable): The examples to split.
+    accumulate_gradient (int): Number of equally sized slices to attempt.
+    YIELDS (list): The non-empty sub-batches.
+    """
+    examples = sorted(batch, key=lambda eg: len(eg.predicted))
+    chunk_size = len(examples) // accumulate_gradient
+    offset = 0
+    for _ in range(accumulate_gradient):
+        chunk = examples[offset : offset + chunk_size]
+        if chunk:
+            yield chunk
+        offset += len(chunk)
+    # Anything that did not divide evenly ends up in one trailing sub-batch.
+    leftover = examples[offset:]
+    if leftover:
+        yield leftover
-@contextlib.contextmanager
-def _create_progress_bar(total):
- if int(os.environ.get("LOG_FRIENDLY", 0)):
- yield
- else:
- pbar = tqdm.tqdm(total=total, leave=False)
- yield pbar
+def update_meta(
+    training: Union[Dict[str, Any], Config], nlp: Language, info: Dict[str, Any]
+) -> None:
+    """Record the latest scores and losses under `nlp.meta["performance"]`.
+
+    training (Union[Dict[str, Any], Config]): Training settings; only the keys
+        of its "score_weights" entry are read.
+    nlp (Language): The pipeline whose meta is overwritten in place.
+    info (Dict[str, Any]): Step info; its "other_scores" and "losses" entries
+        are read.
+    """
+    nlp.meta["performance"] = {}
+    for metric in training["score_weights"]:
+        # Metrics that were not reported by the evaluation default to 0.0.
+        nlp.meta["performance"][metric] = info["other_scores"].get(metric, 0.0)
+    for pipe_name in nlp.pipe_names:
+        # Not every component in the pipeline necessarily reported a loss
+        # (e.g. components that were excluded from the update), so skip the
+        # missing ones instead of raising a KeyError.
+        if pipe_name in info["losses"]:
+            nlp.meta["performance"][f"{pipe_name}_loss"] = info["losses"][pipe_name]
-def _load_vectors(nlp, vectors):
- util.load_model(vectors, vocab=nlp.vocab)
+def load_from_paths(
+    config: Config,
+) -> Tuple[
+    Optional[List[Dict[str, str]]], Dict[str, dict], Dict[str, dict], Optional[bytes]
+]:
+    """Load the optional extra resources referenced in the training config.
+
+    config (Config): The config; `config["training"]["raw_text"]` and
+        `config["training"]["init_tok2vec"]` are read.
+    RETURNS (tuple): `(raw_text, tag_map, morph_rules, weights_data)`.
+        `raw_text` is the list of records read from the JSONL file, or None
+        if no path is set. `tag_map` and `morph_rules` are always empty
+        dicts here. `weights_data` holds the raw bytes of the tok2vec file,
+        or None if no path is set.
+    """
+    # TODO: separate checks from loading
+    raw_text = util.ensure_path(config["training"]["raw_text"])
+    if raw_text is not None:
+        if not raw_text.exists():
+            msg.fail("Can't find raw text", raw_text, exits=1)
+        raw_text = list(srsly.read_jsonl(config["training"]["raw_text"]))
+    tag_map = {}
+    morph_rules = {}
+    weights_data = None
+    init_tok2vec = util.ensure_path(config["training"]["init_tok2vec"])
+    if init_tok2vec is not None:
+        if not init_tok2vec.exists():
+            msg.fail("Can't find pretrained tok2vec", init_tok2vec, exits=1)
+        with init_tok2vec.open("rb") as file_:
+            weights_data = file_.read()
+    return raw_text, tag_map, morph_rules, weights_data
-def _load_pretrained_tok2vec(nlp, loc, base_components):
- """Load pretrained weights for the 'token-to-vector' part of the component
- models, which is typically a CNN. See 'spacy pretrain'. Experimental.
- """
- with loc.open("rb") as file_:
- weights_data = file_.read()
- loaded = []
- for name, component in nlp.pipeline:
- if hasattr(component, "model") and hasattr(component.model, "tok2vec"):
- if name in base_components:
- raise ValueError(Errors.E200.format(component=name))
- component.tok2vec.from_bytes(weights_data)
- loaded.append(name)
- return loaded
+def verify_cli_args(config_path: Path, output_path: Optional[Path] = None) -> None:
+    """Check the paths passed on the command line before training starts.
+
+    config_path (Path): Path to the config file. Exits with status 1 if it is
+        missing.
+    output_path (Optional[Path]): Output directory. If given and it does not
+        exist yet, it is created.
+    """
+    # Make sure all files and paths exist if they are needed
+    if not config_path or not config_path.exists():
+        msg.fail("Config file not found", config_path, exits=1)
+    if output_path is not None:
+        if not output_path.exists():
+            # parents=True so a nested output path such as "runs/exp1/model"
+            # works even when the intermediate directories are missing.
+            output_path.mkdir(parents=True)
+            msg.good(f"Created output directory: {output_path}")
-def _collate_best_model(meta, output_path, components):
- bests = {}
- meta.setdefault("accuracy", {})
- for component in components:
- bests[component] = _find_best(output_path, component)
- best_dest = output_path / "model-best"
- shutil.copytree(path2str(output_path / "model-final"), path2str(best_dest))
- for component, best_component_src in bests.items():
- shutil.rmtree(path2str(best_dest / component))
- shutil.copytree(
- path2str(best_component_src / component), path2str(best_dest / component)
- )
- accs = srsly.read_json(best_component_src / "accuracy.json")
- for metric in _get_metrics(component):
- meta["accuracy"][metric] = accs[metric]
- srsly.write_json(best_dest / "meta.json", meta)
- return best_dest
+def verify_config(nlp: Language) -> None:
+    """Run extra, component-specific checks on a loaded nlp object.
+
+    nlp (Language): The pipeline whose `config["components"]` is inspected.
+    """
+    # TODO: maybe we should validate based on the actual components, the list
+    # in config["nlp"]["pipeline"] instead?
+    component_configs = nlp.config["components"].values()
+    for component_config in component_configs:
+        # Dispatch on the factory, since the component name may differ from it.
+        if component_config["factory"] != "textcat":
+            continue
+        verify_textcat_config(nlp, component_config)
-def _find_best(experiment_dir, component):
- accuracies = []
- for epoch_model in experiment_dir.iterdir():
- if epoch_model.is_dir() and epoch_model.parts[-1] != "model-final":
- accs = srsly.read_json(epoch_model / "accuracy.json")
- scores = [accs.get(metric, 0.0) for metric in _get_metrics(component)]
- # remove per_type dicts from score list for max() comparison
- scores = [score for score in scores if isinstance(score, float)]
- accuracies.append((scores, epoch_model))
- if accuracies:
- return max(accuracies)[1]
- else:
- return None
-
-
-def _get_metrics(component):
- if component == "parser":
- return ("las", "uas", "las_per_type", "token_acc")
- elif component == "tagger":
- return ("tags_acc", "token_acc")
- elif component == "ner":
- return ("ents_f", "ents_p", "ents_r", "ents_per_type", "token_acc")
- elif component == "textcat":
- return ("textcat_score", "token_acc")
- return ("token_acc",)
-
-
-def _configure_training_output(pipeline, use_gpu, has_beam_widths):
- row_head = ["Itn"]
- output_stats = []
- for pipe in pipeline:
- if pipe == "tagger":
- row_head.extend(["Tag Loss ", " Tag % "])
- output_stats.extend(["tag_loss", "tags_acc"])
- elif pipe == "parser":
- row_head.extend(["Dep Loss ", " UAS ", " LAS "])
- output_stats.extend(["dep_loss", "uas", "las"])
- elif pipe == "ner":
- row_head.extend(["NER Loss ", "NER P ", "NER R ", "NER F "])
- output_stats.extend(["ner_loss", "ents_p", "ents_r", "ents_f"])
- elif pipe == "textcat":
- row_head.extend(["Textcat Loss", "Textcat"])
- output_stats.extend(["textcat_loss", "textcat_score"])
- row_head.extend(["Token %", "CPU WPS"])
- output_stats.extend(["token_acc", "cpu_wps"])
-
- if use_gpu >= 0:
- row_head.extend(["GPU WPS"])
- output_stats.extend(["gpu_wps"])
-
- if has_beam_widths:
- row_head.insert(1, "Beam W.")
- return row_head, output_stats
-
-
-def _get_progress(
- itn, losses, dev_scores, output_stats, beam_width=None, cpu_wps=0.0, gpu_wps=0.0
-):
- scores = {}
- for stat in output_stats:
- scores[stat] = 0.0
- scores["dep_loss"] = losses.get("parser", 0.0)
- scores["ner_loss"] = losses.get("ner", 0.0)
- scores["tag_loss"] = losses.get("tagger", 0.0)
- scores["textcat_loss"] = losses.get("textcat", 0.0)
- scores["cpu_wps"] = cpu_wps
- scores["gpu_wps"] = gpu_wps or 0.0
- scores.update(dev_scores)
- formatted_scores = []
- for stat in output_stats:
- format_spec = "{:.3f}"
- if stat.endswith("_wps"):
- format_spec = "{:.0f}"
- formatted_scores.append(format_spec.format(scores[stat]))
- result = [itn + 1]
- result.extend(formatted_scores)
- if beam_width is not None:
- result.insert(1, beam_width)
- return result
-
-
-def _get_total_speed(speeds):
- seconds_per_word = 0.0
- for words_per_second in speeds:
- if words_per_second is None:
- return None
- seconds_per_word += 1.0 / words_per_second
- return 1.0 / seconds_per_word
+def verify_textcat_config(nlp: Language, pipe_config: Dict[str, Any]) -> None:
+    """Validate the 'positive_label' setting of a text classifier config.
+
+    Does nothing unless `pipe_config` has a truthy "positive_label". Otherwise
+    the label must be one of the component's labels and there must be exactly
+    two labels; each violation is reported via `msg.fail(..., exits=1)`.
+
+    nlp (Language): The pipeline holding the text classifier.
+    pipe_config (Dict[str, Any]): The config block of the component.
+    """
+    pos_label = pipe_config.get("positive_label")
+    if not pos_label:
+        return
+    # NOTE(review): the component is looked up by the literal name "textcat";
+    # confirm this holds when the component was added under another name.
+    labels = nlp.get_pipe("textcat").cfg.get("labels", [])
+    label_is_known = pos_label in labels
+    if not label_is_known:
+        msg.fail(
+            f"The textcat's 'positive_label' config setting '{pos_label}' "
+            f"does not match any label in the training data.",
+            exits=1,
+        )
+    is_binary = len(labels) == 2
+    if not is_binary:
+        msg.fail(
+            f"A textcat 'positive_label' '{pos_label}' was "
+            f"provided for training data that does not appear to be a "
+            f"binary classification problem with two labels.",
+            exits=1,
+        )
diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py
index 93abad6f6..9a75ed6f3 100644
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@@ -1,148 +1,110 @@
-# coding: utf8
-from __future__ import unicode_literals, print_function
-
+from typing import Tuple
from pathlib import Path
import sys
import requests
-import srsly
-from wasabi import msg
+from wasabi import msg, Printer
-from ..compat import path2str
-from ..util import get_data_path
+from ._util import app
from .. import about
+from ..util import get_package_version, get_installed_models, get_base_version
+from ..util import get_package_path, get_model_meta, is_compatible_version
-def validate():
+@app.command("validate")
+def validate_cli():
"""
- Validate that the currently installed version of spaCy is compatible
- with the installed models. Should be run after `pip install -U spacy`.
+ Validate the currently installed pipeline packages and spaCy version. Checks
+ if the installed packages are compatible and shows upgrade instructions if
+ available. Should be run after `pip install -U spacy`.
+
+ DOCS: https://nightly.spacy.io/api/cli#validate
"""
+ validate()
+
+
+def validate() -> None:
+    """Print a compatibility report for the installed pipeline packages.
+
+    Fetches the installed packages and the compatibility table via
+    `get_model_pkgs`, prints one table row per package, and lists download
+    commands for incompatible packages that appear in the table entry for the
+    current spaCy version. Exits with status 0 if no packages are installed
+    and with status 1 if any installed package is incompatible.
+    """
+    model_pkgs, compat = get_model_pkgs()
+    spacy_version = get_base_version(about.__version__)
+    # `compat` is keyed by spaCy version; this is the slice for the running one.
+    current_compat = compat.get(spacy_version, {})
+    if not current_compat:
+        msg.warn(f"No compatible packages found for v{spacy_version} of spaCy")
+    incompat_models = {d["name"] for _, d in model_pkgs.items() if not d["compat"]}
+    na_models = [m for m in incompat_models if m not in current_compat]
+    update_models = [m for m in incompat_models if m in current_compat]
+    spacy_dir = Path(__file__).parent.parent
+
+    msg.divider(f"Installed pipeline packages (spaCy v{about.__version__})")
+    msg.info(f"spaCy installation: {spacy_dir}")
+
+    if model_pkgs:
+        header = ("NAME", "SPACY", "VERSION", "")
+        rows = []
+        for name, data in model_pkgs.items():
+            if data["compat"]:
+                comp = msg.text("", color="green", icon="good", no_print=True)
+                version = msg.text(data["version"], color="green", no_print=True)
+            else:
+                version = msg.text(data["version"], color="red", no_print=True)
+                # Look the package up in the per-version slice: the full
+                # table is keyed by spaCy version, so indexing it by package
+                # name would always fall back to "n/a".
+                comp = f"--> {current_compat.get(data['name'], ['n/a'])[0]}"
+            rows.append((data["name"], data["spacy"], version, comp))
+        msg.table(rows, header=header)
+    else:
+        msg.text("No pipeline packages found in your current environment.", exits=0)
+    if update_models:
+        msg.divider("Install updates")
+        msg.text("Use the following commands to update the packages:")
+        cmd = "python -m spacy download {}"
+        print("\n".join([cmd.format(pkg) for pkg in update_models]) + "\n")
+    if na_models:
+        msg.info(
+            f"The following packages are custom spaCy pipelines or not "
+            f"available for spaCy v{about.__version__}:",
+            ", ".join(na_models),
+        )
+    if incompat_models:
+        sys.exit(1)
+
+
+def get_model_pkgs(silent: bool = False) -> Tuple[dict, dict]:
+ msg = Printer(no_print=silent, pretty=not silent)
with msg.loading("Loading compatibility table..."):
r = requests.get(about.__compatibility__)
if r.status_code != 200:
msg.fail(
- "Server error ({})".format(r.status_code),
+ f"Server error ({r.status_code})",
"Couldn't fetch compatibility table.",
exits=1,
)
msg.good("Loaded compatibility table")
compat = r.json()["spacy"]
- version = about.__version__
- version = version.rsplit(".dev", 1)[0]
- current_compat = compat.get(version)
- if not current_compat:
- msg.fail(
- "Can't find spaCy v{} in compatibility table".format(version),
- about.__compatibility__,
- exits=1,
- )
all_models = set()
+ installed_models = get_installed_models()
for spacy_v, models in dict(compat).items():
all_models.update(models.keys())
for model, model_vs in models.items():
compat[spacy_v][model] = [reformat_version(v) for v in model_vs]
- model_links = get_model_links(current_compat)
- model_pkgs = get_model_pkgs(current_compat, all_models)
- incompat_links = {l for l, d in model_links.items() if not d["compat"]}
- incompat_models = {d["name"] for _, d in model_pkgs.items() if not d["compat"]}
- incompat_models.update(
- [d["name"] for _, d in model_links.items() if not d["compat"]]
- )
- na_models = [m for m in incompat_models if m not in current_compat]
- update_models = [m for m in incompat_models if m in current_compat]
- spacy_dir = Path(__file__).parent.parent
-
- msg.divider("Installed models (spaCy v{})".format(about.__version__))
- msg.info("spaCy installation: {}".format(path2str(spacy_dir)))
-
- if model_links or model_pkgs:
- header = ("TYPE", "NAME", "MODEL", "VERSION", "")
- rows = []
- for name, data in model_pkgs.items():
- rows.append(get_model_row(current_compat, name, data, msg))
- for name, data in model_links.items():
- rows.append(get_model_row(current_compat, name, data, msg, "link"))
- msg.table(rows, header=header)
- else:
- msg.text("No models found in your current environment.", exits=0)
- if update_models:
- msg.divider("Install updates")
- msg.text("Use the following commands to update the model packages:")
- cmd = "python -m spacy download {}"
- print("\n".join([cmd.format(pkg) for pkg in update_models]) + "\n")
- if na_models:
- msg.text(
- "The following models are not available for spaCy "
- "v{}: {}".format(about.__version__, ", ".join(na_models))
- )
- if incompat_links:
- msg.text(
- "You may also want to overwrite the incompatible links using the "
- "`python -m spacy link` command with `--force`, or remove them "
- "from the data directory. "
- "Data path: {path}".format(path=path2str(get_data_path()))
- )
- if incompat_models or incompat_links:
- sys.exit(1)
-
-
-def get_model_links(compat):
- links = {}
- data_path = get_data_path()
- if data_path:
- models = [p for p in data_path.iterdir() if is_model_path(p)]
- for model in models:
- meta_path = Path(model) / "meta.json"
- if not meta_path.exists():
- continue
- meta = srsly.read_json(meta_path)
- link = model.parts[-1]
- name = meta["lang"] + "_" + meta["name"]
- links[link] = {
- "name": name,
- "version": meta["version"],
- "compat": is_compat(compat, name, meta["version"]),
- }
- return links
-
-
-def get_model_pkgs(compat, all_models):
- import pkg_resources
-
pkgs = {}
- for pkg_name, pkg_data in pkg_resources.working_set.by_key.items():
+ for pkg_name in installed_models:
package = pkg_name.replace("-", "_")
- if package in all_models:
- version = pkg_data.version
- pkgs[pkg_name] = {
- "name": package,
- "version": version,
- "compat": is_compat(compat, package, version),
- }
- return pkgs
+ version = get_package_version(pkg_name)
+ if package in compat:
+ is_compat = version in compat[package]
+ spacy_version = about.__version__
+ else:
+ model_path = get_package_path(package)
+ model_meta = get_model_meta(model_path)
+ spacy_version = model_meta.get("spacy_version", "n/a")
+ is_compat = is_compatible_version(about.__version__, spacy_version)
+ pkgs[pkg_name] = {
+ "name": package,
+ "version": version,
+ "spacy": spacy_version,
+ "compat": is_compat,
+ }
+ return pkgs, compat
-def get_model_row(compat, name, data, msg, model_type="package"):
- if data["compat"]:
- comp = msg.text("", color="green", icon="good", no_print=True)
- version = msg.text(data["version"], color="green", no_print=True)
- else:
- version = msg.text(data["version"], color="red", no_print=True)
- comp = "--> {}".format(compat.get(data["name"], ["n/a"])[0])
- return (model_type, name, data["name"], version, comp)
-
-
-def is_model_path(model_path):
- exclude = ["cache", "pycache", "__pycache__"]
- name = model_path.parts[-1]
- return model_path.is_dir() and name not in exclude and not name.startswith(".")
-
-
-def is_compat(compat, name, version):
- return name in compat and version in compat[name]
-
-
-def reformat_version(version):
+def reformat_version(version: str) -> str:
"""Hack to reformat old versions ending on '-alpha' to match pip format."""
if version.endswith("-alpha"):
return version.replace("-alpha", "a0")
diff --git a/spacy/compat.py b/spacy/compat.py
index 0ea31c6b3..2d51ff0ae 100644
--- a/spacy/compat.py
+++ b/spacy/compat.py
@@ -1,20 +1,6 @@
-# coding: utf8
-"""
-Helpers for Python and platform compatibility. To distinguish them from
-the builtin functions, replacement functions are suffixed with an underscore,
-e.g. `unicode_`.
-
-DOCS: https://spacy.io/api/top-level#compat
-"""
-from __future__ import unicode_literals
-
-import os
+"""Helpers for Python and platform compatibility."""
import sys
-import itertools
-import ast
-import types
-
-from thinc.neural.util import copy_array
+from thinc.util import copy_array
try:
import cPickle as pickle
@@ -36,146 +22,14 @@ try:
except ImportError:
cupy = None
-try:
- from thinc.neural.optimizers import Optimizer # noqa: F401
-except ImportError:
- from thinc.neural.optimizers import Adam as Optimizer # noqa: F401
+from thinc.api import Optimizer # noqa: F401
pickle = pickle
copy_reg = copy_reg
CudaStream = CudaStream
cupy = cupy
copy_array = copy_array
-izip = getattr(itertools, "izip", zip)
is_windows = sys.platform.startswith("win")
is_linux = sys.platform.startswith("linux")
is_osx = sys.platform == "darwin"
-
-# See: https://github.com/benjaminp/six/blob/master/six.py
-is_python2 = sys.version_info[0] == 2
-is_python3 = sys.version_info[0] == 3
-is_python_pre_3_5 = is_python2 or (is_python3 and sys.version_info[1] < 5)
-
-if is_python2:
- bytes_ = str
- unicode_ = unicode # noqa: F821
- basestring_ = basestring # noqa: F821
- input_ = raw_input # noqa: F821
- path2str = lambda path: str(path).decode("utf8")
- class_types = (type, types.ClassType)
-
-elif is_python3:
- bytes_ = bytes
- unicode_ = str
- basestring_ = str
- input_ = input
- path2str = lambda path: str(path)
- class_types = (type, types.ClassType) if is_python_pre_3_5 else type
-
-
-def b_to_str(b_str):
- """Convert a bytes object to a string.
-
- b_str (bytes): The object to convert.
- RETURNS (unicode): The converted string.
- """
- if is_python2:
- return b_str
- # Important: if no encoding is set, string becomes "b'...'"
- return str(b_str, encoding="utf8")
-
-
-def symlink_to(orig, dest):
- """Create a symlink. Used for model shortcut links.
-
- orig (unicode / Path): The origin path.
- dest (unicode / Path): The destination path of the symlink.
- """
- if is_windows:
- import subprocess
-
- subprocess.check_call(
- ["mklink", "/d", path2str(orig), path2str(dest)], shell=True
- )
- else:
- orig.symlink_to(dest)
-
-
-def symlink_remove(link):
- """Remove a symlink. Used for model shortcut links.
-
- link (unicode / Path): The path to the symlink.
- """
- # https://stackoverflow.com/q/26554135/6400719
- if os.path.isdir(path2str(link)) and is_windows:
- # this should only be on Py2.7 and windows
- os.rmdir(path2str(link))
- else:
- os.unlink(path2str(link))
-
-
-def is_config(python2=None, python3=None, windows=None, linux=None, osx=None):
- """Check if a specific configuration of Python version and operating system
- matches the user's setup. Mostly used to display targeted error messages.
-
- python2 (bool): spaCy is executed with Python 2.x.
- python3 (bool): spaCy is executed with Python 3.x.
- windows (bool): spaCy is executed on Windows.
- linux (bool): spaCy is executed on Linux.
- osx (bool): spaCy is executed on OS X or macOS.
- RETURNS (bool): Whether the configuration matches the user's platform.
-
- DOCS: https://spacy.io/api/top-level#compat.is_config
- """
- return (
- python2 in (None, is_python2)
- and python3 in (None, is_python3)
- and windows in (None, is_windows)
- and linux in (None, is_linux)
- and osx in (None, is_osx)
- )
-
-
-def import_file(name, loc):
- """Import module from a file. Used to load models from a directory.
-
- name (unicode): Name of module to load.
- loc (unicode / Path): Path to the file.
- RETURNS: The loaded module.
- """
- loc = path2str(loc)
- if is_python_pre_3_5:
- import imp
-
- return imp.load_source(name, loc)
- else:
- import importlib.util
-
- spec = importlib.util.spec_from_file_location(name, str(loc))
- module = importlib.util.module_from_spec(spec)
- spec.loader.exec_module(module)
- return module
-
-
-def unescape_unicode(string):
- """Python2.7's re module chokes when compiling patterns that have ranges
- between escaped unicode codepoints if the two codepoints are unrecognised
- in the unicode database. For instance:
-
- re.compile('[\\uAA77-\\uAA79]').findall("hello")
-
- Ends up matching every character (on Python 2). This problem doesn't occur
- if we're dealing with unicode literals.
- """
- if string is None:
- return string
- # We only want to unescape the unicode, so we first must protect the other
- # backslashes.
- string = string.replace("\\", "\\\\")
- # Now we remove that protection for the unicode.
- string = string.replace("\\\\u", "\\u")
- string = string.replace("\\\\U", "\\U")
- # Now we unescape by evaling the string with the AST. This can't execute
- # code -- it only does the representational level.
- return ast.literal_eval("u'''" + string + "'''")
diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg
new file mode 100644
index 000000000..9507f0f0a
--- /dev/null
+++ b/spacy/default_config.cfg
@@ -0,0 +1,96 @@
+[paths]
+train = ""
+dev = ""
+raw = null
+init_tok2vec = null
+
+[system]
+seed = 0
+use_pytorch_for_gpu_memory = false
+
+[nlp]
+lang = null
+pipeline = []
+disabled = []
+load_vocab_data = true
+before_creation = null
+after_creation = null
+after_pipeline_creation = null
+
+[nlp.tokenizer]
+@tokenizers = "spacy.Tokenizer.v1"
+
+[components]
+
+# Training hyper-parameters and additional features.
+[training]
+seed = ${system.seed}
+dropout = 0.1
+accumulate_gradient = 1
+# Extra resources for transfer-learning or pseudo-rehearsal
+init_tok2vec = ${paths.init_tok2vec}
+raw_text = ${paths.raw}
+vectors = null
+# Controls early-stopping. 0 or -1 mean unlimited.
+patience = 1600
+max_epochs = 0
+max_steps = 20000
+eval_frequency = 200
+# Control how scores are printed and checkpoints are evaluated.
+score_weights = {}
+# Names of pipeline components that shouldn't be updated during training
+frozen_components = []
+
+[training.logger]
+@loggers = "spacy.ConsoleLogger.v1"
+
+[training.train_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+# Whether to train on sequences with 'gold standard' sentence boundaries
+# and tokens. If you set this to true, take care to ensure your run-time
+# data is passed in sentence-by-sentence via some prior preprocessing.
+gold_preproc = false
+# Limitations on training document length
+max_length = 2000
+# Limitation on number of training examples
+limit = 0
+
+[training.dev_corpus]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+# Whether to train on sequences with 'gold standard' sentence boundaries
+# and tokens. If you set this to true, take care to ensure your run-time
+# data is passed in sentence-by-sentence via some prior preprocessing.
+gold_preproc = false
+# Limitations on training document length
+max_length = 2000
+# Limitation on number of training examples
+limit = 0
+
+[training.batcher]
+@batchers = "spacy.batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 1.0
+use_averages = false
+eps = 1e-8
+
+[training.optimizer.learn_rate]
+@schedules = "warmup_linear.v1"
+warmup_steps = 250
+total_steps = 20000
+initial_rate = 0.001
diff --git a/spacy/default_config_pretraining.cfg b/spacy/default_config_pretraining.cfg
new file mode 100644
index 000000000..7032eac03
--- /dev/null
+++ b/spacy/default_config_pretraining.cfg
@@ -0,0 +1,25 @@
+[pretraining]
+max_epochs = 1000
+min_length = 5
+max_length = 500
+dropout = 0.2
+n_save_every = null
+batch_size = 3000
+seed = ${system.seed}
+use_pytorch_for_gpu_memory = ${system.use_pytorch_for_gpu_memory}
+tok2vec_model = "components.tok2vec.model"
+
+[pretraining.objective]
+type = "characters"
+n_characters = 4
+
+[pretraining.optimizer]
+@optimizers = "Adam.v1"
+beta1 = 0.9
+beta2 = 0.999
+L2_is_weight_decay = true
+L2 = 0.01
+grad_clip = 1.0
+use_averages = true
+eps = 1e-8
+learn_rate = 0.001
diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index a0cccbbde..0e80c3b5f 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -1,17 +1,14 @@
-# coding: utf8
"""
spaCy's built in visualization suite for dependencies and named entities.
-DOCS: https://spacy.io/api/top-level#displacy
-USAGE: https://spacy.io/usage/visualizers
+DOCS: https://nightly.spacy.io/api/top-level#displacy
+USAGE: https://nightly.spacy.io/usage/visualizers
"""
-from __future__ import unicode_literals
-
+from typing import Union, Iterable, Optional, Dict, Any, Callable
import warnings
from .render import DependencyRenderer, EntityRenderer
from ..tokens import Doc, Span
-from ..compat import b_to_str
from ..errors import Errors, Warnings
from ..util import is_in_jupyter
@@ -21,21 +18,27 @@ RENDER_WRAPPER = None
def render(
- docs, style="dep", page=False, minify=False, jupyter=None, options={}, manual=False
-):
+ docs: Union[Iterable[Union[Doc, Span]], Doc, Span],
+ style: str = "dep",
+ page: bool = False,
+ minify: bool = False,
+ jupyter: Optional[bool] = None,
+ options: Dict[str, Any] = {},
+ manual: bool = False,
+) -> str:
"""Render displaCy visualisation.
- docs (list or Doc): Document(s) to visualise.
- style (unicode): Visualisation style, 'dep' or 'ent'.
+ docs (Union[Iterable[Doc], Doc]): Document(s) to visualise.
+ style (str): Visualisation style, 'dep' or 'ent'.
page (bool): Render markup as full HTML page.
minify (bool): Minify HTML markup.
jupyter (bool): Override Jupyter auto-detection.
options (dict): Visualiser-specific options, e.g. colors.
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
- RETURNS (unicode): Rendered HTML markup.
+ RETURNS (str): Rendered HTML markup.
- DOCS: https://spacy.io/api/top-level#displacy.render
- USAGE: https://spacy.io/usage/visualizers
+ DOCS: https://nightly.spacy.io/api/top-level#displacy.render
+ USAGE: https://nightly.spacy.io/usage/visualizers
"""
factories = {
"dep": (DependencyRenderer, parse_deps),
@@ -48,8 +51,8 @@ def render(
docs = [obj if not isinstance(obj, Span) else obj.as_doc() for obj in docs]
if not all(isinstance(obj, (Doc, Span, dict)) for obj in docs):
raise ValueError(Errors.E096)
- renderer, converter = factories[style]
- renderer = renderer(options=options)
+ renderer_func, converter = factories[style]
+ renderer = renderer_func(options=options)
parsed = [converter(doc, options) for doc in docs] if not manual else docs
_html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip()
html = _html["parsed"]
@@ -65,55 +68,53 @@ def render(
def serve(
- docs,
- style="dep",
- page=True,
- minify=False,
- options={},
- manual=False,
- port=5000,
- host="0.0.0.0",
-):
+ docs: Union[Iterable[Doc], Doc],
+ style: str = "dep",
+ page: bool = True,
+ minify: bool = False,
+ options: Dict[str, Any] = {},
+ manual: bool = False,
+ port: int = 5000,
+ host: str = "0.0.0.0",
+) -> None:
"""Serve displaCy visualisation.
docs (list or Doc): Document(s) to visualise.
- style (unicode): Visualisation style, 'dep' or 'ent'.
+ style (str): Visualisation style, 'dep' or 'ent'.
page (bool): Render markup as full HTML page.
minify (bool): Minify HTML markup.
options (dict): Visualiser-specific options, e.g. colors.
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
port (int): Port to serve visualisation.
- host (unicode): Host to serve visualisation.
+ host (str): Host to serve visualisation.
- DOCS: https://spacy.io/api/top-level#displacy.serve
- USAGE: https://spacy.io/usage/visualizers
+ DOCS: https://nightly.spacy.io/api/top-level#displacy.serve
+ USAGE: https://nightly.spacy.io/usage/visualizers
"""
from wsgiref import simple_server
if is_in_jupyter():
warnings.warn(Warnings.W011)
-
render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
httpd = simple_server.make_server(host, port, app)
- print("\nUsing the '{}' visualizer".format(style))
- print("Serving on http://{}:{} ...\n".format(host, port))
+ print(f"\nUsing the '{style}' visualizer")
+ print(f"Serving on http://{host}:{port} ...\n")
try:
httpd.serve_forever()
except KeyboardInterrupt:
- print("Shutting down server on port {}.".format(port))
+ print(f"Shutting down server on port {port}.")
finally:
httpd.server_close()
def app(environ, start_response):
- # Headers and status need to be bytes in Python 2, see #1227
- headers = [(b_to_str(b"Content-type"), b_to_str(b"text/html; charset=utf-8"))]
- start_response(b_to_str(b"200 OK"), headers)
+ headers = [("Content-type", "text/html; charset=utf-8")]
+ start_response("200 OK", headers)
res = _html["parsed"].encode(encoding="utf-8")
return [res]
-def parse_deps(orig_doc, options={}):
+def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
"""Generate dependency parse in {'words': [], 'arcs': []} format.
doc (Doc): Document do parse.
@@ -156,7 +157,6 @@ def parse_deps(orig_doc, options={}):
}
for w in doc
]
-
arcs = []
for word in doc:
if word.i < word.head.i:
@@ -175,7 +175,7 @@ def parse_deps(orig_doc, options={}):
return {"words": words, "arcs": arcs, "settings": get_doc_settings(orig_doc)}
-def parse_ents(doc, options={}):
+def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
"""Generate named entities in [{start: i, end: i, label: 'label'}] format.
doc (Doc): Document do parse.
@@ -192,7 +192,7 @@ def parse_ents(doc, options={}):
return {"text": doc.text, "ents": ents, "title": title, "settings": settings}
-def set_render_wrapper(func):
+def set_render_wrapper(func: Callable[[str], str]) -> None:
"""Set an optional wrapper function that is called around the generated
HTML markup on displacy.render. This can be used to allow integration into
other platforms, similar to Jupyter Notebooks that require functions to be
@@ -209,7 +209,7 @@ def set_render_wrapper(func):
RENDER_WRAPPER = func
-def get_doc_settings(doc):
+def get_doc_settings(doc: Doc) -> Dict[str, Any]:
return {
"lang": doc.lang_,
"direction": doc.vocab.writing_system.get("direction", "ltr"),
diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index 431e02841..ba56beca3 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -1,30 +1,44 @@
-# coding: utf8
-from __future__ import unicode_literals
-
+from typing import Dict, Any, List, Optional, Union
import uuid
-from .templates import (
- TPL_DEP_SVG,
- TPL_DEP_WORDS,
- TPL_DEP_WORDS_LEMMA,
- TPL_DEP_ARCS,
- TPL_ENTS,
-)
+from .templates import TPL_DEP_SVG, TPL_DEP_WORDS, TPL_DEP_WORDS_LEMMA, TPL_DEP_ARCS
from .templates import TPL_ENT, TPL_ENT_RTL, TPL_FIGURE, TPL_TITLE, TPL_PAGE
+from .templates import TPL_ENTS
from ..util import minify_html, escape_html, registry
from ..errors import Errors
DEFAULT_LANG = "en"
DEFAULT_DIR = "ltr"
+DEFAULT_ENTITY_COLOR = "#ddd"
+DEFAULT_LABEL_COLORS = {
+ "ORG": "#7aecec",
+ "PRODUCT": "#bfeeb7",
+ "GPE": "#feca74",
+ "LOC": "#ff9561",
+ "PERSON": "#aa9cfc",
+ "NORP": "#c887fb",
+ "FACILITY": "#9cc9cc",
+ "EVENT": "#ffeb80",
+ "LAW": "#ff8197",
+ "LANGUAGE": "#ff8197",
+ "WORK_OF_ART": "#f0d0ff",
+ "DATE": "#bfe1d9",
+ "TIME": "#bfe1d9",
+ "MONEY": "#e4e7d2",
+ "QUANTITY": "#e4e7d2",
+ "ORDINAL": "#e4e7d2",
+ "CARDINAL": "#e4e7d2",
+ "PERCENT": "#e4e7d2",
+}
-class DependencyRenderer(object):
+class DependencyRenderer:
"""Render dependency parses as SVGs."""
style = "dep"
- def __init__(self, options={}):
+ def __init__(self, options: Dict[str, Any] = {}) -> None:
"""Initialise dependency renderer.
options (dict): Visualiser-specific options (compact, word_spacing,
@@ -44,13 +58,15 @@ class DependencyRenderer(object):
self.direction = DEFAULT_DIR
self.lang = DEFAULT_LANG
- def render(self, parsed, page=False, minify=False):
+ def render(
+ self, parsed: List[Dict[str, Any]], page: bool = False, minify: bool = False
+ ) -> str:
"""Render complete markup.
parsed (list): Dependency parses to render.
page (bool): Render parses wrapped as full HTML page.
minify (bool): Minify HTML markup.
- RETURNS (unicode): Rendered SVG or HTML markup.
+ RETURNS (str): Rendered SVG or HTML markup.
"""
# Create a random ID prefix to make sure parses don't receive the
# same ID, even if they're identical
@@ -61,7 +77,7 @@ class DependencyRenderer(object):
settings = p.get("settings", {})
self.direction = settings.get("direction", DEFAULT_DIR)
self.lang = settings.get("lang", DEFAULT_LANG)
- render_id = "{}-{}".format(id_prefix, i)
+ render_id = f"{id_prefix}-{i}"
svg = self.render_svg(render_id, p["words"], p["arcs"])
rendered.append(svg)
if page:
@@ -75,13 +91,18 @@ class DependencyRenderer(object):
return minify_html(markup)
return markup
- def render_svg(self, render_id, words, arcs):
+ def render_svg(
+ self,
+ render_id: Union[int, str],
+ words: List[Dict[str, Any]],
+ arcs: List[Dict[str, Any]],
+ ) -> str:
"""Render SVG.
- render_id (int): Unique ID, typically index of document.
+ render_id (Union[int, str]): Unique ID, typically index of document.
words (list): Individual words and their tags.
arcs (list): Individual arcs and their start, end, direction and label.
- RETURNS (unicode): Rendered SVG markup.
+ RETURNS (str): Rendered SVG markup.
"""
self.levels = self.get_levels(arcs)
self.highest_level = len(self.levels)
@@ -89,15 +110,15 @@ class DependencyRenderer(object):
self.width = self.offset_x + len(words) * self.distance
self.height = self.offset_y + 3 * self.word_spacing
self.id = render_id
- words = [
+ words_svg = [
self.render_word(w["text"], w["tag"], w.get("lemma", None), i)
for i, w in enumerate(words)
]
- arcs = [
+ arcs_svg = [
self.render_arrow(a["label"], a["start"], a["end"], a["dir"], i)
for i, a in enumerate(arcs)
]
- content = "".join(words) + "".join(arcs)
+ content = "".join(words_svg) + "".join(arcs_svg)
return TPL_DEP_SVG.format(
id=self.id,
width=self.width,
@@ -110,15 +131,13 @@ class DependencyRenderer(object):
lang=self.lang,
)
- def render_word(
- self, text, tag, lemma, i,
- ):
+ def render_word(self, text: str, tag: str, lemma: str, i: int) -> str:
"""Render individual word.
- text (unicode): Word text.
- tag (unicode): Part-of-speech tag.
+ text (str): Word text.
+ tag (str): Part-of-speech tag.
i (int): Unique ID, typically word index.
- RETURNS (unicode): Rendered SVG markup.
+ RETURNS (str): Rendered SVG markup.
"""
y = self.offset_y + self.word_spacing
x = self.offset_x + i * self.distance
@@ -131,15 +150,17 @@ class DependencyRenderer(object):
)
return TPL_DEP_WORDS.format(text=html_text, tag=tag, x=x, y=y)
- def render_arrow(self, label, start, end, direction, i):
+ def render_arrow(
+ self, label: str, start: int, end: int, direction: str, i: int
+ ) -> str:
"""Render individual arrow.
- label (unicode): Dependency label.
+ label (str): Dependency label.
start (int): Index of start word.
end (int): Index of end word.
- direction (unicode): Arrow direction, 'left' or 'right'.
+ direction (str): Arrow direction, 'left' or 'right'.
i (int): Unique ID, typically arrow index.
- RETURNS (unicode): Rendered SVG markup.
+ RETURNS (str): Rendered SVG markup.
"""
if start < 0 or end < 0:
error_args = dict(start=start, end=end, label=label, dir=direction)
@@ -175,48 +196,36 @@ class DependencyRenderer(object):
arc=arc,
)
- def get_arc(self, x_start, y, y_curve, x_end):
+ def get_arc(self, x_start: int, y: int, y_curve: int, x_end: int) -> str:
"""Render individual arc.
x_start (int): X-coordinate of arrow start point.
y (int): Y-coordinate of arrow start and end point.
y_curve (int): Y-corrdinate of Cubic Bézier y_curve point.
x_end (int): X-coordinate of arrow end point.
- RETURNS (unicode): Definition of the arc path ('d' attribute).
+ RETURNS (str): Definition of the arc path ('d' attribute).
"""
template = "M{x},{y} C{x},{c} {e},{c} {e},{y}"
if self.compact:
template = "M{x},{y} {x},{c} {e},{c} {e},{y}"
return template.format(x=x_start, y=y, c=y_curve, e=x_end)
- def get_arrowhead(self, direction, x, y, end):
+ def get_arrowhead(self, direction: str, x: int, y: int, end: int) -> str:
"""Render individual arrow head.
- direction (unicode): Arrow direction, 'left' or 'right'.
+ direction (str): Arrow direction, 'left' or 'right'.
x (int): X-coordinate of arrow start point.
y (int): Y-coordinate of arrow start and end point.
end (int): X-coordinate of arrow end point.
- RETURNS (unicode): Definition of the arrow head path ('d' attribute).
+ RETURNS (str): Definition of the arrow head path ('d' attribute).
"""
if direction == "left":
- pos1, pos2, pos3 = (x, x - self.arrow_width + 2, x + self.arrow_width - 2)
+ p1, p2, p3 = (x, x - self.arrow_width + 2, x + self.arrow_width - 2)
else:
- pos1, pos2, pos3 = (
- end,
- end + self.arrow_width - 2,
- end - self.arrow_width + 2,
- )
- arrowhead = (
- pos1,
- y + 2,
- pos2,
- y - self.arrow_width,
- pos3,
- y - self.arrow_width,
- )
- return "M{},{} L{},{} {},{}".format(*arrowhead)
+ p1, p2, p3 = (end, end + self.arrow_width - 2, end - self.arrow_width + 2)
+ return f"M{p1},{y + 2} L{p2},{y - self.arrow_width} {p3},{y - self.arrow_width}"
- def get_levels(self, arcs):
+ def get_levels(self, arcs: List[Dict[str, Any]]) -> List[int]:
"""Calculate available arc height "levels".
Used to calculate arrow heights dynamically and without wasting space.
@@ -227,46 +236,34 @@ class DependencyRenderer(object):
return sorted(list(levels))
-class EntityRenderer(object):
+class EntityRenderer:
"""Render named entities as HTML."""
style = "ent"
- def __init__(self, options={}):
+ def __init__(self, options: Dict[str, Any] = {}) -> None:
"""Initialise dependency renderer.
options (dict): Visualiser-specific options (colors, ents)
"""
- colors = {
- "ORG": "#7aecec",
- "PRODUCT": "#bfeeb7",
- "GPE": "#feca74",
- "LOC": "#ff9561",
- "PERSON": "#aa9cfc",
- "NORP": "#c887fb",
- "FACILITY": "#9cc9cc",
- "EVENT": "#ffeb80",
- "LAW": "#ff8197",
- "LANGUAGE": "#ff8197",
- "WORK_OF_ART": "#f0d0ff",
- "DATE": "#bfe1d9",
- "TIME": "#bfe1d9",
- "MONEY": "#e4e7d2",
- "QUANTITY": "#e4e7d2",
- "ORDINAL": "#e4e7d2",
- "CARDINAL": "#e4e7d2",
- "PERCENT": "#e4e7d2",
- }
+ colors = dict(DEFAULT_LABEL_COLORS)
user_colors = registry.displacy_colors.get_all()
for user_color in user_colors.values():
+ if callable(user_color):
+ # Since this comes from the function registry, we want to make
+ # sure we support functions that *return* a dict of colors
+ user_color = user_color()
+ if not isinstance(user_color, dict):
+ raise ValueError(Errors.E925.format(obj=type(user_color)))
colors.update(user_color)
colors.update(options.get("colors", {}))
- self.default_color = "#ddd"
- self.colors = colors
+ self.default_color = DEFAULT_ENTITY_COLOR
+ self.colors = {label.upper(): color for label, color in colors.items()}
self.ents = options.get("ents", None)
+ if self.ents is not None:
+ self.ents = [ent.upper() for ent in self.ents]
self.direction = DEFAULT_DIR
self.lang = DEFAULT_LANG
-
template = options.get("template")
if template:
self.ent_template = template
@@ -276,13 +273,15 @@ class EntityRenderer(object):
else:
self.ent_template = TPL_ENT
- def render(self, parsed, page=False, minify=False):
+ def render(
+ self, parsed: List[Dict[str, Any]], page: bool = False, minify: bool = False
+ ) -> str:
"""Render complete markup.
parsed (list): Dependency parses to render.
page (bool): Render parses wrapped as full HTML page.
minify (bool): Minify HTML markup.
- RETURNS (unicode): Rendered HTML markup.
+ RETURNS (str): Rendered HTML markup.
"""
rendered = []
for i, p in enumerate(parsed):
@@ -300,12 +299,14 @@ class EntityRenderer(object):
return minify_html(markup)
return markup
- def render_ents(self, text, spans, title):
+ def render_ents(
+ self, text: str, spans: List[Dict[str, Any]], title: Optional[str]
+ ) -> str:
"""Render entities in text.
- text (unicode): Original text.
+ text (str): Original text.
spans (list): Individual entity spans and their start, end and label.
- title (unicode or None): Document title set in Doc.user_data['title'].
+ title (str / None): Document title set in Doc.user_data['title'].
"""
markup = ""
offset = 0
diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py
index f29eab86f..b9cbf717b 100644
--- a/spacy/displacy/templates.py
+++ b/spacy/displacy/templates.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Setting explicit height and max-width: none on the SVG is required for
# Jupyter to render it properly in a cell
@@ -55,14 +51,14 @@ TPL_ENTS = """
TPL_ENT = """
{text}
- {label}
+ {label}
"""
TPL_ENT_RTL = """
{text}
- {label}
+ {label}
"""
diff --git a/spacy/errors.py b/spacy/errors.py
index 7f9164694..8f0666753 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
def add_codes(err_cls):
"""Add error codes to string messages via class attribute names."""
@@ -19,24 +15,14 @@ def add_codes(err_cls):
# fmt: off
@add_codes
-class Warnings(object):
- W001 = ("As of spaCy v2.0, the keyword argument `path=` is deprecated. "
- "You can now call spacy.load with the path as its first argument, "
- "and the model's meta.json will be used to determine the language "
- "to load. For example:\nnlp = spacy.load('{path}')")
- W002 = ("Tokenizer.from_list is now deprecated. Create a new Doc object "
- "instead and pass in the strings as the `words` keyword argument, "
- "for example:\nfrom spacy.tokens import Doc\n"
- "doc = Doc(nlp.vocab, words=[...])")
- W003 = ("Positional arguments to Doc.merge are deprecated. Instead, use "
- "the keyword arguments, for example tag=, lemma= or ent_type=.")
+class Warnings:
W004 = ("No text fixing enabled. Run `pip install ftfy` to enable fixing "
"using ftfy.fix_text if necessary.")
W005 = ("Doc object not parsed. This means displaCy won't be able to "
"generate a dependency visualization for it. Make sure the Doc "
"was processed with a model that supports dependency parsing, and "
"not just a language class like `English()`. For more info, see "
- "the docs:\nhttps://spacy.io/usage/models")
+ "the docs:\nhttps://nightly.spacy.io/usage/models")
W006 = ("No entities to visualize found in Doc object. If this is "
"surprising to you, make sure the Doc was processed using a model "
"that supports named entity recognition, and check the `doc.ents` "
@@ -49,12 +35,6 @@ class Warnings(object):
"use context-sensitive tensors. You can always add your own word "
"vectors, or use one of the larger models instead if available.")
W008 = ("Evaluating {obj}.similarity based on empty vectors.")
- W009 = ("Custom factory '{name}' provided by entry points of another "
- "package overwrites built-in factory.")
- W010 = ("As of v2.1.0, the PhraseMatcher doesn't have a phrase length "
- "limit anymore, so the max_length argument is now deprecated. "
- "If you did not specify this parameter, make sure you call the "
- "constructor with named arguments instead of positional ones.")
W011 = ("It looks like you're calling displacy.serve from within a "
"Jupyter notebook or a similar environment. This likely means "
"you're already running a local web server, so there's no need to "
@@ -68,39 +48,15 @@ class Warnings(object):
"components are applied. To only create tokenized Doc objects, "
"try using `nlp.make_doc(text)` or process all texts as a stream "
"using `list(nlp.tokenizer.pipe(all_texts))`.")
- W013 = ("As of v2.1.0, {obj}.merge is deprecated. Please use the more "
- "efficient and less error-prone Doc.retokenize context manager "
- "instead.")
- W014 = ("As of v2.1.0, the `disable` keyword argument on the serialization "
- "methods is and should be replaced with `exclude`. This makes it "
- "consistent with the other serializable objects.")
- W015 = ("As of v2.1.0, the use of keyword arguments to exclude fields from "
- "being serialized or deserialized is deprecated. Please use the "
- "`exclude` argument instead. For example: exclude=['{arg}'].")
- W016 = ("The keyword argument `n_threads` is now deprecated. As of v2.2.2, "
- "the argument `n_process` controls parallel inference via "
- "multiprocessing.")
W017 = ("Alias '{alias}' already exists in the Knowledge Base.")
W018 = ("Entity '{entity}' already exists in the Knowledge Base - "
"ignoring the duplicate entry.")
- W019 = ("Changing vectors name from {old} to {new}, to avoid clash with "
- "previously loaded vectors. See Issue #3853.")
W020 = ("Unnamed vectors. This won't allow multiple vectors models to be "
"loaded. (Shape: {shape})")
W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be "
"incorrect. Modify PhraseMatcher._terminal_hash to fix.")
- W022 = ("Training a new part-of-speech tagger using a model with no "
- "lemmatization rules or data. This means that the trained model "
- "may not be able to lemmatize correctly. If this is intentional "
- "or the language you're using doesn't have lemmatization data, "
- "please ignore this warning. If this is surprising, make sure you "
- "have the spacy-lookups-data package installed.")
- W023 = ("Multiprocessing of Language.pipe is not supported in Python 2. "
- "'n_process' will be set to 1.")
W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in "
"the Knowledge Base.")
- W025 = ("'{name}' requires '{attr}' to be assigned, but none of the "
- "previous components in the pipeline declare that they assign it.")
W026 = ("Unable to set all sentence boundaries from dependency parses.")
W027 = ("Found a large training file of {size} bytes. Note that it may "
"be more efficient to split your training data into multiple "
@@ -108,70 +64,97 @@ class Warnings(object):
W028 = ("Doc.from_array was called with a vector of type '{type}', "
"but is expecting one of type 'uint64' instead. This may result "
"in problems with the vocab further on in the pipeline.")
- W029 = ("Unable to align tokens with entities from character offsets. "
- "Discarding entity annotation for the text: {text}.")
W030 = ("Some entities could not be aligned in the text \"{text}\" with "
"entities \"{entities}\". Use "
"`spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)`"
- " to check the alignment. Misaligned entities (with BILUO tag '-') "
- "will be ignored during training.")
- W031 = ("Model '{model}' ({model_version}) requires spaCy {version} and "
- "is incompatible with the current spaCy version ({current}). This "
- "may lead to unexpected results or runtime errors. To resolve "
- "this, download a newer compatible model or retrain your custom "
- "model with the current spaCy version. For more details and "
- "available updates, run: python -m spacy validate")
- W032 = ("Unable to determine model compatibility for model '{model}' "
- "({model_version}) with the current spaCy version ({current}). "
- "This may lead to unexpected results or runtime errors. To resolve "
- "this, download a newer compatible model or retrain your custom "
- "model with the current spaCy version. For more details and "
- "available updates, run: python -m spacy validate")
- W033 = ("Training a new {model} using a model with an empty lexeme "
- "normalization table. This may degrade the performance to some "
- "degree. If this is intentional or this language doesn't have a "
- "normalization table, please ignore this warning.")
+ " to check the alignment. Misaligned entities ('-') will be "
+ "ignored during training.")
+ W033 = ("Training a new {model} using a model with no lexeme normalization "
+ "table. This may degrade the performance of the model to some "
+ "degree. If this is intentional or the language you're using "
+ "doesn't have a normalization table, please ignore this warning. "
+ "If this is surprising, make sure you have the spacy-lookups-data "
+ "package installed. The languages with lexeme normalization tables "
+ "are currently: {langs}")
W034 = ("Please install the package spacy-lookups-data in order to include "
"the default lexeme normalization table for the language '{lang}'.")
W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
"attribute or operator.")
+ # TODO: fix numbering after merging develop into master
+ W090 = ("Could not locate any binary .spacy files in path '{path}'.")
+ W091 = ("Could not clean/remove the temp directory at {dir}: {msg}.")
+ W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.")
+ W093 = ("Could not find any data to train the {name} on. Is your "
+ "input data correctly formatted?")
+ W094 = ("Model '{model}' ({model_version}) specifies an under-constrained "
+ "spaCy version requirement: {version}. This can lead to compatibility "
+ "problems with older versions, or as new spaCy versions are "
+ "released, because the model may say it's compatible when it's "
+ 'not. Consider changing the "spacy_version" in your meta.json to a '
+ "version range, with a lower and upper pin. For example: {example}")
+ W095 = ("Model '{model}' ({model_version}) requires spaCy {version} and is "
+ "incompatible with the current version ({current}). This may lead "
+ "to unexpected results or runtime errors. To resolve this, "
+ "download a newer compatible model or retrain your custom model "
+ "with the current spaCy version. For more details and available "
+ "updates, run: python -m spacy validate")
+ W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' "
+ "instead.")
+ W097 = ("No Model config was provided to create the '{name}' component, "
+ "and no default configuration could be found either.")
+ W098 = ("No Model config was provided to create the '{name}' component, "
+ "so a default configuration was used.")
+ W099 = ("Expected 'dict' type for the 'model' argument of pipe '{pipe}', "
+ "but got '{type}' instead, so ignoring it.")
+ W100 = ("Skipping unsupported morphological feature(s): '{feature}'. "
+ "Provide features as a dict {{\"Field1\": \"Value1,Value2\"}} or "
+ "string \"Field1=Value1,Value2|Field2=Value3\".")
+ W101 = ("Skipping `Doc` custom extension '{name}' while merging docs.")
+ W102 = ("Skipping unsupported user data '{key}: {value}' while merging docs.")
+ W103 = ("Unknown {lang} word segmenter '{segmenter}'. Supported "
+ "word segmenters: {supported}. Defaulting to {default}.")
+ W104 = ("Skipping modifications for '{target}' segmenter. The current "
+ "segmenter is '{current}'.")
+ W105 = ("As of spaCy v3.0, the {matcher}.pipe method is deprecated. If you "
+ "need to match on a stream of documents, you can use nlp.pipe and "
+ "call the {matcher} on each Doc object.")
+
@add_codes
-class Errors(object):
+class Errors:
E001 = ("No component '{name}' found in pipeline. Available names: {opts}")
- E002 = ("Can't find factory for '{name}'. This usually happens when spaCy "
- "calls `nlp.create_pipe` with a component name that's not built "
- "in - for example, when constructing the pipeline from a model's "
- "meta.json. If you're using a custom component, you can write to "
- "`Language.factories['{name}']` or remove it from the model meta "
- "and add it via `nlp.add_pipe` instead.")
+ E002 = ("Can't find factory for '{name}' for language {lang} ({lang_code}). "
+ "This usually happens when spaCy calls nlp.{method} with a custom "
+ "component name that's not registered on the current language class. "
+ "If you're using a custom component, make sure you've added the "
+ "decorator @Language.component (for function components) or "
+ "@Language.factory (for class components).\n\nAvailable "
+ "factories: {opts}")
E003 = ("Not a valid pipeline component. Expected callable, but "
- "got {component} (name: '{name}').")
- E004 = ("If you meant to add a built-in component, use `create_pipe`: "
- "`nlp.add_pipe(nlp.create_pipe('{component}'))`")
+ "got {component} (name: '{name}'). If you're using a custom "
+ "component factory, double-check that it correctly returns your "
+ "initialized component.")
+ E004 = ("Can't set up pipeline component: a factory for '{name}' already "
+ "exists. Existing factory: {func}. New factory: {new_func}")
E005 = ("Pipeline component '{name}' returned None. If you're using a "
"custom component, maybe you forgot to return the processed Doc?")
- E006 = ("Invalid constraints. You can only set one of the following: "
- "before, after, first, last.")
+ E006 = ("Invalid constraints for adding pipeline component. You can only "
+ "set one of the following: before (component name or index), "
+ "after (component name or index), first (True) or last (True). "
+ "Invalid configuration: {args}. Existing components: {opts}")
E007 = ("'{name}' already exists in pipeline. Existing names: {opts}")
- E008 = ("Some current components would be lost when restoring previous "
- "pipeline state. If you added components after calling "
- "`nlp.disable_pipes()`, you should remove them explicitly with "
- "`nlp.remove_pipe()` before the pipeline is restored. Names of "
- "the new components: {names}")
- E009 = ("The `update` method expects same number of docs and golds, but "
- "got: {n_docs} docs, {n_golds} golds.")
+ E008 = ("Can't restore disabled pipeline component '{name}' because it "
+ "doesn't exist in the pipeline anymore. If you want to remove "
+ "components from the pipeline, you should do it before calling "
+ "`nlp.select_pipes()` or after restoring the disabled components.")
E010 = ("Word vectors set to length 0. This may be because you don't have "
"a model installed or loaded, or because your model doesn't "
"include word vectors. For more info, see the docs:\n"
- "https://spacy.io/usage/models")
+ "https://nightly.spacy.io/usage/models")
E011 = ("Unknown operator: '{op}'. Options: {opts}")
E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}")
- E013 = ("Error selecting action in matcher")
E014 = ("Unknown tag ID: {tag}")
- E015 = ("Conflicting morphology exception for ({tag}, {orth}). Use "
- "`force=True` to overwrite.")
E016 = ("MultitaskObjective target should be function or one of: dep, "
"tag, ent, dep_tag_offset, ent_tag.")
E017 = ("Can only add unicode or bytes. Got type: {value_type}")
@@ -179,21 +162,8 @@ class Errors(object):
"refers to an issue with the `Vocab` or `StringStore`.")
E019 = ("Can't create transition with unknown action ID: {action}. Action "
"IDs are enumerated in spacy/syntax/{src}.pyx.")
- E020 = ("Could not find a gold-standard action to supervise the "
- "dependency parser. The tree is non-projective (i.e. it has "
- "crossing arcs - see spacy/syntax/nonproj.pyx for definitions). "
- "The ArcEager transition system only supports projective trees. "
- "To learn non-projective representations, transform the data "
- "before training and after parsing. Either pass "
- "`make_projective=True` to the GoldParse class, or use "
- "spacy.syntax.nonproj.preprocess_training_data.")
- E021 = ("Could not find a gold-standard action to supervise the "
- "dependency parser. The GoldParse was projective. The transition "
- "system has {n_actions} actions. State at failure: {state}")
E022 = ("Could not find a transition with the name '{name}' in the NER "
"model.")
- E023 = ("Error cleaning up beam: The same state occurred twice at "
- "memory address {addr} and position {i}.")
E024 = ("Could not find an optimal move to supervise the parser. Usually, "
"this means that the model can't be updated in a way that's valid "
"and satisfies the correct annotations specified in the GoldParse. "
@@ -201,9 +171,9 @@ class Errors(object):
"training a named entity recognizer, also make sure that none of "
"your annotated entity spans have leading or trailing whitespace "
"or punctuation. "
- "You can also use the experimental `debug-data` command to "
+ "You can also use the experimental `debug data` command to "
"validate your JSON-formatted training data. For details, run:\n"
- "python -m spacy debug-data --help")
+ "python -m spacy debug data --help")
E025 = ("String is too long: {length} characters. Max is 2**30.")
E026 = ("Error accessing token at position {i}: out of bounds in Doc of "
"length {length}.")
@@ -215,10 +185,10 @@ class Errors(object):
"list of (unicode, bool) tuples. Got bytes instance: {value}")
E029 = ("noun_chunks requires the dependency parse, which requires a "
"statistical model to be installed and loaded. For more info, see "
- "the documentation:\nhttps://spacy.io/usage/models")
+ "the documentation:\nhttps://nightly.spacy.io/usage/models")
E030 = ("Sentence boundaries unset. You can add the 'sentencizer' "
"component to the pipeline with: "
- "nlp.add_pipe(nlp.create_pipe('sentencizer')) "
+ "nlp.add_pipe('sentencizer'). "
"Alternatively, add the dependency parser, or set sentence "
"boundaries by setting doc[i].is_sent_start.")
E031 = ("Invalid token: empty string ('') at position {i}.")
@@ -228,16 +198,12 @@ class Errors(object):
"the HEAD attribute would potentially override the sentence "
"boundaries set by SENT_START.")
E033 = ("Cannot load into non-empty Doc of length {length}.")
- E034 = ("Doc.merge received {n_args} non-keyword arguments. Expected "
- "either 3 arguments (deprecated), or 0 (use keyword arguments).\n"
- "Arguments supplied:\n{args}\nKeyword arguments:{kwargs}")
E035 = ("Error creating span with start {start} and end {end} for Doc of "
"length {length}.")
E036 = ("Error calculating span: Can't find a token starting at character "
"offset {start}.")
E037 = ("Error calculating span: Can't find a token ending at character "
"offset {end}.")
- E038 = ("Error finding sentence for span. Infinite loop detected.")
E039 = ("Array bounds exceeded while searching for root word. This likely "
"means the parse tree is in an invalid state. Please report this "
"issue here: http://github.com/explosion/spaCy/issues")
@@ -254,15 +220,10 @@ class Errors(object):
E047 = ("Can't assign a value to unregistered extension attribute "
"'{name}'. Did you forget to call the `set_extension` method?")
E048 = ("Can't import language {lang} from spacy.lang: {err}")
- E049 = ("Can't find spaCy data directory: '{path}'. Check your "
- "installation and permissions, or use spacy.util.set_data_path "
- "to customise the location if necessary.")
- E050 = ("Can't find model '{name}'. It doesn't seem to be a shortcut "
- "link, a Python package or a valid path to a data directory.")
- E051 = ("Cant' load '{name}'. If you're using a shortcut link, make sure "
- "it points to a valid package (not just a data directory).")
+ E050 = ("Can't find model '{name}'. It doesn't seem to be a Python "
+ "package or a valid path to a data directory.")
E052 = ("Can't find model directory: {path}")
- E053 = ("Could not read meta.json from {path}")
+ E053 = ("Could not read {name} from {path}")
E054 = ("No valid '{setting}' setting found in model meta.json.")
E055 = ("Invalid ORTH value in exception:\nKey: {key}\nOrths: {orths}")
E056 = ("Invalid tokenizer exception: ORTH values combined don't match "
@@ -273,8 +234,6 @@ class Errors(object):
E059 = ("One (and only one) keyword arg must be set. Got: {kwargs}")
E060 = ("Cannot add new key to vectors: the table is full. Current shape: "
"({rows}, {cols}).")
- E061 = ("Bad file name: {filename}. Example of a valid file name: "
- "'vectors.128.f.bin'")
E062 = ("Cannot find empty bit for new lexical flag. All bits between 0 "
"and 63 are occupied. You can replace one by specifying the "
"`flag_id` explicitly, e.g. "
@@ -288,39 +247,17 @@ class Errors(object):
"Query string: {string}\nOrth cached: {orth}\nOrth ID: {orth_id}")
E065 = ("Only one of the vector table's width and shape can be specified. "
"Got width {width} and shape {shape}.")
- E066 = ("Error creating model helper for extracting columns. Can only "
- "extract columns by positive integer. Got: {value}.")
E067 = ("Invalid BILUO tag sequence: Got a tag starting with 'I' (inside "
"an entity) without a preceding 'B' (beginning of an entity). "
"Tag sequence:\n{tags}")
E068 = ("Invalid BILUO tag: '{tag}'.")
- E069 = ("Invalid gold-standard parse tree. Found cycle between word "
- "IDs: {cycle} (tokens: {cycle_tokens}) in the document starting "
- "with tokens: {doc_tokens}.")
- E070 = ("Invalid gold-standard data. Number of documents ({n_docs}) "
- "does not align with number of annotations ({n_annots}).")
E071 = ("Error creating lexeme: specified orth ID ({orth}) does not "
"match the one in the vocab ({vocab_orth}).")
- E072 = ("Error serializing lexeme: expected data length {length}, "
- "got {bad_length}.")
E073 = ("Cannot assign vector of length {new_length}. Existing vectors "
"are of length {length}. You can use `vocab.reset_vectors` to "
"clear the existing vectors and resize the table.")
E074 = ("Error interpreting compiled match pattern: patterns are expected "
"to end with the attribute {attr}. Got: {bad_attr}.")
- E075 = ("Error accepting match: length ({length}) > maximum length "
- "({max_len}).")
- E076 = ("Error setting tensor on Doc: tensor has {rows} rows, while Doc "
- "has {words} words.")
- E077 = ("Error computing {value}: number of Docs ({n_docs}) does not "
- "equal number of GoldParse objects ({n_golds}) in batch.")
- E078 = ("Error computing score: number of words in Doc ({words_doc}) does "
- "not equal number of words in GoldParse ({words_gold}).")
- E079 = ("Error computing states in beam: number of predicted beams "
- "({pbeams}) does not equal number of gold beams ({gbeams}).")
- E080 = ("Duplicate state found in beam: {key}.")
- E081 = ("Error getting gradient in beam: number of histories ({n_hist}) "
- "does not equal number of losses ({losses}).")
E082 = ("Error deprojectivizing parse: number of heads ({n_heads}), "
"projective heads ({n_proj_heads}) and labels ({n_labels}) do not "
"match.")
@@ -328,8 +265,6 @@ class Errors(object):
"`getter` (plus optional `setter`) is allowed. Got: {nr_defined}")
E084 = ("Error assigning label ID {label} to span: not in StringStore.")
E085 = ("Can't create lexeme for string '{string}'.")
- E086 = ("Error deserializing lexeme '{string}': orth ID {orth_id} does "
- "not match hash {hash_id} in StringStore.")
E087 = ("Unknown displaCy style: {style}.")
E088 = ("Text of length {length} exceeds maximum of {max_length}. The "
"v2.x parser and NER models require roughly 1GB of temporary "
@@ -345,10 +280,6 @@ class Errors(object):
"existing extension, set `force=True` on `{obj}.set_extension`.")
E091 = ("Invalid extension attribute {name}: expected callable or None, "
"but got: {value}")
- E092 = ("Could not find or assign name for word vectors. Ususally, the "
- "name is read from the model's meta.json in vector.name. "
- "Alternatively, it is built from the 'lang' and 'name' keys in "
- "the meta.json. Vector names are required to avoid issue #1660.")
E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")
E094 = ("Error reading line {line_num} in vectors file {loc}.")
E095 = ("Can't write to frozen dictionary. This is likely an internal "
@@ -367,21 +298,15 @@ class Errors(object):
E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
"tokens to merge. If you want to find the longest non-overlapping "
"spans, you can use the util.filter_spans helper:\n"
- "https://spacy.io/api/top-level#util.filter_spans")
+ "https://nightly.spacy.io/api/top-level#util.filter_spans")
E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
"token can only be part of one entity, so make sure the entities "
"you're setting don't overlap.")
- E104 = ("Can't find JSON schema for '{name}'.")
- E105 = ("The Doc.print_tree() method is now deprecated. Please use "
- "Doc.to_json() instead or write your own function.")
E106 = ("Can't find doc._.{attr} attribute specified in the underscore "
"settings: {opts}")
E107 = ("Value of doc._.{attr} is not JSON-serializable: {value}")
- E108 = ("As of spaCy v2.1, the pipe name `sbd` has been deprecated "
- "in favor of the pipe name `sentencizer`, which does the same "
- "thing. For example, use `nlp.create_pipeline('sentencizer')`")
- E109 = ("Model for component '{name}' not initialized. Did you forget to "
- "load a model, or forget to call begin_training()?")
+ E109 = ("Component '{name}' could not be run. Did you forget to "
+ "call begin_training()?")
E110 = ("Invalid displaCy render wrapper. Expected callable, got: {obj}")
E111 = ("Pickling a token is not supported, because tokens are only views "
"of the parent Doc and can't exist on their own. A pickled token "
@@ -394,8 +319,6 @@ class Errors(object):
"practically no advantage over pickling the parent Doc directly. "
"So instead of pickling the span, pickle the Doc it belongs to or "
"use Span.as_doc to convert the span to a standalone Doc object.")
- E113 = ("The newly split token can only have one root (head = 0).")
- E114 = ("The newly split token needs to have a root (head = 0).")
E115 = ("All subtokens must have associated heads.")
E116 = ("Cannot currently add labels to pretrained text classifier. Add "
"labels before training begins. This functionality was available "
@@ -418,16 +341,9 @@ class Errors(object):
"equal to span length ({span_len}).")
E122 = ("Cannot find token to be split. Did it get merged?")
E123 = ("Cannot find head of token to be split. Did it get merged?")
- E124 = ("Cannot read from file: {path}. Supported formats: {formats}")
E125 = ("Unexpected value: {value}")
E126 = ("Unexpected matcher predicate: '{bad}'. Expected one of: {good}. "
"This is likely a bug in spaCy, so feel free to open an issue.")
- E127 = ("Cannot create phrase pattern representation for length 0. This "
- "is likely a bug in spaCy.")
- E128 = ("Unsupported serialization argument: '{arg}'. The use of keyword "
- "arguments to exclude fields from being serialized or deserialized "
- "is now deprecated. Please use the `exclude` argument instead. "
- "For example: exclude=['{arg}'].")
E129 = ("Cannot write the label of an existing Span object because a Span "
"is a read-only view of the underlying Token objects stored in the "
"Doc. Instead, create a new Span object and specify the `label` "
@@ -449,29 +365,21 @@ class Errors(object):
E133 = ("The sum of prior probabilities for alias '{alias}' should not "
"exceed 1, but found {sum}.")
E134 = ("Entity '{entity}' is not defined in the Knowledge Base.")
- E135 = ("If you meant to replace a built-in component, use `create_pipe`: "
- "`nlp.replace_pipe('{name}', nlp.create_pipe('{name}'))`")
- E136 = ("This additional feature requires the jsonschema library to be "
- "installed:\npip install jsonschema")
E137 = ("Expected 'dict' type, but got '{type}' from '{line}'. Make sure "
"to provide a valid JSON object as input with either the `text` "
"or `tokens` key. For more info, see the docs:\n"
- "https://spacy.io/api/cli#pretrain-jsonl")
+ "https://nightly.spacy.io/api/cli#pretrain-jsonl")
E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input "
"includes either the `text` or `tokens` key. For more info, see "
- "the docs:\nhttps://spacy.io/api/cli#pretrain-jsonl")
- E139 = ("Knowledge Base for component '{name}' not initialized. Did you "
- "forget to call set_kb()?")
+ "the docs:\nhttps://nightly.spacy.io/api/cli#pretrain-jsonl")
+ E139 = ("Knowledge Base for component '{name}' is empty. Use the methods "
+ "kb.add_entity and kb.add_alias to add entries.")
E140 = ("The list of entities, prior probabilities and entity vectors "
"should be of equal length.")
E141 = ("Entity vectors should be of length {required} instead of the "
"provided {found}.")
- E142 = ("Unsupported loss_function '{loss_func}'. Use either 'L2' or "
- "'cosine'.")
E143 = ("Labels for component '{name}' not initialized. Did you forget to "
"call add_label()?")
- E144 = ("Could not find parameter `{param}` when building the entity "
- "linker model.")
E145 = ("Error reading `{param}` from input file.")
E146 = ("Could not access `{path}`.")
E147 = ("Unexpected error in the {method} functionality of the "
@@ -483,8 +391,6 @@ class Errors(object):
"the component matches the model being loaded.")
E150 = ("The language of the `nlp` object and the `vocab` should be the "
"same, but found '{nlp}' and '{vocab}' respectively.")
- E151 = ("Trying to call nlp.update without required annotation types. "
- "Expected top-level keys: {exp}. Got: {unexp}.")
E152 = ("The attribute {attr} is not supported for token patterns. "
"Please use the option validate=True with Matcher, PhraseMatcher, "
"or EntityRuler for more details.")
@@ -521,27 +427,14 @@ class Errors(object):
"that case.")
E166 = ("Can only merge DocBins with the same pre-defined attributes.\n"
"Current DocBin: {current}\nOther DocBin: {other}")
- E167 = ("Unknown morphological feature: '{feat}' ({feat_id}). This can "
- "happen if the tagger was trained with a different set of "
- "morphological features. If you're using a pretrained model, make "
- "sure that your models are up to date:\npython -m spacy validate")
- E168 = ("Unknown field: {field}")
E169 = ("Can't find module: {module}")
E170 = ("Cannot apply transition {name}: invalid for the current state.")
- E171 = ("Matcher.add received invalid on_match callback argument: expected "
+ E171 = ("Matcher.add received invalid 'on_match' callback argument: expected "
"callable or None, but got: {arg_type}")
- E172 = ("The Lemmatizer.load classmethod is deprecated. To create a "
- "Lemmatizer, initialize the class directly. See the docs for "
- "details: https://spacy.io/api/lemmatizer")
- E173 = ("As of v2.2, the Lemmatizer is initialized with an instance of "
- "Lookups containing the lemmatization tables. See the docs for "
- "details: https://spacy.io/api/lemmatizer#init")
- E174 = ("Architecture '{name}' not found in registry. Available "
- "names: {names}")
E175 = ("Can't remove rule for unknown match pattern ID: {key}")
E176 = ("Alias '{alias}' is not defined in the Knowledge Base.")
E177 = ("Ill-formed IOB input detected: {tag}")
- E178 = ("Invalid pattern. Expected list of dicts but got: {pat}. Maybe you "
+ E178 = ("Each pattern should be a list of dicts, but got: {pat}. Maybe you "
"accidentally passed a single pattern to Matcher.add instead of a "
"list of patterns? If you only want to add one pattern, make sure "
"to wrap it in a list. For example: matcher.add('{key}', [pattern])")
@@ -565,9 +458,6 @@ class Errors(object):
"{obj}.{attr}\nAttribute '{attr}' does not exist on {obj}.")
E186 = ("'{tok_a}' and '{tok_b}' are different texts.")
E187 = ("Only unicode strings are supported as labels.")
- E188 = ("Could not match the gold entity links to entities in the doc - "
- "make sure the gold EL data refers to valid results of the "
- "named entity recognizer in the `nlp` pipeline.")
E189 = ("Each argument to `get_doc` should be of equal length.")
E190 = ("Token head out of range in `Doc.from_array()` for token index "
"'{index}' with value '{value}' (equivalent to relative head "
@@ -592,17 +482,201 @@ class Errors(object):
"can not be combined with adding a pretrained Tok2Vec layer.")
E201 = ("Span index out of range.")
+ # TODO: fix numbering after merging develop into master
+ E925 = ("Invalid color values for displaCy visualizer: expected dictionary "
+ "mapping label names to colors but got: {obj}")
+ E926 = ("It looks like you're trying to modify nlp.{attr} directly. This "
+ "doesn't work because it's an immutable computed property. If you "
+ "need to modify the pipeline, use the built-in methods like "
+ "nlp.add_pipe, nlp.remove_pipe, nlp.disable_pipe or nlp.enable_pipe "
+ "instead.")
+ E927 = ("Can't write to frozen list. Maybe you're trying to modify a computed "
+ "property or default function argument?")
+ E928 = ("A 'KnowledgeBase' should be written to / read from a file, but the "
+ "provided argument {loc} is an existing directory.")
+ E929 = ("A 'KnowledgeBase' could not be read from {loc} - the path does "
+ "not seem to exist.")
+ E930 = ("Received invalid get_examples callback in {name}.begin_training. "
+ "Expected function that returns an iterable of Example objects but "
+ "got: {obj}")
+ E931 = ("Encountered Pipe subclass without Pipe.{method} method in component "
+ "'{name}'. If the component is trainable and you want to use this "
+ "method, make sure it's overwritten on the subclass. If your "
+ "component isn't trainable, add a method that does nothing or "
+ "don't use the Pipe base class.")
+ E940 = ("Found NaN values in scores.")
+ E941 = ("Can't find model '{name}'. It looks like you're trying to load a "
+ "model from a shortcut, which is deprecated as of spaCy v3.0. To "
+ "load the model, use its full name instead:\n\n"
+ "nlp = spacy.load(\"{full}\")\n\nFor more details on the available "
+ "models, see the models directory: https://spacy.io/models. If you "
+ "want to create a blank model, use spacy.blank: "
+ "nlp = spacy.blank(\"{name}\")")
+ E942 = ("Executing after_{name} callback failed. Expected the function to "
+ "return an initialized nlp object but got: {value}. Maybe "
+ "you forgot to return the modified object in your function?")
+ E943 = ("Executing before_creation callback failed. Expected the function to "
+ "return an uninitialized Language subclass but got: {value}. Maybe "
+ "you forgot to return the modified object in your function or "
+ "returned the initialized nlp object instead?")
+ E944 = ("Can't copy pipeline component '{name}' from source model '{model}': "
+ "not found in pipeline. Available components: {opts}")
+ E945 = ("Can't copy pipeline component '{name}' from source. Expected loaded "
+ "nlp object, but got: {source}")
+ E947 = ("Matcher.add received invalid 'greedy' argument: expected "
+ "a string value from {expected} but got: '{arg}'")
+ E948 = ("Matcher.add received invalid 'patterns' argument: expected "
+ "a List, but got: {arg_type}")
+ E949 = ("Can only create an alignment when the texts are the same.")
+ E952 = ("The section '{name}' is not a valid section in the provided config.")
+ E953 = ("Mismatched IDs received by the Tok2Vec listener: {id1} vs. {id2}")
+ E954 = ("The Tok2Vec listener did not receive a valid input.")
+ E955 = ("Can't find table(s) '{table}' for language '{lang}' in spacy-lookups-data.")
+ E956 = ("Can't find component '{name}' in [components] block in the config. "
+ "Available components: {opts}")
+ E957 = ("Writing directly to Language.factories isn't needed anymore in "
+ "spaCy v3. Instead, you can use the @Language.factory decorator "
+ "to register your custom component factory or @Language.component "
+ "to register a simple stateless function component that just takes "
+ "a Doc and returns it.")
+ E958 = ("Language code defined in config ({bad_lang_code}) does not match "
+ "language code of current Language subclass {lang} ({lang_code})")
+ E959 = ("Can't insert component {dir} index {idx}. Existing components: {opts}")
+ E960 = ("No config data found for component '{name}'. This is likely a bug "
+ "in spaCy.")
+ E961 = ("Found non-serializable Python object in config. Configs should "
+ "only include values that can be serialized to JSON. If you need "
+ "to pass models or other objects to your component, use a reference "
+ "to a registered function or initialize the object in your "
+ "component.\n\n{config}")
+ E962 = ("Received incorrect {style} for pipe '{name}'. Expected dict, "
+ "got: {cfg_type}.")
+ E963 = ("Can't read component info from @Language.{decorator} decorator. "
+ "Maybe you forgot to call it? Make sure you're using "
+ "@Language.{decorator}() instead of @Language.{decorator}.")
+ E964 = ("The pipeline component factory for '{name}' needs to have the "
+ "following named arguments, which are passed in by spaCy:\n- nlp: "
+ "receives the current nlp object and lets you access the vocab\n- "
+ "name: the name of the component instance, can be used to identify "
+ "the component, output losses etc.")
+ E965 = ("It looks like you're using the @Language.component decorator to "
+ "register '{name}' on a class instead of a function component. If "
+ "you need to register a class or function that *returns* a component "
+ "function, use the @Language.factory decorator instead.")
+ E966 = ("nlp.add_pipe now takes the string name of the registered component "
+ "factory, not a callable component. Expected string, but got "
+ "{component} (name: '{name}').\n\n- If you created your component "
+ "with nlp.create_pipe('name'): remove nlp.create_pipe and call "
+ "nlp.add_pipe('name') instead.\n\n- If you passed in a component "
+ "like TextCategorizer(): call nlp.add_pipe with the string name "
+ "instead, e.g. nlp.add_pipe('textcat').\n\n- If you're using a custom "
+ "component: Add the decorator @Language.component (for function "
+ "components) or @Language.factory (for class components / factories) "
+ "to your custom component and assign it a name, e.g. "
+ "@Language.component('your_name'). You can then run "
+ "nlp.add_pipe('your_name') to add it to the pipeline.")
+ E967 = ("No {meta} meta information found for '{name}'. This is likely a bug in spaCy.")
+ E968 = ("nlp.replace_pipe now takes the string name of the registered component "
+ "factory, not a callable component. Expected string, but got "
+ "{component}.\n\n- If you created your component "
+ "with nlp.create_pipe('name'): remove nlp.create_pipe and call "
+ "nlp.replace_pipe('{name}', 'name') instead.\n\n- If you passed in a "
+ "component like TextCategorizer(): call nlp.replace_pipe with the "
+ "string name instead, e.g. nlp.replace_pipe('{name}', 'textcat').\n\n"
+ "- If you're using a custom component: Add the decorator "
+ "@Language.component (for function components) or @Language.factory "
+ "(for class components / factories) to your custom component and "
+ "assign it a name, e.g. @Language.component('your_name'). You can "
+ "then run nlp.replace_pipe('{name}', 'your_name').")
+ E969 = ("Expected string values for field '{field}', but received {types} instead. ")
+ E970 = ("Can not execute command '{str_command}'. Do you have '{tool}' installed?")
+ E971 = ("Found incompatible lengths in Doc.from_array: {array_length} for the "
+ "array and {doc_length} for the Doc itself.")
+ E972 = ("Example.__init__ got None for '{arg}'. Requires Doc.")
+ E973 = ("Unexpected type for NER data")
+ E974 = ("Unknown {obj} attribute: {key}")
+ E976 = ("The method 'Example.from_dict' expects a {type} as {n} argument, "
+ "but received None.")
+ E977 = ("Can not compare a MorphAnalysis with a string object. "
+ "This is likely a bug in spaCy, so feel free to open an issue.")
+ E978 = ("The {name} method takes a list of Example objects, but got: {types}")
+ E979 = ("Cannot convert {type} to an Example object.")
+ E980 = ("Each link annotation should refer to a dictionary with at most one "
+ "identifier mapping to 1.0, and all others to 0.0.")
+ E981 = ("The offsets of the annotations for 'links' could not be aligned "
+ "to token boundaries.")
+ E982 = ("The 'ent_iob' attribute of a Token should be an integer indexing "
+ "into {values}, but found {value}.")
+ E983 = ("Invalid key for '{dict}': {key}. Available keys: "
+ "{keys}")
+ E984 = ("Invalid component config for '{name}': component block needs either "
+ "a key 'factory' specifying the registered function used to "
+ "initialize the component, or a key 'source' specifying a "
+ "spaCy model to copy the component from. For example, factory = "
+ "\"ner\" will use the 'ner' factory and all other settings in the "
+ "block will be passed to it as arguments. Alternatively, source = "
+ "\"en_core_web_sm\" will copy the component from that model.\n\n{config}")
+ E985 = ("Can't load model from config file: no 'nlp' section found.\n\n{config}")
+ E986 = ("Could not create any training batches: check your input. "
+ "Are the train and dev paths defined? "
+ "Is 'discard_oversize' set appropriately? ")
+ E987 = ("The text of an example training instance is either a Doc or "
+ "a string, but found {type} instead.")
+ E988 = ("Could not parse any training examples. Ensure the data is "
+ "formatted correctly.")
+ E989 = ("'nlp.update()' was called with two positional arguments. This "
+ "may be due to a backwards-incompatible change to the format "
+ "of the training data in spaCy 3.0 onwards. The 'update' "
+ "function should now be called with a batch of 'Example' "
+ "objects, instead of (text, annotation) tuples. ")
+ E991 = ("The function 'select_pipes' should be called with either a "
+ "'disable' argument to list the names of the pipe components "
+ "that should be disabled, or with an 'enable' argument that "
+ "specifies which pipes should not be disabled.")
+ E992 = ("The function `select_pipes` was called with `enable`={enable} "
+ "and `disable`={disable} but that information is conflicting "
+ "for the `nlp` pipeline with components {names}.")
+ E993 = ("The config for 'nlp' needs to include a key 'lang' specifying "
+ "the code of the language to initialize it with (for example "
+ "'en' for English) - this can't be 'None'.\n\n{config}")
+ E996 = ("Could not parse {file}: {msg}")
+ E997 = ("Tokenizer special cases are not allowed to modify the text. "
+ "This would map '{chunk}' to '{orth}' given token attributes "
+ "'{token_attrs}'.")
+ E999 = ("Unable to merge the `Doc` objects because they do not all share "
+ "the same `Vocab`.")
+ E1000 = ("No pkuseg model available. Provide a pkuseg model when "
+ "initializing the pipeline:\n"
+ 'cfg = {"tokenizer": {"segmenter": "pkuseg", "pkuseg_model": name_or_path}}\n'
+ 'nlp = Chinese(config=cfg)')
+ E1001 = ("Target token outside of matched span for match with tokens "
+ "'{span}' and offset '{index}' matched by patterns '{patterns}'.")
+ E1002 = ("Span index out of range.")
+ E1003 = ("Unsupported lemmatizer mode '{mode}'.")
+ E1004 = ("Missing lemmatizer table(s) found for lemmatizer mode '{mode}'. "
+ "Required tables '{tables}', found '{found}'. If you are not "
+ "providing custom lookups, make sure you have the package "
+ "spacy-lookups-data installed.")
+ E1005 = ("Unable to set attribute '{attr}' in tokenizer exception for "
+ "'{chunk}'. Tokenizer exceptions are only allowed to specify "
+ "`ORTH` and `NORM`.")
+ E1006 = ("Unable to initialize {name} model with 0 labels.")
+
@add_codes
-class TempErrors(object):
+class TempErrors:
T003 = ("Resizing pretrained Tagger models is not currently supported.")
- T004 = ("Currently parser depth is hard-coded to 1. Received: {value}.")
T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
"issue tracker: http://github.com/explosion/spaCy/issues")
- T008 = ("Bad configuration of Tagger. This is probably a bug within "
- "spaCy. We changed the name of an internal attribute for loading "
- "pretrained vectors, and the class has been passed the old name "
- "(pretrained_dims) but not the new name (pretrained_vectors).")
+
+
+# Deprecated model shortcuts, only used in errors and warnings
+OLD_MODEL_SHORTCUTS = {
+ "en": "en_core_web_sm", "de": "de_core_news_sm", "es": "es_core_news_sm",
+ "pt": "pt_core_news_sm", "fr": "fr_core_news_sm", "it": "it_core_news_sm",
+ "nl": "nl_core_news_sm", "el": "el_core_news_sm", "nb": "nb_core_news_sm",
+ "lt": "lt_core_news_sm", "xx": "xx_ent_wiki_sm"
+}
# fmt: on
@@ -612,16 +686,12 @@ class MatchPatternError(ValueError):
def __init__(self, key, errors):
"""Custom error for validating match patterns.
- key (unicode): The name of the matcher rule.
+ key (str): The name of the matcher rule.
errors (dict): Validation errors (sequence of strings) mapped to pattern
ID, i.e. the index of the added pattern.
"""
- msg = "Invalid token patterns for matcher rule '{}'\n".format(key)
+ msg = f"Invalid token patterns for matcher rule '{key}'\n"
for pattern_idx, error_msgs in errors.items():
- pattern_errors = "\n".join(["- {}".format(e) for e in error_msgs])
- msg += "\nPattern {}:\n{}\n".format(pattern_idx, pattern_errors)
+ pattern_errors = "\n".join([f"- {e}" for e in error_msgs])
+ msg += f"\nPattern {pattern_idx}:\n{pattern_errors}\n"
ValueError.__init__(self, msg)
-
-
-class AlignmentError(ValueError):
- pass
diff --git a/spacy/glossary.py b/spacy/glossary.py
index 44a8277da..c4a6a5c45 100644
--- a/spacy/glossary.py
+++ b/spacy/glossary.py
@@ -1,12 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
def explain(term):
"""Get a description for a given POS tag, dependency label or entity type.
- term (unicode): The term to explain.
- RETURNS (unicode): The explanation, or `None` if not found in the glossary.
+ term (str): The term to explain.
+ RETURNS (str): The explanation, or `None` if not found in the glossary.
EXAMPLE:
>>> spacy.explain(u'NORP')
diff --git a/spacy/gold.pxd b/spacy/gold.pxd
deleted file mode 100644
index 20a25a939..000000000
--- a/spacy/gold.pxd
+++ /dev/null
@@ -1,41 +0,0 @@
-from cymem.cymem cimport Pool
-
-from .structs cimport TokenC
-from .typedefs cimport attr_t
-from .syntax.transition_system cimport Transition
-
-
-cdef struct GoldParseC:
- int* tags
- int* heads
- int* has_dep
- int* sent_start
- attr_t* labels
- int** brackets
- Transition* ner
-
-
-cdef class GoldParse:
- cdef Pool mem
-
- cdef GoldParseC c
-
- cdef int length
- cdef public int loss
- cdef public list words
- cdef public list tags
- cdef public list morphology
- cdef public list heads
- cdef public list labels
- cdef public dict orths
- cdef public list ner
- cdef public list ents
- cdef public dict brackets
- cdef public object cats
- cdef public dict links
-
- cdef readonly list cand_to_gold
- cdef readonly list gold_to_cand
- cdef readonly list orig_annot
-
-
diff --git a/spacy/gold.pyx b/spacy/gold.pyx
deleted file mode 100644
index e69ff5933..000000000
--- a/spacy/gold.pyx
+++ /dev/null
@@ -1,1004 +0,0 @@
-# cython: profile=True
-# coding: utf8
-from __future__ import unicode_literals, print_function
-
-import re
-import random
-import numpy
-import tempfile
-import shutil
-import itertools
-from pathlib import Path
-import srsly
-import warnings
-
-from .syntax import nonproj
-from .tokens import Doc, Span
-from .errors import Errors, AlignmentError, Warnings
-from .compat import path2str
-from . import util
-from .util import minibatch, itershuffle
-
-from libc.stdio cimport FILE, fopen, fclose, fread, fwrite, feof, fseek
-
-
-punct_re = re.compile(r"\W")
-
-
-def tags_to_entities(tags):
- entities = []
- start = None
- for i, tag in enumerate(tags):
- if tag is None:
- continue
- if tag.startswith("O"):
- # TODO: We shouldn't be getting these malformed inputs. Fix this.
- if start is not None:
- start = None
- continue
- elif tag == "-":
- continue
- elif tag.startswith("I"):
- if start is None:
- raise ValueError(Errors.E067.format(tags=tags[:i + 1]))
- continue
- if tag.startswith("U"):
- entities.append((tag[2:], i, i))
- elif tag.startswith("B"):
- start = i
- elif tag.startswith("L"):
- entities.append((tag[2:], start, i))
- start = None
- else:
- raise ValueError(Errors.E068.format(tag=tag))
- return entities
-
-
-def merge_sents(sents):
- m_deps = [[], [], [], [], [], []]
- m_cats = {}
- m_brackets = []
- i = 0
- for (ids, words, tags, heads, labels, ner), (cats, brackets) in sents:
- m_deps[0].extend(id_ + i for id_ in ids)
- m_deps[1].extend(words)
- m_deps[2].extend(tags)
- m_deps[3].extend(head + i for head in heads)
- m_deps[4].extend(labels)
- m_deps[5].extend(ner)
- m_brackets.extend((b["first"] + i, b["last"] + i, b["label"])
- for b in brackets)
- m_cats.update(cats)
- i += len(ids)
- return [(m_deps, (m_cats, m_brackets))]
-
-
-def _normalize_for_alignment(tokens):
- return [w.replace(" ", "").lower() for w in tokens]
-
-
-def align(tokens_a, tokens_b):
- """Calculate alignment tables between two tokenizations.
-
- tokens_a (List[str]): The candidate tokenization.
- tokens_b (List[str]): The reference tokenization.
- RETURNS: (tuple): A 5-tuple consisting of the following information:
- * cost (int): The number of misaligned tokens.
- * a2b (List[int]): Mapping of indices in `tokens_a` to indices in `tokens_b`.
- For instance, if `a2b[4] == 6`, that means that `tokens_a[4]` aligns
- to `tokens_b[6]`. If there's no one-to-one alignment for a token,
- it has the value -1.
- * b2a (List[int]): The same as `a2b`, but mapping the other direction.
- * a2b_multi (Dict[int, int]): A dictionary mapping indices in `tokens_a`
- to indices in `tokens_b`, where multiple tokens of `tokens_a` align to
- the same token of `tokens_b`.
- * b2a_multi (Dict[int, int]): As with `a2b_multi`, but mapping the other
- direction.
- """
- tokens_a = _normalize_for_alignment(tokens_a)
- tokens_b = _normalize_for_alignment(tokens_b)
- cost = 0
- a2b = numpy.empty(len(tokens_a), dtype="i")
- b2a = numpy.empty(len(tokens_b), dtype="i")
- a2b.fill(-1)
- b2a.fill(-1)
- a2b_multi = {}
- b2a_multi = {}
- i = 0
- j = 0
- offset_a = 0
- offset_b = 0
- while i < len(tokens_a) and j < len(tokens_b):
- a = tokens_a[i][offset_a:]
- b = tokens_b[j][offset_b:]
- if a == b:
- if offset_a == offset_b == 0:
- a2b[i] = j
- b2a[j] = i
- elif offset_a == 0:
- cost += 2
- a2b_multi[i] = j
- elif offset_b == 0:
- cost += 2
- b2a_multi[j] = i
- offset_a = offset_b = 0
- i += 1
- j += 1
- elif a == "":
- assert offset_a == 0
- cost += 1
- i += 1
- elif b == "":
- assert offset_b == 0
- cost += 1
- j += 1
- elif b.startswith(a):
- cost += 1
- if offset_a == 0:
- a2b_multi[i] = j
- i += 1
- offset_a = 0
- offset_b += len(a)
- elif a.startswith(b):
- cost += 1
- if offset_b == 0:
- b2a_multi[j] = i
- j += 1
- offset_b = 0
- offset_a += len(b)
- else:
- assert "".join(tokens_a) != "".join(tokens_b)
- raise AlignmentError(Errors.E186.format(tok_a=tokens_a, tok_b=tokens_b))
- return cost, a2b, b2a, a2b_multi, b2a_multi
-
-
-class GoldCorpus(object):
- """An annotated corpus, using the JSON file format. Manages
- annotations for tagging, dependency parsing and NER.
-
- DOCS: https://spacy.io/api/goldcorpus
- """
- def __init__(self, train, dev, gold_preproc=False, limit=None):
- """Create a GoldCorpus.
-
- train_path (unicode or Path): File or directory of training data.
- dev_path (unicode or Path): File or directory of development data.
- RETURNS (GoldCorpus): The newly created object.
- """
- self.limit = limit
- if isinstance(train, str) or isinstance(train, Path):
- train = self.read_tuples(self.walk_corpus(train))
- dev = self.read_tuples(self.walk_corpus(dev))
- # Write temp directory with one doc per file, so we can shuffle and stream
- self.tmp_dir = Path(tempfile.mkdtemp())
- self.write_msgpack(self.tmp_dir / "train", train, limit=self.limit)
- self.write_msgpack(self.tmp_dir / "dev", dev, limit=self.limit)
-
- def __del__(self):
- shutil.rmtree(path2str(self.tmp_dir))
-
- @staticmethod
- def write_msgpack(directory, doc_tuples, limit=0):
- if not directory.exists():
- directory.mkdir()
- n = 0
- for i, doc_tuple in enumerate(doc_tuples):
- srsly.write_msgpack(directory / "{}.msg".format(i), [doc_tuple])
- n += len(doc_tuple[1])
- if limit and n >= limit:
- break
-
- @staticmethod
- def walk_corpus(path):
- path = util.ensure_path(path)
- if not path.is_dir():
- return [path]
- paths = [path]
- locs = []
- seen = set()
- for path in paths:
- if str(path) in seen:
- continue
- seen.add(str(path))
- if path.parts[-1].startswith("."):
- continue
- elif path.is_dir():
- paths.extend(path.iterdir())
- elif path.parts[-1].endswith((".json", ".jsonl")):
- locs.append(path)
- return locs
-
- @staticmethod
- def read_tuples(locs, limit=0):
- i = 0
- for loc in locs:
- loc = util.ensure_path(loc)
- if loc.parts[-1].endswith("json"):
- gold_tuples = read_json_file(loc)
- elif loc.parts[-1].endswith("jsonl"):
- gold_tuples = srsly.read_jsonl(loc)
- first_gold_tuple = next(gold_tuples)
- gold_tuples = itertools.chain([first_gold_tuple], gold_tuples)
- # TODO: proper format checks with schemas
- if isinstance(first_gold_tuple, dict):
- gold_tuples = read_json_object(gold_tuples)
- elif loc.parts[-1].endswith("msg"):
- gold_tuples = srsly.read_msgpack(loc)
- else:
- supported = ("json", "jsonl", "msg")
- raise ValueError(Errors.E124.format(path=path2str(loc), formats=supported))
- for item in gold_tuples:
- yield item
- i += len(item[1])
- if limit and i >= limit:
- return
-
- @property
- def dev_tuples(self):
- locs = (self.tmp_dir / "dev").iterdir()
- yield from self.read_tuples(locs, limit=self.limit)
-
- @property
- def train_tuples(self):
- locs = (self.tmp_dir / "train").iterdir()
- yield from self.read_tuples(locs, limit=self.limit)
-
- def count_train(self):
- n = 0
- i = 0
- for raw_text, paragraph_tuples in self.train_tuples:
- for sent_tuples, brackets in paragraph_tuples:
- n += len(sent_tuples[1])
- if self.limit and i >= self.limit:
- break
- i += 1
- return n
-
- def train_docs(self, nlp, gold_preproc=False, max_length=None,
- noise_level=0.0, orth_variant_level=0.0,
- ignore_misaligned=False):
- locs = list((self.tmp_dir / 'train').iterdir())
- random.shuffle(locs)
- train_tuples = self.read_tuples(locs, limit=self.limit)
- gold_docs = self.iter_gold_docs(nlp, train_tuples, gold_preproc,
- max_length=max_length,
- noise_level=noise_level,
- orth_variant_level=orth_variant_level,
- make_projective=True,
- ignore_misaligned=ignore_misaligned)
- yield from gold_docs
-
- def train_docs_without_preprocessing(self, nlp, gold_preproc=False):
- gold_docs = self.iter_gold_docs(nlp, self.train_tuples, gold_preproc=gold_preproc)
- yield from gold_docs
-
- def dev_docs(self, nlp, gold_preproc=False, ignore_misaligned=False):
- gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, gold_preproc=gold_preproc,
- ignore_misaligned=ignore_misaligned)
- yield from gold_docs
-
- @classmethod
- def iter_gold_docs(cls, nlp, tuples, gold_preproc, max_length=None,
- noise_level=0.0, orth_variant_level=0.0, make_projective=False,
- ignore_misaligned=False):
- for raw_text, paragraph_tuples in tuples:
- if gold_preproc:
- raw_text = None
- else:
- paragraph_tuples = merge_sents(paragraph_tuples)
- docs, paragraph_tuples = cls._make_docs(nlp, raw_text,
- paragraph_tuples, gold_preproc, noise_level=noise_level,
- orth_variant_level=orth_variant_level)
- golds = cls._make_golds(docs, paragraph_tuples, make_projective,
- ignore_misaligned=ignore_misaligned)
- for doc, gold in zip(docs, golds):
- if gold is not None:
- if (not max_length) or len(doc) < max_length:
- yield doc, gold
-
- @classmethod
- def _make_docs(cls, nlp, raw_text, paragraph_tuples, gold_preproc, noise_level=0.0, orth_variant_level=0.0):
- if raw_text is not None:
- raw_text, paragraph_tuples = make_orth_variants(nlp, raw_text, paragraph_tuples, orth_variant_level=orth_variant_level)
- raw_text = add_noise(raw_text, noise_level)
- return [nlp.make_doc(raw_text)], paragraph_tuples
- else:
- docs = []
- raw_text, paragraph_tuples = make_orth_variants(nlp, None, paragraph_tuples, orth_variant_level=orth_variant_level)
- return [Doc(nlp.vocab, words=add_noise(sent_tuples[1], noise_level))
- for (sent_tuples, brackets) in paragraph_tuples], paragraph_tuples
-
-
- @classmethod
- def _make_golds(cls, docs, paragraph_tuples, make_projective, ignore_misaligned=False):
- if len(docs) != len(paragraph_tuples):
- n_annots = len(paragraph_tuples)
- raise ValueError(Errors.E070.format(n_docs=len(docs), n_annots=n_annots))
- golds = []
- for doc, (sent_tuples, (cats, brackets)) in zip(docs, paragraph_tuples):
- try:
- gold = GoldParse.from_annot_tuples(doc, sent_tuples, cats=cats,
- make_projective=make_projective)
- except AlignmentError:
- if ignore_misaligned:
- gold = None
- else:
- raise
- golds.append(gold)
- return golds
-
-
-def make_orth_variants(nlp, raw, paragraph_tuples, orth_variant_level=0.0):
- if random.random() >= orth_variant_level:
- return raw, paragraph_tuples
- raw_orig = str(raw)
- lower = False
- if random.random() >= 0.5:
- lower = True
- if raw is not None:
- raw = raw.lower()
- ndsv = nlp.Defaults.single_orth_variants
- ndpv = nlp.Defaults.paired_orth_variants
- # modify words in paragraph_tuples
- variant_paragraph_tuples = []
- for sent_tuples, brackets in paragraph_tuples:
- ids, words, tags, heads, labels, ner = sent_tuples
- if lower:
- words = [w.lower() for w in words]
- # single variants
- punct_choices = [random.choice(x["variants"]) for x in ndsv]
- for word_idx in range(len(words)):
- for punct_idx in range(len(ndsv)):
- if tags[word_idx] in ndsv[punct_idx]["tags"] \
- and words[word_idx] in ndsv[punct_idx]["variants"]:
- words[word_idx] = punct_choices[punct_idx]
- # paired variants
- punct_choices = [random.choice(x["variants"]) for x in ndpv]
- for word_idx in range(len(words)):
- for punct_idx in range(len(ndpv)):
- if tags[word_idx] in ndpv[punct_idx]["tags"] \
- and words[word_idx] in itertools.chain.from_iterable(ndpv[punct_idx]["variants"]):
- # backup option: random left vs. right from pair
- pair_idx = random.choice([0, 1])
- # best option: rely on paired POS tags like `` / ''
- if len(ndpv[punct_idx]["tags"]) == 2:
- pair_idx = ndpv[punct_idx]["tags"].index(tags[word_idx])
- # next best option: rely on position in variants
- # (may not be unambiguous, so order of variants matters)
- else:
- for pair in ndpv[punct_idx]["variants"]:
- if words[word_idx] in pair:
- pair_idx = pair.index(words[word_idx])
- words[word_idx] = punct_choices[punct_idx][pair_idx]
-
- variant_paragraph_tuples.append(((ids, words, tags, heads, labels, ner), brackets))
- # modify raw to match variant_paragraph_tuples
- if raw is not None:
- variants = []
- for single_variants in ndsv:
- variants.extend(single_variants["variants"])
- for paired_variants in ndpv:
- variants.extend(list(itertools.chain.from_iterable(paired_variants["variants"])))
- # store variants in reverse length order to be able to prioritize
- # longer matches (e.g., "---" before "--")
- variants = sorted(variants, key=lambda x: len(x))
- variants.reverse()
- variant_raw = ""
- raw_idx = 0
- # add initial whitespace
- while raw_idx < len(raw) and re.match("\s", raw[raw_idx]):
- variant_raw += raw[raw_idx]
- raw_idx += 1
- for sent_tuples, brackets in variant_paragraph_tuples:
- ids, words, tags, heads, labels, ner = sent_tuples
- for word in words:
- match_found = False
- # skip whitespace words
- if word.isspace():
- match_found = True
- # add identical word
- elif word not in variants and raw[raw_idx:].startswith(word):
- variant_raw += word
- raw_idx += len(word)
- match_found = True
- # add variant word
- else:
- for variant in variants:
- if not match_found and \
- raw[raw_idx:].startswith(variant):
- raw_idx += len(variant)
- variant_raw += word
- match_found = True
- # something went wrong, abort
- # (add a warning message?)
- if not match_found:
- return raw_orig, paragraph_tuples
- # add following whitespace
- while raw_idx < len(raw) and re.match("\s", raw[raw_idx]):
- variant_raw += raw[raw_idx]
- raw_idx += 1
- return variant_raw, variant_paragraph_tuples
- return raw, variant_paragraph_tuples
-
-
-def add_noise(orig, noise_level):
- if random.random() >= noise_level:
- return orig
- elif type(orig) == list:
- corrupted = [_corrupt(word, noise_level) for word in orig]
- corrupted = [w for w in corrupted if w]
- return corrupted
- else:
- return "".join(_corrupt(c, noise_level) for c in orig)
-
-
-def _corrupt(c, noise_level):
- if random.random() >= noise_level:
- return c
- elif c in [".", "'", "!", "?", ","]:
- return "\n"
- else:
- return c.lower()
-
-
-def read_json_object(json_corpus_section):
- """Take a list of JSON-formatted documents (e.g. from an already loaded
- training data file) and yield tuples in the GoldParse format.
-
- json_corpus_section (list): The data.
- YIELDS (tuple): The reformatted data.
- """
- for json_doc in json_corpus_section:
- tuple_doc = json_to_tuple(json_doc)
- for tuple_paragraph in tuple_doc:
- yield tuple_paragraph
-
-
-def json_to_tuple(doc):
- """Convert an item in the JSON-formatted training data to the tuple format
- used by GoldParse.
-
- doc (dict): One entry in the training data.
- YIELDS (tuple): The reformatted data.
- """
- paragraphs = []
- for paragraph in doc["paragraphs"]:
- sents = []
- cats = {}
- for cat in paragraph.get("cats", {}):
- cats[cat["label"]] = cat["value"]
- for sent in paragraph["sentences"]:
- words = []
- ids = []
- tags = []
- heads = []
- labels = []
- ner = []
- for i, token in enumerate(sent["tokens"]):
- words.append(token["orth"])
- ids.append(i)
- tags.append(token.get('tag', "-"))
- heads.append(token.get("head", 0) + i)
- labels.append(token.get("dep", ""))
- # Ensure ROOT label is case-insensitive
- if labels[-1].lower() == "root":
- labels[-1] = "ROOT"
- ner.append(token.get("ner", "-"))
- sents.append([
- [ids, words, tags, heads, labels, ner],
- [cats, sent.get("brackets", [])]])
- if sents:
- yield [paragraph.get("raw", None), sents]
-
-
-def read_json_file(loc, docs_filter=None, limit=None):
- loc = util.ensure_path(loc)
- if loc.is_dir():
- for filename in loc.iterdir():
- yield from read_json_file(loc / filename, limit=limit)
- else:
- for doc in _json_iterate(loc):
- if docs_filter is not None and not docs_filter(doc):
- continue
- for json_tuple in json_to_tuple(doc):
- yield json_tuple
-
-
-def _json_iterate(loc):
- # We should've made these files jsonl...But since we didn't, parse out
- # the docs one-by-one to reduce memory usage.
- # It's okay to read in the whole file -- just don't parse it into JSON.
- cdef bytes py_raw
- loc = util.ensure_path(loc)
- with loc.open("rb") as file_:
- py_raw = file_.read()
- cdef long file_length = len(py_raw)
- if file_length > 2 ** 30:
- warnings.warn(Warnings.W027.format(size=file_length))
-
- raw = py_raw
- cdef int square_depth = 0
- cdef int curly_depth = 0
- cdef int inside_string = 0
- cdef int escape = 0
- cdef long start = -1
- cdef char c
- cdef char quote = ord('"')
- cdef char backslash = ord("\\")
- cdef char open_square = ord("[")
- cdef char close_square = ord("]")
- cdef char open_curly = ord("{")
- cdef char close_curly = ord("}")
- for i in range(file_length):
- c = raw[i]
- if escape:
- escape = False
- continue
- if c == backslash:
- escape = True
- continue
- if c == quote:
- inside_string = not inside_string
- continue
- if inside_string:
- continue
- if c == open_square:
- square_depth += 1
- elif c == close_square:
- square_depth -= 1
- elif c == open_curly:
- if square_depth == 1 and curly_depth == 0:
- start = i
- curly_depth += 1
- elif c == close_curly:
- curly_depth -= 1
- if square_depth == 1 and curly_depth == 0:
- py_str = py_raw[start : i + 1].decode("utf8")
- try:
- yield srsly.json_loads(py_str)
- except Exception:
- print(py_str)
- raise
- start = -1
-
-
-def iob_to_biluo(tags):
- out = []
- tags = list(tags)
- while tags:
- out.extend(_consume_os(tags))
- out.extend(_consume_ent(tags))
- return out
-
-
-def _consume_os(tags):
- while tags and tags[0] == "O":
- yield tags.pop(0)
-
-
-def _consume_ent(tags):
- if not tags:
- return []
- tag = tags.pop(0)
- target_in = "I" + tag[1:]
- target_last = "L" + tag[1:]
- length = 1
- while tags and tags[0] in {target_in, target_last}:
- length += 1
- tags.pop(0)
- label = tag[2:]
- if length == 1:
- if len(label) == 0:
- raise ValueError(Errors.E177.format(tag=tag))
- return ["U-" + label]
- else:
- start = "B-" + label
- end = "L-" + label
- middle = ["I-%s" % label for _ in range(1, length - 1)]
- return [start] + middle + [end]
-
-
-cdef class GoldParse:
- """Collection for training annotations.
-
- DOCS: https://spacy.io/api/goldparse
- """
- @classmethod
- def from_annot_tuples(cls, doc, annot_tuples, cats=None, make_projective=False):
- _, words, tags, heads, deps, entities = annot_tuples
- return cls(doc, words=words, tags=tags, heads=heads, deps=deps,
- entities=entities, cats=cats,
- make_projective=make_projective)
-
- def __init__(self, doc, annot_tuples=None, words=None, tags=None, morphology=None,
- heads=None, deps=None, entities=None, make_projective=False,
- cats=None, links=None, **_):
- """Create a GoldParse. The fields will not be initialized if len(doc) is zero.
-
- doc (Doc): The document the annotations refer to.
- words (iterable): A sequence of unicode word strings.
- tags (iterable): A sequence of strings, representing tag annotations.
- heads (iterable): A sequence of integers, representing syntactic
- head offsets.
- deps (iterable): A sequence of strings, representing the syntactic
- relation types.
- entities (iterable): A sequence of named entity annotations, either as
- BILUO tag strings, or as `(start_char, end_char, label)` tuples,
- representing the entity positions.
- cats (dict): Labels for text classification. Each key in the dictionary
- may be a string or an int, or a `(start_char, end_char, label)`
- tuple, indicating that the label is applied to only part of the
- document (usually a sentence). Unlike entity annotations, label
- annotations can overlap, i.e. a single word can be covered by
- multiple labelled spans. The TextCategorizer component expects
- true examples of a label to have the value 1.0, and negative
- examples of a label to have the value 0.0. Labels not in the
- dictionary are treated as missing - the gradient for those labels
- will be zero.
- links (dict): A dict with `(start_char, end_char)` keys,
- and the values being dicts with kb_id:value entries,
- representing the external IDs in a knowledge base (KB)
- mapped to either 1.0 or 0.0, indicating positive and
- negative examples respectively.
- make_projective (bool): Whether to projectivize the dependency tree.
- RETURNS (GoldParse): The newly constructed object.
- """
- self.mem = Pool()
- self.loss = 0
- self.length = len(doc)
-
- self.cats = {} if cats is None else dict(cats)
- self.links = links
-
- # orig_annot is used as an iterator in `nlp.evalate` even if self.length == 0,
- # so set a empty list to avoid error.
- # if self.lenght > 0, this is modified latter.
- self.orig_annot = []
-
- # temporary doc for aligning entity annotation
- entdoc = None
-
- # avoid allocating memory if the doc does not contain any tokens
- if self.length == 0:
- self.words = []
- self.tags = []
- self.heads = []
- self.labels = []
- self.ner = []
- self.morphology = []
-
- else:
- if words is None:
- words = [token.text for token in doc]
- if tags is None:
- tags = [None for _ in words]
- if heads is None:
- heads = [None for _ in words]
- if deps is None:
- deps = [None for _ in words]
- if morphology is None:
- morphology = [None for _ in words]
- if entities is None:
- entities = ["-" for _ in words]
- elif len(entities) == 0:
- entities = ["O" for _ in words]
- else:
- # Translate the None values to '-', to make processing easier.
- # See Issue #2603
- entities = [(ent if ent is not None else "-") for ent in entities]
- if not isinstance(entities[0], basestring):
- # Assume we have entities specified by character offset.
- # Create a temporary Doc corresponding to provided words
- # (to preserve gold tokenization) and text (to preserve
- # character offsets).
- entdoc_words, entdoc_spaces = util.get_words_and_spaces(words, doc.text)
- entdoc = Doc(doc.vocab, words=entdoc_words, spaces=entdoc_spaces)
- entdoc_entities = biluo_tags_from_offsets(entdoc, entities)
- # There may be some additional whitespace tokens in the
- # temporary doc, so check that the annotations align with
- # the provided words while building a list of BILUO labels.
- entities = []
- words_offset = 0
- for i in range(len(entdoc_words)):
- if words[i + words_offset] == entdoc_words[i]:
- entities.append(entdoc_entities[i])
- else:
- words_offset -= 1
- if len(entities) != len(words):
- warnings.warn(Warnings.W029.format(text=doc.text))
- entities = ["-" for _ in words]
-
- # These are filled by the tagger/parser/entity recogniser
- self.c.tags = self.mem.alloc(len(doc), sizeof(int))
- self.c.heads = self.mem.alloc(len(doc), sizeof(int))
- self.c.labels = self.mem.alloc(len(doc), sizeof(attr_t))
- self.c.has_dep = self.mem.alloc(len(doc), sizeof(int))
- self.c.sent_start = self.mem.alloc(len(doc), sizeof(int))
- self.c.ner = self.mem.alloc(len(doc), sizeof(Transition))
-
- self.words = [None] * len(doc)
- self.tags = [None] * len(doc)
- self.heads = [None] * len(doc)
- self.labels = [None] * len(doc)
- self.ner = [None] * len(doc)
- self.morphology = [None] * len(doc)
-
- # This needs to be done before we align the words
- if make_projective and heads is not None and deps is not None:
- heads, deps = nonproj.projectivize(heads, deps)
-
- # Do many-to-one alignment for misaligned tokens.
- # If we over-segment, we'll have one gold word that covers a sequence
- # of predicted words
- # If we under-segment, we'll have one predicted word that covers a
- # sequence of gold words.
- # If we "mis-segment", we'll have a sequence of predicted words covering
- # a sequence of gold words. That's many-to-many -- we don't do that
- # except for NER spans where the start and end can be aligned.
- cost, i2j, j2i, i2j_multi, j2i_multi = align([t.orth_ for t in doc], words)
-
- self.cand_to_gold = [(j if j >= 0 else None) for j in i2j]
- self.gold_to_cand = [(i if i >= 0 else None) for i in j2i]
-
- annot_tuples = (range(len(words)), words, tags, heads, deps, entities)
- self.orig_annot = list(zip(*annot_tuples))
-
- for i, gold_i in enumerate(self.cand_to_gold):
- if doc[i].text.isspace():
- self.words[i] = doc[i].text
- self.tags[i] = "_SP"
- self.heads[i] = None
- self.labels[i] = None
- self.ner[i] = None
- self.morphology[i] = set()
- if gold_i is None:
- if i in i2j_multi:
- self.words[i] = words[i2j_multi[i]]
- self.tags[i] = tags[i2j_multi[i]]
- self.morphology[i] = morphology[i2j_multi[i]]
- is_last = i2j_multi[i] != i2j_multi.get(i+1)
- # Set next word in multi-token span as head, until last
- if not is_last:
- self.heads[i] = i+1
- self.labels[i] = "subtok"
- else:
- head_i = heads[i2j_multi[i]]
- if head_i:
- self.heads[i] = self.gold_to_cand[head_i]
- self.labels[i] = deps[i2j_multi[i]]
- ner_tag = entities[i2j_multi[i]]
- # Assign O/- for many-to-one O/- NER tags
- if ner_tag in ("O", "-"):
- self.ner[i] = ner_tag
- else:
- self.words[i] = words[gold_i]
- self.tags[i] = tags[gold_i]
- self.morphology[i] = morphology[gold_i]
- if heads[gold_i] is None:
- self.heads[i] = None
- else:
- self.heads[i] = self.gold_to_cand[heads[gold_i]]
- self.labels[i] = deps[gold_i]
- self.ner[i] = entities[gold_i]
- # Assign O/- for one-to-many O/- NER tags
- for j, cand_j in enumerate(self.gold_to_cand):
- if cand_j is None:
- if j in j2i_multi:
- i = j2i_multi[j]
- ner_tag = entities[j]
- if ner_tag in ("O", "-"):
- self.ner[i] = ner_tag
-
- # If there is entity annotation and some tokens remain unaligned,
- # align all entities at the character level to account for all
- # possible token misalignments within the entity spans
- if any([e not in ("O", "-") for e in entities]) and None in self.ner:
- # If the temporary entdoc wasn't created above, initialize it
- if not entdoc:
- entdoc_words, entdoc_spaces = util.get_words_and_spaces(words, doc.text)
- entdoc = Doc(doc.vocab, words=entdoc_words, spaces=entdoc_spaces)
- # Get offsets based on gold words and BILUO entities
- entdoc_offsets = offsets_from_biluo_tags(entdoc, entities)
- aligned_offsets = []
- aligned_spans = []
- # Filter offsets to identify those that align with doc tokens
- for offset in entdoc_offsets:
- span = doc.char_span(offset[0], offset[1])
- if span and not span.text.isspace():
- aligned_offsets.append(offset)
- aligned_spans.append(span)
- # Convert back to BILUO for doc tokens and assign NER for all
- # aligned spans
- biluo_tags = biluo_tags_from_offsets(doc, aligned_offsets, missing=None)
- for span in aligned_spans:
- for i in range(span.start, span.end):
- self.ner[i] = biluo_tags[i]
-
- # Prevent whitespace that isn't within entities from being tagged as
- # an entity.
- for i in range(len(self.ner)):
- if self.tags[i] == "_SP":
- prev_ner = self.ner[i-1] if i >= 1 else None
- next_ner = self.ner[i+1] if (i+1) < len(self.ner) else None
- if prev_ner == "O" or next_ner == "O":
- self.ner[i] = "O"
-
- cycle = nonproj.contains_cycle(self.heads)
- if cycle is not None:
- raise ValueError(Errors.E069.format(cycle=cycle,
- cycle_tokens=" ".join(["'{}'".format(self.words[tok_id]) for tok_id in cycle]),
- doc_tokens=" ".join(words[:50])))
-
- def __len__(self):
- """Get the number of gold-standard tokens.
-
- RETURNS (int): The number of gold-standard tokens.
- """
- return self.length
-
- @property
- def is_projective(self):
- """Whether the provided syntactic annotations form a projective
- dependency tree.
- """
- return not nonproj.is_nonproj_tree(self.heads)
-
- property sent_starts:
- def __get__(self):
- return [self.c.sent_start[i] for i in range(self.length)]
-
- def __set__(self, sent_starts):
- for gold_i, is_sent_start in enumerate(sent_starts):
- i = self.gold_to_cand[gold_i]
- if i is not None:
- if is_sent_start in (1, True):
- self.c.sent_start[i] = 1
- elif is_sent_start in (-1, False):
- self.c.sent_start[i] = -1
- else:
- self.c.sent_start[i] = 0
-
-
-def docs_to_json(docs, id=0, ner_missing_tag="O"):
- """Convert a list of Doc objects into the JSON-serializable format used by
- the spacy train command.
-
- docs (iterable / Doc): The Doc object(s) to convert.
- id (int): Id for the JSON.
- RETURNS (dict): The data in spaCy's JSON format
- - each input doc will be treated as a paragraph in the output doc
- """
- if isinstance(docs, Doc):
- docs = [docs]
- json_doc = {"id": id, "paragraphs": []}
- for i, doc in enumerate(docs):
- json_para = {'raw': doc.text, "sentences": [], "cats": []}
- for cat, val in doc.cats.items():
- json_cat = {"label": cat, "value": val}
- json_para["cats"].append(json_cat)
- ent_offsets = [(e.start_char, e.end_char, e.label_) for e in doc.ents]
- biluo_tags = biluo_tags_from_offsets(doc, ent_offsets, missing=ner_missing_tag)
- for j, sent in enumerate(doc.sents):
- json_sent = {"tokens": [], "brackets": []}
- for token in sent:
- json_token = {"id": token.i, "orth": token.text}
- if doc.is_tagged:
- json_token["tag"] = token.tag_
- if doc.is_parsed:
- json_token["head"] = token.head.i-token.i
- json_token["dep"] = token.dep_
- json_token["ner"] = biluo_tags[token.i]
- json_sent["tokens"].append(json_token)
- json_para["sentences"].append(json_sent)
- json_doc["paragraphs"].append(json_para)
- return json_doc
-
-
-def biluo_tags_from_offsets(doc, entities, missing="O"):
- """Encode labelled spans into per-token tags, using the
- Begin/In/Last/Unit/Out scheme (BILUO).
-
- doc (Doc): The document that the entity offsets refer to. The output tags
- will refer to the token boundaries within the document.
- entities (iterable): A sequence of `(start, end, label)` triples. `start`
- and `end` should be character-offset integers denoting the slice into
- the original string.
- RETURNS (list): A list of unicode strings, describing the tags. Each tag
- string will be of the form either "", "O" or "{action}-{label}", where
- action is one of "B", "I", "L", "U". The string "-" is used where the
- entity offsets don't align with the tokenization in the `Doc` object.
- The training algorithm will view these as missing values. "O" denotes a
- non-entity token. "B" denotes the beginning of a multi-token entity,
- "I" the inside of an entity of three or more tokens, and "L" the end
- of an entity of two or more tokens. "U" denotes a single-token entity.
-
- EXAMPLE:
- >>> text = 'I like London.'
- >>> entities = [(len('I like '), len('I like London'), 'LOC')]
- >>> doc = nlp.tokenizer(text)
- >>> tags = biluo_tags_from_offsets(doc, entities)
- >>> assert tags == ["O", "O", 'U-LOC', "O"]
- """
- # Ensure no overlapping entity labels exist
- tokens_in_ents = {}
-
- starts = {token.idx: token.i for token in doc}
- ends = {token.idx + len(token): token.i for token in doc}
- biluo = ["-" for _ in doc]
- # Handle entity cases
- for start_char, end_char, label in entities:
- for token_index in range(start_char, end_char):
- if token_index in tokens_in_ents.keys():
- raise ValueError(Errors.E103.format(
- span1=(tokens_in_ents[token_index][0],
- tokens_in_ents[token_index][1],
- tokens_in_ents[token_index][2]),
- span2=(start_char, end_char, label)))
- tokens_in_ents[token_index] = (start_char, end_char, label)
-
- start_token = starts.get(start_char)
- end_token = ends.get(end_char)
- # Only interested if the tokenization is correct
- if start_token is not None and end_token is not None:
- if start_token == end_token:
- biluo[start_token] = "U-%s" % label
- else:
- biluo[start_token] = "B-%s" % label
- for i in range(start_token+1, end_token):
- biluo[i] = "I-%s" % label
- biluo[end_token] = "L-%s" % label
- # Now distinguish the O cases from ones where we miss the tokenization
- entity_chars = set()
- for start_char, end_char, label in entities:
- for i in range(start_char, end_char):
- entity_chars.add(i)
- for token in doc:
- for i in range(token.idx, token.idx + len(token)):
- if i in entity_chars:
- break
- else:
- biluo[token.i] = missing
- if "-" in biluo:
- ent_str = str(entities)
- warnings.warn(Warnings.W030.format(
- text=doc.text[:50] + "..." if len(doc.text) > 50 else doc.text,
- entities=ent_str[:50] + "..." if len(ent_str) > 50 else ent_str
- ))
- return biluo
-
-
-def spans_from_biluo_tags(doc, tags):
- """Encode per-token tags following the BILUO scheme into Span object, e.g.
- to overwrite the doc.ents.
-
- doc (Doc): The document that the BILUO tags refer to.
- entities (iterable): A sequence of BILUO tags with each tag describing one
- token. Each tags string will be of the form of either "", "O" or
- "{action}-{label}", where action is one of "B", "I", "L", "U".
- RETURNS (list): A sequence of Span objects.
- """
- token_offsets = tags_to_entities(tags)
- spans = []
- for label, start_idx, end_idx in token_offsets:
- span = Span(doc, start_idx, end_idx + 1, label=label)
- spans.append(span)
- return spans
-
-
-def offsets_from_biluo_tags(doc, tags):
- """Encode per-token tags following the BILUO scheme into entity offsets.
-
- doc (Doc): The document that the BILUO tags refer to.
- entities (iterable): A sequence of BILUO tags with each tag describing one
- token. Each tags string will be of the form of either "", "O" or
- "{action}-{label}", where action is one of "B", "I", "L", "U".
- RETURNS (list): A sequence of `(start, end, label)` triples. `start` and
- `end` will be character-offset integers denoting the slice into the
- original string.
- """
- spans = spans_from_biluo_tags(doc, tags)
- return [(span.start_char, span.end_char, span.label_) for span in spans]
-
-
-def is_punct_label(label):
- return label == "P" or label.lower() == "punct"
diff --git a/spacy/syntax/__init__.pxd b/spacy/gold/__init__.pxd
similarity index 100%
rename from spacy/syntax/__init__.pxd
rename to spacy/gold/__init__.pxd
diff --git a/spacy/gold/__init__.py b/spacy/gold/__init__.py
new file mode 100644
index 000000000..35e67f696
--- /dev/null
+++ b/spacy/gold/__init__.py
@@ -0,0 +1,9 @@
+from .corpus import Corpus # noqa: F401
+from .example import Example, validate_examples # noqa: F401
+from .align import Alignment # noqa: F401
+from .iob_utils import iob_to_biluo, biluo_to_iob # noqa: F401
+from .iob_utils import biluo_tags_from_offsets, offsets_from_biluo_tags # noqa: F401
+from .iob_utils import spans_from_biluo_tags, tags_to_entities # noqa: F401
+from .gold_io import docs_to_json, read_json_file # noqa: F401
+from .batchers import minibatch_by_padded_size, minibatch_by_words # noqa: F401
+from .loggers import console_logger, wandb_logger # noqa: F401
diff --git a/spacy/gold/align.py b/spacy/gold/align.py
new file mode 100644
index 000000000..e8f17a667
--- /dev/null
+++ b/spacy/gold/align.py
@@ -0,0 +1,34 @@
+from typing import List
+import numpy
+from thinc.types import Ragged
+from dataclasses import dataclass
+import tokenizations
+
+from ..errors import Errors
+
+
+@dataclass
+class Alignment:
+    """Alignment in both directions (x to y, y to x), each held as a Ragged."""
+    x2y: Ragged
+    y2x: Ragged
+
+    @classmethod
+    def from_indices(cls, x2y: List[List[int]], y2x: List[List[int]]) -> "Alignment":
+        return Alignment(x2y=_make_ragged(x2y), y2x=_make_ragged(y2x))
+
+    @classmethod
+    def from_strings(cls, A: List[str], B: List[str]) -> "Alignment":
+        texts = ["".join(tokens).replace(" ", "").lower() for tokens in (A, B)]
+        if texts[0] != texts[1]:  # same text required, ignoring spaces and case
+            raise ValueError(Errors.E949)
+        x2y, y2x = tokenizations.get_alignments(A, B)
+        return Alignment.from_indices(x2y=x2y, y2x=y2x)
+
+
+def _make_ragged(indices):
+    """Pack nested index lists into a Ragged: flat "i" data plus per-row lengths."""
+    # One length per inner list, so empty rows are still recorded.
+    row_lengths = numpy.array([len(row) for row in indices], dtype="i")
+    flattened = [index for row in indices for index in row]
+    return Ragged(numpy.array(flattened, dtype="i"), row_lengths)
diff --git a/spacy/gold/augment.py b/spacy/gold/augment.py
new file mode 100644
index 000000000..4a01c8589
--- /dev/null
+++ b/spacy/gold/augment.py
@@ -0,0 +1,112 @@
+import random
+import itertools
+
+
+def make_orth_variants_example(nlp, example, orth_variant_level=0.0):  # TODO: naming
+    """Apply make_orth_variants to an example and rebuild it with example.from_dict."""
+    text, annotations = example.text, example.to_dict()
+    new_text, new_token_annot = make_orth_variants(
+        nlp, text, annotations["token_annotation"], orth_variant_level
+    )
+    annotations["token_annotation"] = new_token_annot
+    # The doc is tokenized from the (possibly varied) text returned above.
+    return example.from_dict(nlp.make_doc(new_text), annotations)
+
+
+def make_orth_variants(nlp, raw_text, orig_token_dict, orth_variant_level=0.0):
+    """Randomly swap tokens, and the matching raw text, for orthographic variants.
+
+    nlp: Object whose `vocab.lookups` may provide an "orth_variants" table.
+    raw_text (str or None): Raw text corresponding to the words, if any.
+    orig_token_dict (dict): Token annotation; only "words" and "tags" are read.
+    orth_variant_level (float): Probability of attempting the replacement.
+    RETURNS (tuple): `(text, token_dict)`. The original arguments are returned,
+        unmodified, if nothing is attempted or the text cannot be re-aligned.
+    """
+    if random.random() >= orth_variant_level or not orig_token_dict:
+        return raw_text, orig_token_dict
+    # Work on copies: the abort paths below must hand back pristine inputs.
+    token_dict = dict(orig_token_dict)
+    words = list(token_dict.get("words", []))
+    tags = token_dict.get("tags", [])
+    # keep unmodified if words or tags are not defined
+    if not words or not tags:
+        return raw_text, orig_token_dict
+    raw = raw_text
+    lower = random.random() >= 0.5
+    if lower:
+        words = [w.lower() for w in words]
+        if raw is not None:
+            raw = raw.lower()
+    orth_variants = nlp.vocab.lookups.get_table("orth_variants", {})
+    ndsv = orth_variants.get("single", [])
+    ndpv = orth_variants.get("paired", [])
+    # single variants
+    punct_choices = [random.choice(x["variants"]) for x in ndsv]
+    for word_idx in range(len(words)):
+        for punct_idx, entry in enumerate(ndsv):
+            if tags[word_idx] in entry["tags"] and words[word_idx] in entry["variants"]:
+                words[word_idx] = punct_choices[punct_idx]
+    # paired variants
+    punct_choices = [random.choice(x["variants"]) for x in ndpv]
+    for word_idx in range(len(words)):
+        for punct_idx, entry in enumerate(ndpv):
+            flat_variants = itertools.chain.from_iterable(entry["variants"])
+            if tags[word_idx] in entry["tags"] and words[word_idx] in flat_variants:
+                # backup option: random left vs. right from pair
+                pair_idx = random.choice([0, 1])
+                # best option: rely on paired POS tags like `` / ''
+                if len(entry["tags"]) == 2:
+                    pair_idx = entry["tags"].index(tags[word_idx])
+                # next best option: rely on position in variants
+                # (may not be unambiguous, so order of variants matters)
+                else:
+                    for pair in entry["variants"]:
+                        if words[word_idx] in pair:
+                            pair_idx = pair.index(words[word_idx])
+                words[word_idx] = punct_choices[punct_idx][pair_idx]
+    token_dict["words"] = words
+    token_dict["tags"] = tags
+    # modify raw
+    if raw is not None:
+        variants = [v for single in ndsv for v in single["variants"]]
+        for paired in ndpv:
+            variants.extend(itertools.chain.from_iterable(paired["variants"]))
+        # store variants in reverse length order to be able to prioritize
+        # longer matches (e.g., "---" before "--")
+        variants = sorted(variants, key=lambda x: len(x))
+        variants.reverse()
+        variant_raw = ""
+        raw_idx = 0
+        # add initial whitespace
+        while raw_idx < len(raw) and raw[raw_idx].isspace():
+            variant_raw += raw[raw_idx]
+            raw_idx += 1
+        for word in words:
+            match_found = False
+            # skip whitespace words
+            if word.isspace():
+                match_found = True
+            # add identical word
+            elif word not in variants and raw.startswith(word, raw_idx):
+                variant_raw += word
+                raw_idx += len(word)
+                match_found = True
+            # add variant word
+            else:
+                for variant in variants:
+                    if raw.startswith(variant, raw_idx):
+                        raw_idx += len(variant)
+                        variant_raw += word
+                        match_found = True
+                        break
+            # something went wrong, abort
+            # (add a warning message?)
+            if not match_found:
+                return raw_text, orig_token_dict
+            # add following whitespace
+            while raw_idx < len(raw) and raw[raw_idx].isspace():
+                variant_raw += raw[raw_idx]
+                raw_idx += 1
+        raw = variant_raw
+    return raw, token_dict
diff --git a/spacy/gold/batchers.py b/spacy/gold/batchers.py
new file mode 100644
index 000000000..c54242eae
--- /dev/null
+++ b/spacy/gold/batchers.py
@@ -0,0 +1,230 @@
+from typing import Union, Iterable, Sequence, TypeVar, List, Callable
+from typing import Optional, Any
+from functools import partial
+import itertools
+
+from ..util import registry, minibatch
+
+
+Sizing = Union[Iterable[int], int]
+ItemT = TypeVar("ItemT")
+BatcherT = Callable[[Iterable[ItemT]], Iterable[List[ItemT]]]
+
+
+@registry.batchers("spacy.batch_by_padded.v1")
+def configure_minibatch_by_padded_size(
+    *,
+    size: Sizing,
+    buffer: int,
+    discard_oversize: bool,
+    get_length: Optional[Callable[[ItemT], int]] = None
+) -> BatcherT:
+    """Build a batcher that applies `minibatch_by_padded_size`.
+
+    The padded size of a batch is the length of its longest sequence
+    multiplied by the number of sequences in the batch.
+
+    size (int or Iterable[int]): Upper bound for the padded size of a batch.
+        Either a single integer or a schedule of integers.
+    buffer (int): How many sequences are collected before they are sorted by
+        length. A bigger buffer gives more evenly sized batches but makes the
+        iteration order less random, which can hurt training.
+    discard_oversize (bool): Whether sequences that on their own exceed the
+        largest padded batch size are dropped.
+    get_length (Callable or None): Function returning the length of an item.
+        When None, the default of `minibatch_by_padded_size` is used.
+    RETURNS (Callable): `minibatch_by_padded_size` with the settings bound.
+    """
+    settings = {"size": size, "buffer": buffer, "discard_oversize": discard_oversize}
+    # Forward `get_length` only when it was set, so the wrapped function keeps
+    # its own default otherwise.
+    if get_length is not None:
+        settings["get_length"] = get_length
+    return partial(minibatch_by_padded_size, **settings)
+
+
+@registry.batchers("spacy.batch_by_words.v1")
+def configure_minibatch_by_words(
+    *,
+    size: Sizing,
+    tolerance: float,
+    discard_oversize: bool,
+    get_length: Optional[Callable[[ItemT], int]] = None
+) -> BatcherT:
+    """Create a batcher that uses the "minibatch by words" strategy.
+
+    size (int or Iterable[int]): The target number of words per batch.
+        Can be a single integer, or a sequence, allowing for variable batch sizes.
+    tolerance (float): What percentage of the size to allow batches to exceed.
+    discard_oversize (bool): Whether to discard sequences that by themselves
+        exceed the tolerated size.
+    get_length (Callable or None): Function to get the length of a sequence
+        item. The `len` function is used by default.
+    RETURNS (Callable): `minibatch_by_words` with the settings bound.
+    """
+    # Avoid displacing optional values from the underlying function.
+    optionals = {"get_length": get_length} if get_length is not None else {}
+    return partial(
+        minibatch_by_words,
+        size=size,
+        # The configured tolerance has to be forwarded explicitly; otherwise
+        # the default of `minibatch_by_words` silently replaces it.
+        tolerance=tolerance,
+        discard_oversize=discard_oversize,
+        **optionals
+    )
+
+
+@registry.batchers("spacy.batch_by_sequence.v1")
+def configure_minibatch(
+    size: Sizing, get_length: Optional[Callable[[ItemT], int]] = None
+) -> BatcherT:
+    """Build a batcher that produces batches with a fixed number of items.
+
+    size (int or Iterable[int]): The target number of items per batch.
+        Either a single integer or a sequence for variable batch sizes.
+    get_length (Callable or None): Forwarded to `minibatch` as a keyword
+        argument, but only when it is not None.
+    RETURNS (Callable): `minibatch` with the settings bound.
+    """
+    extra = {}
+    if get_length is not None:
+        extra["get_length"] = get_length
+    return partial(minibatch, size=size, **extra)
+
+
+def minibatch_by_padded_size(
+    seqs: Iterable[ItemT],
+    size: Sizing,
+    buffer: int = 256,
+    discard_oversize: bool = False,
+    get_length: Callable = len,
+) -> Iterable[List[ItemT]]:
+    """Minibatch a sequence by the size of padded batches that would result,
+    with sequences binned by length within a window.
+
+    The padded size is defined as the maximum length of sequences within the
+    batch multiplied by the number of sequences in the batch.
+
+    seqs (Iterable): The items to batch.
+    size (int or Iterable[int]): The largest padded size to batch sequences
+        into. One value is consumed per buffer of sequences, so a schedule has
+        to provide a value for every buffer.
+    buffer (int): The number of sequences to accumulate before sorting by length.
+        A larger buffer will result in more even sizing, but if the buffer is
+        very large, the iteration order will be less random, which can result
+        in suboptimal training.
+    discard_oversize (bool): Whether to drop batches whose padded size is
+        greater than or equal to the current target size.
+    get_length (Callable): Function to get the length of a sequence item.
+        The `len` function is used by default.
+    YIELDS (List): The batches.
+    """
+    if isinstance(size, int):
+        size_ = itertools.repeat(size)
+    else:
+        # Lists and tuples are iterable but are not iterators, so `next()`
+        # would fail on them. `iter()` returns an iterator unchanged.
+        size_ = iter(size)
+    for outer_batch in minibatch(seqs, size=buffer):
+        outer_batch = list(outer_batch)
+        target_size = next(size_)
+        for indices in _batch_by_length(outer_batch, target_size, get_length):
+            subbatch = [outer_batch[i] for i in indices]
+            # Measure with `get_length`, the same function that was used to
+            # group the sequences, instead of assuming `len` applies.
+            longest = max(get_length(seq) for seq in subbatch)
+            padded_size = longest * len(subbatch)
+            if discard_oversize and padded_size >= target_size:
+                continue
+            yield subbatch
+
+
+def minibatch_by_words(
+    seqs: Iterable[ItemT],
+    size: Sizing,
+    tolerance=0.2,
+    discard_oversize=False,
+    get_length=len,
+) -> Iterable[List[ItemT]]:
+    """Create minibatches of roughly a given number of words. If any examples
+    are longer than the specified batch length, they will appear in a batch by
+    themselves, or be discarded if discard_oversize=True.
+
+    seqs (Iterable[Sequence]): The sequences to minibatch.
+    size (int or Iterable[int]): The target number of words per batch.
+        Can be a single integer, or any iterable of integers (list, tuple,
+        generator, ...), allowing for variable batch sizes. One value is
+        consumed up front and one more each time a new batch is started.
+    tolerance (float): What percentage of the size to allow batches to exceed.
+    discard_oversize (bool): Whether to discard sequences that by themselves
+        exceed the tolerated size.
+    get_length (Callable or None): Function to get the length of a sequence
+        item. The `len` function is used by default.
+    YIELDS (List): The batches.
+    """
+    if isinstance(size, int):
+        size_ = itertools.repeat(size)
+    else:
+        # Any non-int size is treated as a schedule. `iter()` turns lists,
+        # tuples and ranges into iterators and leaves iterators unchanged, so
+        # `next()` below works for all of them.
+        size_ = iter(size)
+    target_size = next(size_)
+    tol_size = target_size * tolerance
+    batch = []
+    overflow = []
+    batch_size = 0
+    overflow_size = 0
+    for seq in seqs:
+        n_words = get_length(seq)
+        # if the current example exceeds the maximum batch size, it is returned separately
+        # but only if discard_oversize=False.
+        if n_words > target_size + tol_size:
+            if not discard_oversize:
+                yield [seq]
+        # add the example to the current batch if there's no overflow yet and it still fits
+        elif overflow_size == 0 and (batch_size + n_words) <= target_size:
+            batch.append(seq)
+            batch_size += n_words
+        # add the example to the overflow buffer if it fits in the tolerance margin
+        elif (batch_size + overflow_size + n_words) <= (target_size + tol_size):
+            overflow.append(seq)
+            overflow_size += n_words
+        # yield the previous batch and start a new one. The new one gets the overflow examples.
+        else:
+            if batch:
+                yield batch
+            target_size = next(size_)
+            tol_size = target_size * tolerance
+            batch = overflow
+            batch_size = overflow_size
+            overflow = []
+            overflow_size = 0
+            # this example still fits
+            if (batch_size + n_words) <= target_size:
+                batch.append(seq)
+                batch_size += n_words
+            # this example fits in overflow
+            elif (batch_size + n_words) <= (target_size + tol_size):
+                overflow.append(seq)
+                overflow_size += n_words
+            # this example does not fit with the previous overflow: start another new batch
+            else:
+                if batch:
+                    yield batch
+                target_size = next(size_)
+                tol_size = target_size * tolerance
+                batch = [seq]
+                batch_size = n_words
+    # whatever is left in the overflow buffer belongs to the final batch
+    batch.extend(overflow)
+    if batch:
+        yield batch
+
+
+def _batch_by_length(
+    seqs: Sequence[Any], max_words: int, get_length=len
+) -> List[List[Any]]:
+    """Group the indices of `seqs` into batches of similar-length sequences.
+
+    Sequences are visited from shortest to longest. A batch is closed once
+    adding the next sequence would make `length of that sequence * new batch
+    size` larger than max_words. A sequence that starts a batch is always
+    accepted, even if it is larger than max_words by itself.
+
+    RETURNS (List[List[int]]): Batches of indices into `seqs`. The batch with
+        the longest sequences comes first; indices within a batch ascend.
+    """
+    # Sorting (length, position) pairs orders by length and breaks ties by
+    # the original position.
+    by_length = sorted((get_length(seq), idx) for idx, seq in enumerate(seqs))
+    batches = []
+    current = []
+    for length, idx in by_length:
+        if current and length * (len(current) + 1) > max_words:
+            batches.append(current)
+            current = []
+        current.append(idx)
+    if current:
+        batches.append(current)
+    # Check lengths match
+    assert sum(len(b) for b in batches) == len(seqs)
+    batches = [sorted(b) for b in batches]
+    batches.reverse()
+    return batches
diff --git a/spacy/gold/converters/__init__.py b/spacy/gold/converters/__init__.py
new file mode 100644
index 000000000..15f025a08
--- /dev/null
+++ b/spacy/gold/converters/__init__.py
@@ -0,0 +1,4 @@
+from .iob2docs import iob2docs # noqa: F401
+from .conll_ner2docs import conll_ner2docs # noqa: F401
+from .json2docs import json2docs # noqa: F401
+from .conllu2docs import conllu2docs # noqa: F401
diff --git a/spacy/cli/converters/conll_ner2json.py b/spacy/gold/converters/conll_ner2docs.py
similarity index 73%
rename from spacy/cli/converters/conll_ner2json.py
rename to spacy/gold/converters/conll_ner2docs.py
index 46489ad7c..c04a77f07 100644
--- a/spacy/cli/converters/conll_ner2json.py
+++ b/spacy/gold/converters/conll_ner2docs.py
@@ -1,20 +1,18 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from wasabi import Printer
+from .. import tags_to_entities
from ...gold import iob_to_biluo
from ...lang.xx import MultiLanguage
-from ...tokens.doc import Doc
+from ...tokens import Doc, Span
from ...util import load_model
-def conll_ner2json(
+def conll_ner2docs(
input_data, n_sents=10, seg_sents=False, model=None, no_print=False, **kwargs
):
"""
Convert files in the CoNLL-2003 NER format and similar
- whitespace-separated columns into JSON format for use with train cli.
+ whitespace-separated columns into Doc objects.
The first column is the tokens, the final column is the IOB tags. If an
additional second column is present, the second column is the tags.
@@ -64,9 +62,9 @@ def conll_ner2json(
# sentence segmentation required for document segmentation
if n_sents > 0 and not seg_sents:
msg.warn(
- "No sentence boundaries found to use with option `-n {}`. "
- "Use `-s` to automatically segment sentences or `-n 0` "
- "to disable.".format(n_sents)
+ f"No sentence boundaries found to use with option `-n {n_sents}`. "
+ f"Use `-s` to automatically segment sentences or `-n 0` "
+ f"to disable."
)
else:
n_sents_info(msg, n_sents)
@@ -84,43 +82,45 @@ def conll_ner2json(
"No document delimiters found. Use `-n` to automatically group "
"sentences into documents."
)
+
+ if model:
+ nlp = load_model(model)
+ else:
+ nlp = MultiLanguage()
output_docs = []
- for doc in input_data.strip().split(doc_delimiter):
- doc = doc.strip()
- if not doc:
+ for conll_doc in input_data.strip().split(doc_delimiter):
+ conll_doc = conll_doc.strip()
+ if not conll_doc:
continue
- output_doc = []
- for sent in doc.split("\n\n"):
- sent = sent.strip()
- if not sent:
+ words = []
+ sent_starts = []
+ pos_tags = []
+ biluo_tags = []
+ for conll_sent in conll_doc.split("\n\n"):
+ conll_sent = conll_sent.strip()
+ if not conll_sent:
continue
- lines = [line.strip() for line in sent.split("\n") if line.strip()]
+ lines = [line.strip() for line in conll_sent.split("\n") if line.strip()]
cols = list(zip(*[line.split() for line in lines]))
if len(cols) < 2:
raise ValueError(
"The token-per-line NER file is not formatted correctly. "
"Try checking whitespace and delimiters. See "
- "https://spacy.io/api/cli#convert"
+ "https://nightly.spacy.io/api/cli#convert"
)
- words = cols[0]
- iob_ents = cols[-1]
- if len(cols) > 2:
- tags = cols[1]
- else:
- tags = ["-"] * len(words)
- biluo_ents = iob_to_biluo(iob_ents)
- output_doc.append(
- {
- "tokens": [
- {"orth": w, "tag": tag, "ner": ent}
- for (w, tag, ent) in zip(words, tags, biluo_ents)
- ]
- }
- )
- output_docs.append(
- {"id": len(output_docs), "paragraphs": [{"sentences": output_doc}]}
- )
- output_doc = []
+ length = len(cols[0])
+ words.extend(cols[0])
+ sent_starts.extend([True] + [False] * (length - 1))
+ biluo_tags.extend(iob_to_biluo(cols[-1]))
+ pos_tags.extend(cols[1] if len(cols) > 2 else ["-"] * length)
+
+ doc = Doc(nlp.vocab, words=words)
+ for i, token in enumerate(doc):
+ token.tag_ = pos_tags[i]
+ token.is_sent_start = sent_starts[i]
+ entities = tags_to_entities(biluo_tags)
+ doc.ents = [Span(doc, start=s, end=e + 1, label=L) for L, s, e in entities]
+ output_docs.append(doc)
return output_docs
@@ -129,7 +129,7 @@ def segment_sents_and_docs(doc, n_sents, doc_delimiter, model=None, msg=None):
if model:
nlp = load_model(model)
if "parser" in nlp.pipe_names:
- msg.info("Segmenting sentences with parser from model '{}'.".format(model))
+ msg.info(f"Segmenting sentences with parser from model '{model}'.")
sentencizer = nlp.get_pipe("parser")
if not sentencizer:
msg.info(
@@ -166,7 +166,7 @@ def segment_docs(input_data, n_sents, doc_delimiter):
def n_sents_info(msg, n_sents):
- msg.info("Grouping every {} sentences into a document.".format(n_sents))
+ msg.info(f"Grouping every {n_sents} sentences into a document.")
if n_sents == 1:
msg.warn(
"To generate better training data, you may want to group "
diff --git a/spacy/gold/converters/conllu2docs.py b/spacy/gold/converters/conllu2docs.py
new file mode 100644
index 000000000..11ee86182
--- /dev/null
+++ b/spacy/gold/converters/conllu2docs.py
@@ -0,0 +1,294 @@
+import re
+
+from .conll_ner2docs import n_sents_info
+from ...gold import iob_to_biluo, spans_from_biluo_tags
+from ...tokens import Doc, Token, Span
+from ...vocab import Vocab
+from wasabi import Printer
+
+
+def conllu2docs(
+    input_data,
+    n_sents=10,
+    append_morphology=False,
+    ner_map=None,
+    merge_subtokens=False,
+    no_print=False,
+    **_
+):
+    """
+    Convert CoNLL-U data into Doc objects.
+    append_morphology parameter enables appending morphology to tags, which is
+    useful for languages such as Spanish, where UD tags are not so rich.
+
+    Extract NER tags if available and convert them so that they follow
+    BILUO and the Wikipedia scheme
+
+    input_data (str): The CoNLL-U text.
+    n_sents (int): Number of sentences merged into one Doc. With 0, all
+        sentences are merged into a single Doc.
+    append_morphology (bool): Passed on to `read_conllx`.
+    ner_map (dict): Passed on to `read_conllx`.
+    merge_subtokens (bool): Passed on to `read_conllx`.
+    no_print (bool): Silence the status messages.
+    RETURNS (list): The merged Doc objects.
+    """
+    MISC_NER_PATTERN = "^((?:name|NE)=)?([BILU])-([A-Z_]+)|O$"
+    msg = Printer(no_print=no_print)
+    if n_sents > 0:
+        n_sents_info(msg, n_sents)
+    sent_docs = read_conllx(
+        input_data,
+        append_morphology=append_morphology,
+        ner_tag_pattern=MISC_NER_PATTERN,
+        ner_map=ner_map,
+        merge_subtokens=merge_subtokens,
+    )
+    docs = []
+    sent_docs_to_merge = []
+    for sent_doc in sent_docs:
+        sent_docs_to_merge.append(sent_doc)
+        # Guard the modulo: n_sents=0 would otherwise raise ZeroDivisionError.
+        # With 0, nothing is flushed here and the remainder below becomes
+        # one Doc holding every sentence.
+        if n_sents != 0 and len(sent_docs_to_merge) % n_sents == 0:
+            docs.append(Doc.from_docs(sent_docs_to_merge))
+            sent_docs_to_merge = []
+    if sent_docs_to_merge:
+        docs.append(Doc.from_docs(sent_docs_to_merge))
+    return docs
+
+
+def has_ner(input_data, ner_tag_pattern):
+    """
+    Check the MISC column for NER tags.
+
+    input_data (str): CoNLL-U text with sentences separated by blank lines.
+    ner_tag_pattern (str): Regex that is matched with `re.match` against every
+        `|`-separated part of the MISC column.
+    RETURNS (bool): True as soon as one MISC part matches, False otherwise.
+    """
+    for sent in input_data.strip().split("\n\n"):
+        for line in sent.strip().split("\n"):
+            # Skip comment lines and empty lines. Empty input still produces
+            # one empty "line", and a sentence may consist of comments only;
+            # neither can be unpacked into the ten columns below.
+            if not line.strip() or line.startswith("#"):
+                continue
+            parts = line.split("\t")
+            id_, word, lemma, pos, tag, morph, head, dep, _1, misc = parts
+            for misc_part in misc.split("|"):
+                if re.match(ner_tag_pattern, misc_part):
+                    return True
+    return False
+
+
+def read_conllx(
+    input_data,
+    append_morphology=False,
+    merge_subtokens=False,
+    ner_tag_pattern="",
+    ner_map=None,
+):
+    """Yield docs, one for each sentence.
+
+    input_data (str): CoNLL-U text with sentences separated by blank lines.
+    append_morphology (bool): Passed on to `doc_from_conllu_sentence`.
+    merge_subtokens (bool): Passed on to `doc_from_conllu_sentence`.
+    ner_tag_pattern (str): Passed on to `doc_from_conllu_sentence`.
+    ner_map (dict): Passed on to `doc_from_conllu_sentence`.
+    YIELDS (Doc): One Doc per sentence that has at least one token line.
+    """
+    vocab = Vocab()  # need vocab to make a minimal Doc
+    for sent in input_data.strip().split("\n\n"):
+        # Keep only token lines. Comment lines and empty lines cannot be
+        # split into the ten CoNLL-U columns.
+        lines = [
+            line
+            for line in sent.strip().split("\n")
+            if line.strip() and not line.startswith("#")
+        ]
+        # Empty input or a comment-only sentence leaves nothing to convert.
+        if not lines:
+            continue
+        yield doc_from_conllu_sentence(
+            vocab,
+            lines,
+            ner_tag_pattern,
+            merge_subtokens=merge_subtokens,
+            append_morphology=append_morphology,
+            ner_map=ner_map,
+        )
+
+
+def get_entities(lines, tag_pattern, ner_map=None):
+ """Find entities in the MISC column according to the pattern and map to
+ final entity type with `ner_map` if mapping present. Entity tag is 'O' if
+ the pattern is not matched.
+
+ lines (list): The tab-separated CoNLL-U lines for one sentence
+ tag_pattern (str): Regex pattern for entity tag; group 2 is read as the
+ tag prefix and group 3 as the entity label
+ ner_map (dict): Map old NER tag names to new ones, '' maps to O.
+ RETURNS (list): List of BILUO entity tags, as returned by `iob_to_biluo`
+ """
+ miscs = []
+ for line in lines:
+ parts = line.split("\t")
+ id_, word, lemma, pos, tag, morph, head, dep, _1, misc = parts
+ # ids containing "-" or "." get no tag of their own
+ # (presumably CoNLL-U token ranges and empty nodes -- TODO confirm)
+ if "-" in id_ or "." in id_:
+ continue
+ miscs.append(misc)
+
+ iob = []
+ for misc in miscs:
+ # default when no MISC part matches the pattern
+ iob_tag = "O"
+ for misc_part in misc.split("|"):
+ tag_match = re.match(tag_pattern, misc_part)
+ if tag_match:
+ prefix = tag_match.group(2)
+ suffix = tag_match.group(3)
+ if prefix and suffix:
+ iob_tag = prefix + "-" + suffix
+ if ner_map:
+ # unmapped labels are kept as they are
+ suffix = ner_map.get(suffix, suffix)
+ if suffix == "":
+ iob_tag = "O"
+ else:
+ iob_tag = prefix + "-" + suffix
+ break
+ iob.append(iob_tag)
+ return iob_to_biluo(iob)
+
+
+def doc_from_conllu_sentence(
+ vocab,
+ lines,
+ ner_tag_pattern,
+ merge_subtokens=False,
+ append_morphology=False,
+ ner_map=None,
+):
+ """Create a Doc from the lines for one CoNLL-U sentence, merging
+ subtokens and appending morphology to tags if required.
+
+ vocab (Vocab): The vocab used to construct the Doc objects
+ lines (list): The non-comment lines for a CoNLL-U sentence
+ ner_tag_pattern (str): The regex pattern for matching NER in MISC col
+ merge_subtokens (bool): Whether subtoken ranges are merged into one token
+ append_morphology (bool): Whether non-empty morphology is appended to the
+ tag, separated by "__"
+ ner_map (dict): Passed on to `get_entities`
+ RETURNS (Doc): A doc containing the annotation
+ """
+ # create a Doc with each subtoken as its own token
+ # if merging subtokens, each subtoken orth is the merged subtoken form
+ if not Token.has_extension("merged_orth"):
+ Token.set_extension("merged_orth", default="")
+ if not Token.has_extension("merged_lemma"):
+ Token.set_extension("merged_lemma", default="")
+ if not Token.has_extension("merged_morph"):
+ Token.set_extension("merged_morph", default="")
+ if not Token.has_extension("merged_spaceafter"):
+ Token.set_extension("merged_spaceafter", default="")
+ words, spaces, tags, poses, morphs, lemmas = [], [], [], [], [], []
+ heads, deps = [], []
+ subtok_word = ""
+ in_subtok = False
+ for i in range(len(lines)):
+ line = lines[i]
+ parts = line.split("\t")
+ id_, word, lemma, pos, tag, morph, head, dep, _1, misc = parts
+ # lines whose id contains "." are ignored entirely
+ if "." in id_:
+ continue
+ # NOTE(review): this check is repeated directly below; the repetition
+ # looks redundant
+ if "-" in id_:
+ in_subtok = True
+ # an id of the form "start-end" opens a subtoken range; remember its
+ # surface form, its bounds and whether a space follows it
+ if "-" in id_:
+ in_subtok = True
+ subtok_word = word
+ subtok_start, subtok_end = id_.split("-")
+ subtok_spaceafter = "SpaceAfter=No" not in misc
+ continue
+ if merge_subtokens and in_subtok:
+ words.append(subtok_word)
+ else:
+ words.append(word)
+ # inside a range only the last subtoken can be followed by a space
+ if in_subtok:
+ if id_ == subtok_end:
+ spaces.append(subtok_spaceafter)
+ else:
+ spaces.append(False)
+ elif "SpaceAfter=No" in misc:
+ spaces.append(False)
+ else:
+ spaces.append(True)
+ if in_subtok and id_ == subtok_end:
+ subtok_word = ""
+ in_subtok = False
+ # convert the 1-based ids to 0-based token indices; a head of "0" or "_"
+ # makes the token its own head
+ id_ = int(id_) - 1
+ head = (int(head) - 1) if head not in ("0", "_") else id_
+ tag = pos if tag == "_" else tag
+ morph = morph if morph != "_" else ""
+ dep = "ROOT" if dep == "root" else dep
+ lemmas.append(lemma)
+ poses.append(pos)
+ tags.append(tag)
+ morphs.append(morph)
+ heads.append(head)
+ deps.append(dep)
+
+ doc = Doc(vocab, words=words, spaces=spaces)
+ for i in range(len(doc)):
+ doc[i].tag_ = tags[i]
+ doc[i].pos_ = poses[i]
+ doc[i].dep_ = deps[i]
+ doc[i].lemma_ = lemmas[i]
+ doc[i].head = doc[heads[i]]
+ doc[i]._.merged_orth = words[i]
+ doc[i]._.merged_morph = morphs[i]
+ doc[i]._.merged_lemma = lemmas[i]
+ doc[i]._.merged_spaceafter = spaces[i]
+ ents = get_entities(lines, ner_tag_pattern, ner_map)
+ doc.ents = spans_from_biluo_tags(doc, ents)
+ doc.is_parsed = True
+ doc.is_tagged = True
+
+ if merge_subtokens:
+ doc = merge_conllu_subtokens(lines, doc)
+
+ # create final Doc from custom Doc annotation
+ words, spaces, tags, morphs, lemmas, poses = [], [], [], [], [], []
+ heads, deps = [], []
+ for i, t in enumerate(doc):
+ words.append(t._.merged_orth)
+ lemmas.append(t._.merged_lemma)
+ spaces.append(t._.merged_spaceafter)
+ morphs.append(t._.merged_morph)
+ if append_morphology and t._.merged_morph:
+ tags.append(t.tag_ + "__" + t._.merged_morph)
+ else:
+ tags.append(t.tag_)
+ poses.append(t.pos_)
+ heads.append(t.head.i)
+ deps.append(t.dep_)
+
+ doc_x = Doc(vocab, words=words, spaces=spaces)
+ for i in range(len(doc)):
+ doc_x[i].tag_ = tags[i]
+ doc_x[i].morph_ = morphs[i]
+ doc_x[i].lemma_ = lemmas[i]
+ doc_x[i].pos_ = poses[i]
+ doc_x[i].dep_ = deps[i]
+ doc_x[i].head = doc_x[heads[i]]
+ doc_x.ents = [Span(doc_x, ent.start, ent.end, label=ent.label) for ent in doc.ents]
+ doc_x.is_parsed = True
+ doc_x.is_tagged = True
+
+ return doc_x
+
+
+def merge_conllu_subtokens(lines, doc):
+ """Merge every subtoken range of a CoNLL-U sentence into one token.
+
+ lines (list): The tab-separated CoNLL-U lines for the sentence
+ doc (Doc): The doc with one token per subtoken; the `merged_*` token
+ extensions must already be registered
+ RETURNS (Doc): The same doc after retokenization
+ """
+ # identify and process all subtoken spans to prepare attrs for merging
+ subtok_spans = []
+ for line in lines:
+ parts = line.split("\t")
+ id_, word, lemma, pos, tag, morph, head, dep, _1, misc = parts
+ if "-" in id_:
+ subtok_start, subtok_end = id_.split("-")
+ # ids are 1-based and the range end is inclusive
+ subtok_span = doc[int(subtok_start) - 1 : int(subtok_end)]
+ subtok_spans.append(subtok_span)
+ # create merged tag, morph, and lemma values
+ tags = []
+ morphs = {}
+ lemmas = []
+ for token in subtok_span:
+ tags.append(token.tag_)
+ lemmas.append(token.lemma_)
+ if token._.merged_morph:
+ # collect the values of each "field=value1,value2" feature in a
+ # set per field
+ for feature in token._.merged_morph.split("|"):
+ field, values = feature.split("=", 1)
+ if field not in morphs:
+ morphs[field] = set()
+ for value in values.split(","):
+ morphs[field].add(value)
+ # create merged features for each morph field
+ for field, values in morphs.items():
+ morphs[field] = field + "=" + ",".join(sorted(values))
+ # set the same attrs on all subtok tokens so that whatever head the
+ # retokenizer chooses, the final attrs are available on that token
+ for token in subtok_span:
+ token._.merged_orth = token.orth_
+ token._.merged_lemma = " ".join(lemmas)
+ token.tag_ = "_".join(tags)
+ token._.merged_morph = "|".join(sorted(morphs.values()))
+ token._.merged_spaceafter = (
+ True if subtok_span[-1].whitespace_ else False
+ )
+
+ # merge all collected spans in one retokenization pass
+ with doc.retokenize() as retokenizer:
+ for span in subtok_spans:
+ retokenizer.merge(span)
+
+ return doc
diff --git a/spacy/gold/converters/iob2docs.py b/spacy/gold/converters/iob2docs.py
new file mode 100644
index 000000000..eebf1266b
--- /dev/null
+++ b/spacy/gold/converters/iob2docs.py
@@ -0,0 +1,64 @@
+from wasabi import Printer
+
+from .conll_ner2docs import n_sents_info
+from ...gold import iob_to_biluo, tags_to_entities
+from ...tokens import Doc, Span
+from ...util import minibatch
+
+
+def iob2docs(input_data, vocab, n_sents=10, no_print=False, *args, **kwargs):
+    """
+    Turn IOB data with one sentence per line, where the fields of a token are
+    separated by '|', into Doc objects. Both IOB and IOB2 are accepted.
+
+    Sample formats:
+
+    I|O like|O London|I-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O
+    I|O like|O London|B-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O
+    I|PRP|O like|VBP|O London|NNP|I-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O
+    I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O
+
+    input_data (str): The IOB text, one sentence per line.
+    vocab (Vocab): The vocab used to construct the Doc objects.
+    n_sents (int): Number of input lines grouped into one Doc.
+    no_print (bool): Silence the status message.
+    RETURNS (list): The Doc objects produced by `read_iob`.
+    """
+    printer = Printer(no_print=no_print)
+    # The grouping message is only shown for a positive group size.
+    if n_sents > 0:
+        n_sents_info(printer, n_sents)
+    raw_sents = input_data.split("\n")
+    return read_iob(raw_sents, vocab, n_sents)
+
+
+def read_iob(raw_sents, vocab, n_sents):
+    """Build one Doc for every group of `n_sents` input lines.
+
+    raw_sents (Iterable[str]): The input lines, one sentence per line. Each
+        token is `word|iob` or `word|tag|iob`.
+    vocab (Vocab): The vocab used to construct the Doc objects.
+    n_sents (int): Number of lines per group, passed to `minibatch` as size.
+    RETURNS (list): The Doc objects. Groups without any token are skipped.
+    RAISES (ValueError): If the tokens of a line do not all have two fields or
+        all have three fields.
+    """
+    docs = []
+    for group in minibatch(raw_sents, size=n_sents):
+        words = []
+        tags = []
+        iob = []
+        sent_starts = []
+        for line in group:
+            if not line.strip():
+                continue
+            sent_tokens = [t.split("|") for t in line.split()]
+            # Check every token, not only the first one: `zip` would silently
+            # truncate a line that mixes two- and three-field tokens.
+            n_fields = {len(t) for t in sent_tokens}
+            if n_fields == {3}:
+                sent_words, sent_tags, sent_iob = zip(*sent_tokens)
+            elif n_fields == {2}:
+                sent_words, sent_iob = zip(*sent_tokens)
+                sent_tags = ["-"] * len(sent_words)
+            else:
+                raise ValueError(
+                    "The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert"
+                )
+            words.extend(sent_words)
+            tags.extend(sent_tags)
+            iob.extend(sent_iob)
+            sent_starts.append(True)
+            sent_starts.extend([False for _ in sent_words[1:]])
+        # A group made up of blank lines only (e.g. after a trailing newline)
+        # has no tokens; do not emit an empty Doc for it.
+        if not words:
+            continue
+        doc = Doc(vocab, words=words)
+        for i, tag in enumerate(tags):
+            doc[i].tag_ = tag
+        for i, sent_start in enumerate(sent_starts):
+            doc[i].is_sent_start = sent_start
+        biluo = iob_to_biluo(iob)
+        entities = tags_to_entities(biluo)
+        doc.ents = [Span(doc, start=s, end=e + 1, label=L) for (L, s, e) in entities]
+        docs.append(doc)
+    return docs
diff --git a/spacy/gold/converters/json2docs.py b/spacy/gold/converters/json2docs.py
new file mode 100644
index 000000000..342f94848
--- /dev/null
+++ b/spacy/gold/converters/json2docs.py
@@ -0,0 +1,22 @@
+import srsly
+from ..gold_io import json_iterate, json_to_annotations
+from ..example import annotations2doc
+from ..example import _fix_legacy_dict_data, _parse_example_dict_data
+from ...util import load_model
+from ...lang.xx import MultiLanguage
+
+
+def json2docs(input_data, model=None, **kwargs):
+    """Convert JSON-formatted annotations into Doc objects.
+
+    input_data (bytes, str or object): The data. A str is encoded as UTF-8;
+        anything that is neither bytes nor str is first serialized with
+        `srsly.json_dumps`.
+    model (str or None): Model to load for its vocab. When None, a blank
+        `MultiLanguage` pipeline is used instead.
+    RETURNS (list): One Doc per paragraph returned by `json_to_annotations`.
+    """
+    if model is None:
+        nlp = MultiLanguage()
+    else:
+        nlp = load_model(model)
+    if not isinstance(input_data, (bytes, str)):
+        input_data = srsly.json_dumps(input_data)
+    if not isinstance(input_data, bytes):
+        input_data = input_data.encode("utf8")
+    docs = []
+    for json_doc in json_iterate(input_data):
+        for json_para in json_to_annotations(json_doc):
+            fixed = _fix_legacy_dict_data(json_para)
+            tok_dict, doc_dict = _parse_example_dict_data(fixed)
+            docs.append(annotations2doc(nlp.vocab, tok_dict, doc_dict))
+    return docs
diff --git a/spacy/gold/corpus.py b/spacy/gold/corpus.py
new file mode 100644
index 000000000..545f01eaa
--- /dev/null
+++ b/spacy/gold/corpus.py
@@ -0,0 +1,153 @@
+import warnings
+from typing import Union, List, Iterable, Iterator, TYPE_CHECKING, Callable
+from pathlib import Path
+
+from .. import util
+from .example import Example
+from ..errors import Warnings
+from ..tokens import DocBin, Doc
+from ..vocab import Vocab
+
+if TYPE_CHECKING:
+ # This lets us add type hints for mypy etc. without causing circular imports
+ from ..language import Language # noqa: F401
+
+FILE_TYPE = ".spacy"
+
+
+@util.registry.readers("spacy.Corpus.v1")
+def create_docbin_reader(
+    path: Path, gold_preproc: bool, max_length: int = 0, limit: int = 0
+) -> Callable[["Language"], Iterable[Example]]:
+    """Create a `Corpus` reader for the given path.
+
+    path (Path): The directory or filename to read from.
+    gold_preproc (bool): Passed on to `Corpus`.
+    max_length (int): Passed on to `Corpus`.
+    limit (int): Passed on to `Corpus`.
+    RETURNS (Callable): The `Corpus` instance, which is called with `nlp`.
+    """
+    corpus = Corpus(
+        path, limit=limit, max_length=max_length, gold_preproc=gold_preproc
+    )
+    return corpus
+
+
+class Corpus:
+    """Iterate Example objects from a file or directory of DocBin (.spacy)
+    formatted data files.
+
+    path (Path): The directory or filename to read from.
+    gold_preproc (bool): Whether to set up the Example object with gold-standard
+        sentences and tokens for the predictions. Gold preprocessing helps
+        the annotations align to the tokenization, and may result in sequences
+        of more consistent length. However, it may reduce run-time accuracy due
+        to train/test skew. Defaults to False.
+    max_length (int): Maximum document length. Longer documents will be
+        split into sentences, if sentence boundaries are available. Defaults to
+        0, which indicates no limit.
+    limit (int): Limit corpus to a subset of examples, e.g. for debugging.
+        Defaults to 0, which indicates no limit.
+
+    DOCS: https://nightly.spacy.io/api/corpus
+    """
+
+    def __init__(
+        self,
+        path: Union[str, Path],
+        *,
+        limit: int = 0,
+        gold_preproc: bool = False,
+        max_length: int = 0,
+    ) -> None:
+        self.path = util.ensure_path(path)
+        self.gold_preproc = gold_preproc
+        self.max_length = max_length
+        self.limit = limit
+
+    @staticmethod
+    def walk_corpus(path: Union[str, Path]) -> List[Path]:
+        """Collect the .spacy files below a path.
+
+        path (str / Path): A single file or a directory that is searched
+            recursively. Entries whose name starts with "." are skipped.
+        RETURNS (List[Path]): The files found. A warning is issued if a
+            directory search finds none.
+        """
+        path = util.ensure_path(path)
+        if not path.is_dir() and path.parts[-1].endswith(FILE_TYPE):
+            return [path]
+        orig_path = path
+        # `paths` is a work queue: directories append their entries to it
+        # while it is being iterated.
+        paths = [path]
+        locs = []
+        seen = set()
+        for path in paths:
+            if str(path) in seen:
+                continue
+            seen.add(str(path))
+            if path.parts and path.parts[-1].startswith("."):
+                continue
+            elif path.is_dir():
+                paths.extend(path.iterdir())
+            elif path.parts[-1].endswith(FILE_TYPE):
+                locs.append(path)
+        if len(locs) == 0:
+            warnings.warn(Warnings.W090.format(path=orig_path))
+        return locs
+
+    def __call__(self, nlp: "Language") -> Iterator[Example]:
+        """Yield examples from the data.
+
+        nlp (Language): The current nlp object.
+        YIELDS (Example): The examples.
+
+        DOCS: https://nightly.spacy.io/api/corpus#call
+        """
+        ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.path))
+        if self.gold_preproc:
+            examples = self.make_examples_gold_preproc(nlp, ref_docs)
+        else:
+            examples = self.make_examples(nlp, ref_docs, self.max_length)
+        yield from examples
+
+    def _make_example(
+        self, nlp: "Language", reference: Doc, gold_preproc: bool
+    ) -> Example:
+        """Pair a reference doc with a fresh predicted doc.
+
+        With gold_preproc, or when the reference has unknown spaces, the
+        predicted doc reuses the reference's words and spaces. Otherwise it
+        is created by `nlp.make_doc` from the reference text.
+        """
+        if gold_preproc or reference.has_unknown_spaces:
+            return Example(
+                Doc(
+                    nlp.vocab,
+                    words=[word.text for word in reference],
+                    spaces=[bool(word.whitespace_) for word in reference],
+                ),
+                reference,
+            )
+        else:
+            return Example(nlp.make_doc(reference.text), reference)
+
+    def make_examples(
+        self, nlp: "Language", reference_docs: Iterable[Doc], max_length: int = 0
+    ) -> Iterator[Example]:
+        """Yield examples without gold preprocessing.
+
+        Empty docs are skipped. A doc shorter than max_length (or any doc if
+        max_length is 0) is used as a whole. Other docs are split into
+        sentences if they are sentenced, and only sentences shorter than
+        max_length are kept; docs without sentence boundaries are dropped.
+        """
+        for reference in reference_docs:
+            if len(reference) == 0:
+                continue
+            elif max_length == 0 or len(reference) < max_length:
+                yield self._make_example(nlp, reference, False)
+            elif reference.is_sentenced:
+                for ref_sent in reference.sents:
+                    if len(ref_sent) == 0:
+                        continue
+                    elif max_length == 0 or len(ref_sent) < max_length:
+                        yield self._make_example(nlp, ref_sent.as_doc(), False)
+
+    def make_examples_gold_preproc(
+        self, nlp: "Language", reference_docs: Iterable[Doc]
+    ) -> Iterator[Example]:
+        """Yield one example per sentence (or per doc if the doc is not
+        sentenced), using the reference tokenization for the predicted doc.
+        Examples with an empty predicted doc are skipped.
+        """
+        for reference in reference_docs:
+            if reference.is_sentenced:
+                ref_sents = [sent.as_doc() for sent in reference.sents]
+            else:
+                ref_sents = [reference]
+            for ref_sent in ref_sents:
+                eg = self._make_example(nlp, ref_sent, True)
+                if len(eg.x):
+                    yield eg
+
+    def read_docbin(
+        self, vocab: Vocab, locs: Iterable[Union[str, Path]]
+    ) -> Iterator[Doc]:
+        """Yield the non-empty docs stored in the given .spacy files.
+
+        vocab (Vocab): The vocab used to deserialize the docs.
+        locs (Iterable[str / Path]): The files to read; paths that do not end
+            in ".spacy" are ignored.
+        YIELDS (Doc): At most `self.limit` docs if the limit is 1 or higher.
+        """
+        i = 0
+        for loc in locs:
+            loc = util.ensure_path(loc)
+            if loc.parts[-1].endswith(FILE_TYPE):
+                doc_bin = DocBin().from_disk(loc)
+                docs = doc_bin.get_docs(vocab)
+                for doc in docs:
+                    if len(doc):
+                        yield doc
+                        i += 1
+                        # Stop the whole generator: a `break` would only leave
+                        # this file and let each later file add one more doc.
+                        if self.limit >= 1 and i >= self.limit:
+                            return
diff --git a/spacy/gold/example.pxd b/spacy/gold/example.pxd
new file mode 100644
index 000000000..e06e36287
--- /dev/null
+++ b/spacy/gold/example.pxd
@@ -0,0 +1,9 @@
+from ..tokens.doc cimport Doc
+
+
+cdef class Example:
+ # Declaration of the attributes stored on an Example.
+ # x / y: the two Doc objects held by the example -- presumably the
+ # predicted and the reference doc; TODO confirm against the .pyx
+ cdef readonly Doc x
+ cdef readonly Doc y
+ # Untyped slots; judging by their names they hold lazily computed values
+ # (NOTE(review): confirm where they are filled)
+ cdef readonly object _cached_alignment
+ cdef readonly object _cached_words_x
+ cdef readonly object _cached_words_y
diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx
new file mode 100644
index 000000000..3344704bf
--- /dev/null
+++ b/spacy/gold/example.pyx
@@ -0,0 +1,451 @@
+from collections import Iterable as IterableInstance
+import warnings
+import numpy
+
+from ..tokens.doc cimport Doc
+from ..tokens.span cimport Span
+from ..tokens.span import Span
+from ..attrs import IDS
+from .align import Alignment
+from .iob_utils import biluo_to_iob, biluo_tags_from_offsets, biluo_tags_from_doc
+from .iob_utils import spans_from_biluo_tags
+from ..errors import Errors, Warnings
+from ..pipeline._parser_internals import nonproj
+
+
+cpdef Doc annotations2doc(vocab, tok_annot, doc_annot):
+ """ Create a Doc from dictionaries with token and doc annotations.
+
+ vocab: Passed to the Doc constructor and to _annot2array.
+ tok_annot (dict): Token-level annotations; "ORTH" and "SPACY" are read
+ here to build the Doc.
+ doc_annot (dict): Doc-level annotations; "entities" and "cats" are read here.
+ RETURNS (Doc): The newly created Doc.
+ """
+ # Remaining token attributes are converted to attribute names plus an array.
+ attrs, array = _annot2array(vocab, tok_annot, doc_annot)
+ output = Doc(vocab, words=tok_annot["ORTH"], spaces=tok_annot["SPACY"])
+ if "entities" in doc_annot:
+ _add_entities_to_doc(output, doc_annot["entities"])
+ # Skip from_array when there are no attribute values to load.
+ if array.size:
+ output = output.from_array(attrs, array)
+ # links are currently added with ENT_KB_ID on the token level
+ output.cats.update(doc_annot.get("cats", {}))
+ return output
+
+
+def validate_examples(examples, method):
+    """Raise a TypeError unless `examples` is an iterable of Example objects.
+    This function lives here to prevent circular imports.
+
+    examples (Iterable[Example]): A batch of examples.
+    method (str): The method name to show in error messages.
+    """
+    if not isinstance(examples, IterableInstance):
+        raise TypeError(Errors.E978.format(name=method, types=type(examples)))
+    # Collect the distinct offending types so the error lists each one once.
+    wrong = {type(eg) for eg in examples if not isinstance(eg, Example)}
+    if wrong:
+        raise TypeError(Errors.E978.format(name=method, types=wrong))
+
+
+cdef class Example:
+ # Holds a predicted Doc (stored as `x`) and a reference Doc (stored as `y`)
+ # and computes the token alignment between them on demand.
+ def __init__(self, Doc predicted, Doc reference, *, alignment=None):
+ # Both docs are mandatory: passing None raises TypeError (E972).
+ if predicted is None:
+ raise TypeError(Errors.E972.format(arg="predicted"))
+ if reference is None:
+ raise TypeError(Errors.E972.format(arg="reference"))
+ self.predicted = predicted
+ self.reference = reference
+ # Assigned last: the two property setters above reset the cached alignment.
+ self._cached_alignment = alignment
+
+ def __len__(self):
+ # The length of an Example is the token count of the predicted doc.
+ return len(self.predicted)
+
+ property predicted:
+ def __get__(self):
+ return self.x
+
+ def __set__(self, doc):
+ # Replacing the doc drops the cached alignment and refreshes the cached words.
+ self.x = doc
+ self._cached_alignment = None
+ self._cached_words_x = [t.text for t in doc]
+
+ property reference:
+ def __get__(self):
+ return self.y
+
+ def __set__(self, doc):
+ # Replacing the doc drops the cached alignment and refreshes the cached words.
+ self.y = doc
+ self._cached_alignment = None
+ self._cached_words_y = [t.text for t in doc]
+
+ def copy(self):
+ # Copies both docs; no alignment is passed on to the new Example.
+ return Example(
+ self.x.copy(),
+ self.y.copy()
+ )
+
+ @classmethod
+ def from_dict(cls, Doc predicted, dict example_dict):
+ # Build an Example whose reference doc is created from `example_dict`.
+ # Raises ValueError (E976) if either argument is None.
+ if predicted is None:
+ raise ValueError(Errors.E976.format(n="first", type="Doc"))
+ if example_dict is None:
+ raise ValueError(Errors.E976.format(n="second", type="dict"))
+ example_dict = _fix_legacy_dict_data(example_dict)
+ tok_dict, doc_dict = _parse_example_dict_data(example_dict)
+ # Without explicit words, reuse the tokenization of the predicted doc.
+ if "ORTH" not in tok_dict:
+ tok_dict["ORTH"] = [tok.text for tok in predicted]
+ tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
+ return Example(
+ predicted,
+ annotations2doc(predicted.vocab, tok_dict, doc_dict)
+ )
+
+ @property
+ def alignment(self):
+ # Recompute the alignment when none is cached or when the token texts of
+ # either doc differ from the ones recorded with the cache.
+ words_x = [token.text for token in self.x]
+ words_y = [token.text for token in self.y]
+ if self._cached_alignment is None or \
+ words_x != self._cached_words_x or \
+ words_y != self._cached_words_y:
+ self._cached_alignment = Alignment.from_strings(words_x, words_y)
+ self._cached_words_x = words_x
+ self._cached_words_y = words_y
+ return self._cached_alignment
+
+ def get_aligned(self, field, as_string=False):
+ """Return an aligned array for a token attribute."""
+ # One output slot per predicted token; None marks "no usable value".
+ align = self.alignment.x2y
+
+ vocab = self.reference.vocab
+ gold_values = self.reference.to_array([field])
+ output = [None] * len(self.predicted)
+ for token in self.predicted:
+ if token.is_space:
+ output[token.i] = None
+ else:
+ values = gold_values[align[token.i].dataXd]
+ values = values.ravel()
+ if len(values) == 0:
+ output[token.i] = None
+ elif len(values) == 1:
+ output[token.i] = values[0]
+ elif len(set(list(values))) == 1:
+ # If all aligned tokens have the same value, use it.
+ output[token.i] = values[0]
+ else:
+ output[token.i] = None
+ # ENT_IOB and SENT_START are left as numbers; other fields are looked up
+ # in the reference vocab's string store when as_string is set.
+ if as_string and field not in ["ENT_IOB", "SENT_START"]:
+ output = [vocab.strings[o] if o is not None else o for o in output]
+ return output
+
+ def get_aligned_parse(self, projectivize=True):
+ # Returns (heads, deps) lists with one slot per predicted token. A head is
+ # only filled in for a predicted token that aligns to exactly one reference
+ # token whose head in turn aligns to exactly one predicted token.
+ cand_to_gold = self.alignment.x2y
+ gold_to_cand = self.alignment.y2x
+ aligned_heads = [None] * self.x.length
+ aligned_deps = [None] * self.x.length
+ heads = [token.head.i for token in self.y]
+ deps = [token.dep_ for token in self.y]
+ if projectivize:
+ heads, deps = nonproj.projectivize(heads, deps)
+ for cand_i in range(self.x.length):
+ if cand_to_gold.lengths[cand_i] == 1:
+ gold_i = cand_to_gold[cand_i].dataXd[0, 0]
+ if gold_to_cand.lengths[heads[gold_i]] == 1:
+ aligned_heads[cand_i] = int(gold_to_cand[heads[gold_i]].dataXd[0, 0])
+ aligned_deps[cand_i] = deps[gold_i]
+ return aligned_heads, aligned_deps
+
+ def get_aligned_spans_x2y(self, x_spans):
+ # Map spans of the predicted doc onto the reference doc.
+ return self._get_aligned_spans(self.y, x_spans, self.alignment.x2y)
+
+ def get_aligned_spans_y2x(self, y_spans):
+ # Map spans of the reference doc onto the predicted doc.
+ return self._get_aligned_spans(self.x, y_spans, self.alignment.y2x)
+
+ def _get_aligned_spans(self, doc, spans, align):
+ # `seen` holds target token indices that earlier spans have claimed; they
+ # are filtered out of later spans.
+ seen = set()
+ output = []
+ for span in spans:
+ indices = align[span.start : span.end].data.ravel()
+ indices = [idx for idx in indices if idx not in seen]
+ if len(indices) >= 1:
+ aligned_span = Span(doc, indices[0], indices[-1] + 1, label=span.label)
+ # Compare the two texts lower-cased and with spaces removed.
+ target_text = span.text.lower().strip().replace(" ", "")
+ our_text = aligned_span.text.lower().strip().replace(" ", "")
+ if our_text == target_text:
+ output.append(aligned_span)
+ seen.update(indices)
+ return output
+
+ def get_aligned_ner(self):
+ # Returns one BILUO tag (or None) per predicted token.
+ if not self.y.is_nered:
+ return [None] * len(self.x) # should this be 'missing' instead of 'None' ?
+ x_ents = self.get_aligned_spans_y2x(self.y.ents)
+ # Default to 'None' for missing values
+ x_tags = biluo_tags_from_offsets(
+ self.x,
+ [(e.start_char, e.end_char, e.label_) for e in x_ents],
+ missing=None
+ )
+ # Now fill the tokens we can align to O.
+ O = 2 # I=1, O=2, B=3
+ for i, ent_iob in enumerate(self.get_aligned("ENT_IOB")):
+ if x_tags[i] is None:
+ if ent_iob == O:
+ x_tags[i] = "O"
+ elif self.x[i].is_space:
+ x_tags[i] = "O"
+ return x_tags
+
+ def to_dict(self):
+ # Serializes the annotations of the reference doc only; the predicted doc
+ # is not included.
+ return {
+ "doc_annotation": {
+ "cats": dict(self.reference.cats),
+ "entities": biluo_tags_from_doc(self.reference),
+ "links": self._links_to_dict()
+ },
+ "token_annotation": {
+ "ORTH": [t.text for t in self.reference],
+ "SPACY": [bool(t.whitespace_) for t in self.reference],
+ "TAG": [t.tag_ for t in self.reference],
+ "LEMMA": [t.lemma_ for t in self.reference],
+ "POS": [t.pos_ for t in self.reference],
+ "MORPH": [t.morph_ for t in self.reference],
+ "HEAD": [t.head.i for t in self.reference],
+ "DEP": [t.dep_ for t in self.reference],
+ "SENT_START": [int(bool(t.is_sent_start)) for t in self.reference]
+ }
+ }
+
+ def _links_to_dict(self):
+ # Maps (start_char, end_char) of each reference entity that has a KB id
+ # to {kb_id: 1.0}.
+ links = {}
+ for ent in self.reference.ents:
+ if ent.kb_id_:
+ links[(ent.start_char, ent.end_char)] = {ent.kb_id_: 1.0}
+ return links
+
+ def split_sents(self):
+ """ Split the token annotations into multiple Examples based on
+ sent_starts and return a list of the new Examples"""
+ # Without sentence boundaries on the reference there is nothing to split.
+ if not self.reference.is_sentenced:
+ return [self]
+
+ align = self.alignment.y2x
+ seen_indices = set()
+ output = []
+ for y_sent in self.reference.sents:
+ indices = align[y_sent.start : y_sent.end].data.ravel()
+ indices = [idx for idx in indices if idx not in seen_indices]
+ if indices:
+ x_sent = self.predicted[indices[0] : indices[-1] + 1]
+ output.append(Example(x_sent.as_doc(), y_sent.as_doc()))
+ seen_indices.update(indices)
+ return output
+
+ property text:
+ def __get__(self):
+ # The text of the predicted doc.
+ return self.x.text
+
+ def __str__(self):
+ return str(self.to_dict())
+
+ def __repr__(self):
+ return str(self.to_dict())
+
+
+def _annot2array(vocab, tok_annot, doc_annot):
+ # Convert token annotations into a list of attribute names and a uint64
+ # array. `values` collects one row per attribute; the transpose is returned.
+ attrs = []
+ values = []
+
+ for key, value in doc_annot.items():
+ if value:
+ if key == "entities":
+ pass
+ elif key == "links":
+ # Links are turned into a per-token ENT_KB_ID list, which is written
+ # into the caller's tok_annot dict and picked up by the loop below.
+ ent_kb_ids = _parse_links(vocab, tok_annot["ORTH"], tok_annot["SPACY"], value)
+ tok_annot["ENT_KB_ID"] = ent_kb_ids
+ elif key == "cats":
+ pass
+ else:
+ raise ValueError(Errors.E974.format(obj="doc", key=key))
+
+ for key, value in tok_annot.items():
+ if key not in IDS:
+ raise ValueError(Errors.E974.format(obj="token", key=key))
+ elif key in ["ORTH", "SPACY"]:
+ pass
+ elif key == "HEAD":
+ # Heads are stored as offsets relative to the token's own index.
+ # NOTE(review): negative offsets end up in a uint64 array below - confirm
+ # the wrap-around is what Doc.from_array expects.
+ attrs.append(key)
+ values.append([h-i for i, h in enumerate(value)])
+ elif key == "SENT_START":
+ attrs.append(key)
+ values.append(value)
+ elif key == "MORPH":
+ attrs.append(key)
+ values.append([vocab.morphology.add(v) for v in value])
+ else:
+ # Every other attribute must be a list of strings; they are interned
+ # in the vocab's string store.
+ attrs.append(key)
+ if not all(isinstance(v, str) for v in value):
+ types = set([type(v) for v in value])
+ raise TypeError(Errors.E969.format(field=key, types=types)) from None
+ values.append([vocab.strings.add(v) for v in value])
+ array = numpy.asarray(values, dtype="uint64")
+ return attrs, array.T
+
+
+def _add_entities_to_doc(doc, ner_data):
+    """Set `doc.ents` from entity annotations given in one of several forms.
+
+    `ner_data` may be None (nothing is done), an empty list (entities are
+    cleared), a list of offset tuples, a list of BILUO tag strings (or None
+    entries), or a list of Span objects. Tuples and tags are converted and the
+    function calls itself again with the converted data. The form is decided
+    by the first element; any other type raises ValueError (E973).
+    """
+    if ner_data is None:
+        return
+    if ner_data == []:
+        doc.ents = []
+        return
+    first = ner_data[0]
+    if isinstance(first, tuple):
+        biluo_tags = biluo_tags_from_offsets(doc, ner_data)
+        return _add_entities_to_doc(doc, biluo_tags)
+    if isinstance(first, str) or first is None:
+        spans = spans_from_biluo_tags(doc, ner_data)
+        return _add_entities_to_doc(doc, spans)
+    if not isinstance(first, Span):
+        raise ValueError(Errors.E973)
+    # Ugh, this is super messy. Really hard to set O entities
+    doc.ents = ner_data
+    doc.ents = [span for span in ner_data if span.label_]
+
+
+def _parse_example_dict_data(example_dict):
+    """Return the "token_annotation" and "doc_annotation" values as a pair.
+
+    Raises KeyError if either key is missing.
+    """
+    token_annotation = example_dict["token_annotation"]
+    doc_annotation = example_dict["doc_annotation"]
+    return token_annotation, doc_annotation
+
+
+def _fix_legacy_dict_data(example_dict):
+ # Normalize an example dict into the
+ # {"token_annotation": ..., "doc_annotation": ...} layout with upper-case
+ # token attribute keys.
+ # NOTE(review): when the input already has "token_annotation" /
+ # "doc_annotation" dicts, top-level keys are written into those same
+ # objects - confirm callers do not rely on them staying untouched.
+ token_dict = example_dict.get("token_annotation", {})
+ doc_dict = example_dict.get("doc_annotation", {})
+ # Sort top-level keys into doc-level or token-level annotations.
+ for key, value in example_dict.items():
+ if value:
+ if key in ("token_annotation", "doc_annotation"):
+ pass
+ elif key == "ids":
+ pass
+ elif key in ("cats", "links"):
+ doc_dict[key] = value
+ elif key in ("ner", "entities"):
+ doc_dict["entities"] = value
+ else:
+ token_dict[key] = value
+ # Remap keys
+ remapping = {
+ "words": "ORTH",
+ "tags": "TAG",
+ "pos": "POS",
+ "lemmas": "LEMMA",
+ "deps": "DEP",
+ "heads": "HEAD",
+ "sent_starts": "SENT_START",
+ "morphs": "MORPH",
+ "spaces": "SPACY",
+ }
+ old_token_dict = token_dict
+ token_dict = {}
+ for key, value in old_token_dict.items():
+ # "text", "ids" and "brackets" are dropped; already-remapped keys are kept;
+ # legacy keys are matched case-insensitively; anything else is an error.
+ if key in ("text", "ids", "brackets"):
+ pass
+ elif key in remapping.values():
+ token_dict[key] = value
+ elif key.lower() in remapping:
+ token_dict[remapping[key.lower()]] = value
+ else:
+ all_keys = set(remapping.values())
+ all_keys.update(remapping.keys())
+ raise KeyError(Errors.E983.format(key=key, dict="token_annotation", keys=all_keys))
+ # The raw text may be stored under "text" or, failing that, "raw".
+ text = example_dict.get("text", example_dict.get("raw"))
+ # With words but no usable spaces, guess the spaces from the raw text.
+ if _has_field(token_dict, "ORTH") and not _has_field(token_dict, "SPACY"):
+ token_dict["SPACY"] = _guess_spaces(text, token_dict["ORTH"])
+ if "HEAD" in token_dict and "SENT_START" in token_dict:
+ # If heads are set, we don't also redundantly specify SENT_START.
+ token_dict.pop("SENT_START")
+ warnings.warn(Warnings.W092)
+ return {
+ "token_annotation": token_dict,
+ "doc_annotation": doc_dict
+ }
+
+def _has_field(annot, field):
+    """Return True if `annot[field]` holds at least one value that is not None.
+
+    A missing key, a None value, an empty sequence and a sequence containing
+    only None all count as "field not present".
+    """
+    if field not in annot:
+        return False
+    values = annot[field]
+    if values is None or len(values) == 0:
+        return False
+    return not all(value is None for value in values)
+
+
+def _parse_ner_tags(biluo_or_offsets, vocab, words, spaces):
+    """Split entity annotations into parallel lists of IOB codes and types.
+
+    `biluo_or_offsets` is either a list of BILUO tags or a list of offset
+    lists/tuples; the form is decided by the first element. Returns
+    `(ent_iobs, ent_types)`. Both entries are "" for a tag that is None or
+    "-", and the type is "" for tags that do not start with "I" or "B".
+    """
+    biluo = biluo_or_offsets
+    if isinstance(biluo_or_offsets[0], (list, tuple)):
+        # Offsets can only be converted against a Doc that has the target
+        # tokenization, so build one from the given words and spaces.
+        reference = Doc(vocab, words=words, spaces=spaces)
+        biluo = biluo_tags_from_offsets(reference, biluo_or_offsets)
+    ent_iobs = []
+    ent_types = []
+    for iob_tag in biluo_to_iob(biluo):
+        if iob_tag in (None, "-"):
+            iob, ent_type = "", ""
+        else:
+            iob = iob_tag.split("-")[0]
+            has_type = iob_tag.startswith("I") or iob_tag.startswith("B")
+            ent_type = iob_tag.split("-", 1)[1] if has_type else ""
+        ent_iobs.append(iob)
+        ent_types.append(ent_type)
+    return ent_iobs, ent_types
+
+def _parse_links(vocab, words, spaces, links):
+ # Turn character-offset link annotations into one KB id string per token
+ # ("" for tokens without a link).
+ # `links` maps (start_char, end_char) to a dict of {kb_id: value}; only
+ # entries with value 1.0 count as the true KB id.
+ reference = Doc(vocab, words=words, spaces=spaces)
+ # Character offset -> token index, for token starts and token ends.
+ starts = {token.idx: token.i for token in reference}
+ ends = {token.idx + len(token): token.i for token in reference}
+ ent_kb_ids = ["" for _ in reference]
+
+ for index, annot_dict in links.items():
+ true_kb_ids = []
+ for key, value in annot_dict.items():
+ if value == 1.0:
+ true_kb_ids.append(key)
+ # More than one true KB id for the same span is an error (E980).
+ if len(true_kb_ids) > 1:
+ raise ValueError(Errors.E980)
+
+ if len(true_kb_ids) == 1:
+ start_char, end_char = index
+ start_token = starts.get(start_char)
+ end_token = ends.get(end_char)
+ # Offsets that do not fall on token boundaries are an error (E981).
+ if start_token is None or end_token is None:
+ raise ValueError(Errors.E981)
+ # end_token is the index of the last token, hence the inclusive range.
+ for i in range(start_token, end_token+1):
+ ent_kb_ids[i] = true_kb_ids[0]
+
+ return ent_kb_ids
+
+
+def _guess_spaces(text, words):
+    """Guess, for each word, whether it is followed by a space in `text`.
+
+    text (str or None): The raw text the words were taken from.
+    words (list): The token texts, in order.
+    RETURNS (list or None): One bool per word, or None if `text` is None.
+        A word that cannot be found at or after the current position is
+        assumed to be followed by a space and does not move the position.
+    """
+    if text is None:
+        return None
+    spaces = []
+    text_pos = 0
+    # align words with text
+    for word in words:
+        try:
+            # Search from the current position instead of copying the rest
+            # of the text into a new string for every word.
+            word_start = text.index(word, text_pos)
+        except ValueError:
+            spaces.append(True)
+            continue
+        text_pos = word_start + len(word)
+        spaces.append(text_pos < len(text) and text[text_pos] == " ")
+    return spaces
diff --git a/spacy/gold/gold_io.pyx b/spacy/gold/gold_io.pyx
new file mode 100644
index 000000000..5dc39eb31
--- /dev/null
+++ b/spacy/gold/gold_io.pyx
@@ -0,0 +1,201 @@
+import warnings
+import srsly
+from .. import util
+from ..errors import Warnings
+from ..tokens import Doc
+from .iob_utils import biluo_tags_from_offsets, tags_to_entities
+import json
+
+
+def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
+ """Convert a list of Doc objects into the JSON-serializable format used by
+ the spacy train command.
+
+ docs (iterable / Doc): The Doc object(s) to convert.
+ doc_id (int): Id for the JSON.
+ ner_missing_tag (str): Passed as `missing` to biluo_tags_from_offsets when
+ computing the token-level "ner" tags.
+ RETURNS (dict): The data in spaCy's JSON format
+ - each input doc will be treated as a paragraph in the output doc
+ """
+ # A single Doc is wrapped so the loop below always sees a sequence.
+ if isinstance(docs, Doc):
+ docs = [docs]
+ json_doc = {"id": doc_id, "paragraphs": []}
+ for i, doc in enumerate(docs):
+ json_para = {'raw': doc.text, "sentences": [], "cats": [], "entities": [], "links": []}
+ for cat, val in doc.cats.items():
+ json_cat = {"label": cat, "value": val}
+ json_para["cats"].append(json_cat)
+ # warning: entities information is currently duplicated as
+ # doc-level "entities" and token-level "ner"
+ for ent in doc.ents:
+ ent_tuple = (ent.start_char, ent.end_char, ent.label_)
+ json_para["entities"].append(ent_tuple)
+ if ent.kb_id_:
+ # NOTE(review): the key is a (start_char, end_char) tuple; the standard
+ # json module rejects tuple dict keys - confirm how "links" is serialized.
+ link_dict = {(ent.start_char, ent.end_char): {ent.kb_id_: 1.0}}
+ json_para["links"].append(link_dict)
+ biluo_tags = biluo_tags_from_offsets(doc, json_para["entities"], missing=ner_missing_tag)
+ for j, sent in enumerate(doc.sents):
+ json_sent = {"tokens": [], "brackets": []}
+ for token in sent:
+ json_token = {"id": token.i, "orth": token.text, "space": token.whitespace_}
+ if doc.is_tagged:
+ json_token["tag"] = token.tag_
+ json_token["pos"] = token.pos_
+ json_token["morph"] = token.morph_
+ json_token["lemma"] = token.lemma_
+ if doc.is_parsed:
+ # Heads are written as offsets relative to the token's own index.
+ json_token["head"] = token.head.i-token.i
+ json_token["dep"] = token.dep_
+ json_token["ner"] = biluo_tags[token.i]
+ json_sent["tokens"].append(json_token)
+ json_para["sentences"].append(json_sent)
+ json_doc["paragraphs"].append(json_para)
+ return json_doc
+
+
+def read_json_file(loc, docs_filter=None, limit=None):
+    """Read Example dictionaries from a json file or directory.
+
+    loc (str / Path): A JSON file, or a directory whose entries are read
+        recursively.
+    docs_filter (callable): Optional predicate called with each parsed JSON
+        doc; docs for which it returns a falsy value are skipped. It is also
+        applied to files found inside directories.
+    limit: Accepted and passed on to recursive calls, but not applied
+        anywhere in this function.
+    YIELDS (dict): One example dict per paragraph, as produced by
+        json_to_annotations.
+    """
+    loc = util.ensure_path(loc)
+    if loc.is_dir():
+        # iterdir() already yields paths that include `loc`. Joining them
+        # onto `loc` again would duplicate a relative directory prefix.
+        for entry in loc.iterdir():
+            yield from read_json_file(entry, docs_filter=docs_filter, limit=limit)
+    else:
+        with loc.open("rb") as file_:
+            utf8_str = file_.read()
+        for json_doc in json_iterate(utf8_str):
+            if docs_filter is not None and not docs_filter(json_doc):
+                continue
+            for json_paragraph in json_to_annotations(json_doc):
+                yield json_paragraph
+
+
+def json_to_annotations(doc):
+ """Convert an item in the JSON-formatted training data to the format
+ used by Example.
+
+ doc (dict): One entry in the training data.
+ YIELDS (dict): The reformatted data - one training example per paragraph
+ """
+ for paragraph in doc["paragraphs"]:
+ example = {"text": paragraph.get("raw", None)}
+ # Per-token lists, flattened across all sentences of the paragraph.
+ words = []
+ spaces = []
+ ids = []
+ tags = []
+ ner_tags = []
+ pos = []
+ morphs = []
+ lemmas = []
+ heads = []
+ labels = []
+ sent_starts = []
+ brackets = []
+ for sent in paragraph["sentences"]:
+ # Index of this sentence's first token within the paragraph.
+ sent_start_i = len(words)
+ for i, token in enumerate(sent["tokens"]):
+ words.append(token["orth"])
+ spaces.append(token.get("space", None))
+ ids.append(token.get('id', sent_start_i + i))
+ tags.append(token.get("tag", None))
+ pos.append(token.get("pos", None))
+ morphs.append(token.get("morph", None))
+ lemmas.append(token.get("lemma", None))
+ if "head" in token:
+ # The stored head is an offset from the token; add the token's
+ # position in the paragraph to get an index.
+ heads.append(token["head"] + sent_start_i + i)
+ else:
+ heads.append(None)
+ if "dep" in token:
+ labels.append(token["dep"])
+ # Ensure ROOT label is case-insensitive
+ if labels[-1].lower() == "root":
+ labels[-1] = "ROOT"
+ else:
+ labels.append(None)
+ ner_tags.append(token.get("ner", None))
+ # The first token of each sentence is marked as a sentence start.
+ if i == 0:
+ sent_starts.append(1)
+ else:
+ sent_starts.append(0)
+ if "brackets" in sent:
+ brackets.extend((b["first"] + sent_start_i,
+ b["last"] + sent_start_i, b["label"])
+ for b in sent["brackets"])
+
+ example["token_annotation"] = dict(
+ ids=ids,
+ words=words,
+ spaces=spaces,
+ sent_starts=sent_starts,
+ brackets=brackets
+ )
+ # avoid including dummy values that looks like gold info was present
+ if any(tags):
+ example["token_annotation"]["tags"] = tags
+ if any(pos):
+ example["token_annotation"]["pos"] = pos
+ if any(morphs):
+ example["token_annotation"]["morphs"] = morphs
+ if any(lemmas):
+ example["token_annotation"]["lemmas"] = lemmas
+ # Heads are tested against None explicitly because 0 is a valid index.
+ if any(head is not None for head in heads):
+ example["token_annotation"]["heads"] = heads
+ if any(labels):
+ example["token_annotation"]["deps"] = labels
+
+ # "cats" is read as a list of {"label": ..., "value": ...} entries.
+ cats = {}
+ for cat in paragraph.get("cats", {}):
+ cats[cat["label"]] = cat["value"]
+ example["doc_annotation"] = dict(
+ cats=cats,
+ entities=ner_tags,
+ links=paragraph.get("links", [])
+ )
+ yield example
+
+def json_iterate(bytes utf8_str):
+ # Yield the parsed JSON objects that sit directly inside the outermost
+ # array of `utf8_str`, decoding and parsing one object at a time.
+ # We should've made these files jsonl...But since we didn't, parse out
+ # the docs one-by-one to reduce memory usage.
+ # It's okay to read in the whole file -- just don't parse it into JSON.
+ cdef long file_length = len(utf8_str)
+ # Warn (W027) when the input is larger than 2 ** 30 bytes.
+ if file_length > 2 ** 30:
+ warnings.warn(Warnings.W027.format(size=file_length))
+
+ raw = utf8_str
+ # Scanner state: bracket/brace nesting depth, whether we are inside a
+ # string, whether the previous byte was a backslash, and where the
+ # current object started.
+ cdef int square_depth = 0
+ cdef int curly_depth = 0
+ cdef int inside_string = 0
+ cdef int escape = 0
+ cdef long start = -1
+ cdef char c
+ cdef char quote = ord('"')
+ cdef char backslash = ord("\\")
+ cdef char open_square = ord("[")
+ cdef char close_square = ord("]")
+ cdef char open_curly = ord("{")
+ cdef char close_curly = ord("}")
+ for i in range(file_length):
+ c = raw[i]
+ # The byte after a backslash is skipped without being interpreted.
+ if escape:
+ escape = False
+ continue
+ if c == backslash:
+ escape = True
+ continue
+ if c == quote:
+ inside_string = not inside_string
+ continue
+ # Brackets and braces inside strings do not affect the nesting depth.
+ if inside_string:
+ continue
+ if c == open_square:
+ square_depth += 1
+ elif c == close_square:
+ square_depth -= 1
+ elif c == open_curly:
+ # An object opening directly inside the outermost array starts a doc.
+ if square_depth == 1 and curly_depth == 0:
+ start = i
+ curly_depth += 1
+ elif c == close_curly:
+ curly_depth -= 1
+ # The matching close of that object: decode and parse just this slice.
+ if square_depth == 1 and curly_depth == 0:
+ substr = utf8_str[start : i + 1].decode("utf8")
+ yield srsly.json_loads(substr)
+ start = -1
diff --git a/spacy/gold/iob_utils.py b/spacy/gold/iob_utils.py
new file mode 100644
index 000000000..08751cfd4
--- /dev/null
+++ b/spacy/gold/iob_utils.py
@@ -0,0 +1,209 @@
+import warnings
+from ..errors import Errors, Warnings
+from ..tokens import Span
+
+
+def iob_to_biluo(tags):
+    """Convert a sequence of IOB tags into a list of BILUO tags.
+
+    Works on a copy of `tags`, alternately consuming a run of "O" tags and
+    one entity until nothing is left.
+    """
+    remaining = list(tags)
+    biluo = []
+    while len(remaining) > 0:
+        biluo += _consume_os(remaining)
+        biluo += _consume_ent(remaining)
+    return biluo
+
+
+def biluo_to_iob(tags):
+    """Convert a sequence of BILUO tags into a list of IOB tags.
+
+    A leading "U-" becomes "B-" and a leading "L-" becomes "I-"; None and all
+    other tags pass through unchanged. Only the action prefix is rewritten,
+    so labels that themselves contain "U-" or "L-" (e.g. "B-EU-ORG") are
+    left intact.
+    """
+    out = []
+    for tag in tags:
+        if tag is None:
+            out.append(tag)
+        elif tag.startswith("U-"):
+            out.append("B-" + tag[2:])
+        elif tag.startswith("L-"):
+            out.append("I-" + tag[2:])
+        else:
+            out.append(tag)
+    return out
+
+
+def _consume_os(tags):
+    """Pop leading "O" tags off `tags` (in place) and yield each of them."""
+    while tags:
+        if tags[0] != "O":
+            break
+        yield tags.pop(0)
+
+
+def _consume_ent(tags):
+    """Pop one entity off the front of `tags` (in place) and return its BILUO tags.
+
+    The first tag opens the entity; following tags are consumed while they
+    are the matching "I" or "L" continuation. A single-token entity becomes
+    ["U-label"]; longer ones become B, I..., L. Returns [] for empty input and
+    raises ValueError (E177) for a single-token entity with an empty label.
+    """
+    if not tags:
+        return []
+    first_tag = tags.pop(0)
+    suffix = first_tag[1:]
+    continuations = {"I" + suffix, "L" + suffix}
+    n_tokens = 1
+    while tags and tags[0] in continuations:
+        tags.pop(0)
+        n_tokens += 1
+    label = first_tag[2:]
+    if n_tokens > 1:
+        inner = [f"I-{label}"] * (n_tokens - 2)
+        return ["B-" + label, *inner, "L-" + label]
+    if len(label) == 0:
+        raise ValueError(Errors.E177.format(tag=first_tag))
+    return ["U-" + label]
+
+
+def biluo_tags_from_doc(doc, missing="O"):
+    """Return BILUO tags for `doc`, computed from the entities in `doc.ents`.
+
+    `missing` is forwarded unchanged to biluo_tags_from_offsets.
+    """
+    entity_offsets = []
+    for ent in doc.ents:
+        entity_offsets.append((ent.start_char, ent.end_char, ent.label_))
+    return biluo_tags_from_offsets(doc, entity_offsets, missing=missing)
+
+
+def biluo_tags_from_offsets(doc, entities, missing="O"):
+ """Encode labelled spans into per-token tags, using the
+ Begin/In/Last/Unit/Out scheme (BILUO).
+
+ doc (Doc): The document that the entity offsets refer to. The output tags
+ will refer to the token boundaries within the document.
+ entities (iterable): A sequence of `(start, end, label)` triples. `start`
+ and `end` should be character-offset integers denoting the slice into
+ the original string.
+ missing (str): The tag written for tokens found to lie outside the
+ character ranges of all entities. Defaults to "O".
+ RETURNS (list): A list of unicode strings, describing the tags. Each tag
+ string will be of the form either "", "O" or "{action}-{label}", where
+ action is one of "B", "I", "L", "U". The string "-" is used where the
+ entity offsets don't align with the tokenization in the `Doc` object.
+ The training algorithm will view these as missing values. "O" denotes a
+ non-entity token. "B" denotes the beginning of a multi-token entity,
+ "I" the inside of an entity of three or more tokens, and "L" the end
+ of an entity of two or more tokens. "U" denotes a single-token entity.
+
+ EXAMPLE:
+ >>> text = 'I like London.'
+ >>> entities = [(len('I like '), len('I like London'), 'LOC')]
+ >>> doc = nlp.tokenizer(text)
+ >>> tags = biluo_tags_from_offsets(doc, entities)
+ >>> assert tags == ["O", "O", 'U-LOC', "O"]
+ """
+ # Ensure no overlapping entity labels exist
+ # Maps each character offset already covered by a labelled entity to
+ # that entity's (start_char, end_char, label).
+ tokens_in_ents = {}
+
+ # Character offset -> token index, for token starts and token ends.
+ starts = {token.idx: token.i for token in doc}
+ ends = {token.idx + len(token): token.i for token in doc}
+ # Every token starts out as "-".
+ biluo = ["-" for _ in doc]
+ # Handle entity cases
+ for start_char, end_char, label in entities:
+ # A span with an empty label marks the tokens starting inside it as "O".
+ if not label:
+ for s in starts: # account for many-to-one
+ if s >= start_char and s < end_char:
+ biluo[starts[s]] = "O"
+ else:
+ # Overlap with a previously seen entity raises ValueError (E103).
+ for token_index in range(start_char, end_char):
+ if token_index in tokens_in_ents.keys():
+ raise ValueError(
+ Errors.E103.format(
+ span1=(
+ tokens_in_ents[token_index][0],
+ tokens_in_ents[token_index][1],
+ tokens_in_ents[token_index][2],
+ ),
+ span2=(start_char, end_char, label),
+ )
+ )
+ tokens_in_ents[token_index] = (start_char, end_char, label)
+
+ start_token = starts.get(start_char)
+ end_token = ends.get(end_char)
+ # Only interested if the tokenization is correct
+ if start_token is not None and end_token is not None:
+ if start_token == end_token:
+ biluo[start_token] = f"U-{label}"
+ else:
+ biluo[start_token] = f"B-{label}"
+ for i in range(start_token + 1, end_token):
+ biluo[i] = f"I-{label}"
+ biluo[end_token] = f"L-{label}"
+ # Now distinguish the O cases from ones where we miss the tokenization
+ entity_chars = set()
+ for start_char, end_char, label in entities:
+ for i in range(start_char, end_char):
+ entity_chars.add(i)
+ for token in doc:
+ for i in range(token.idx, token.idx + len(token)):
+ if i in entity_chars:
+ break
+ else:
+ biluo[token.i] = missing
+ # Any "-" left over means an entity did not line up with token
+ # boundaries; warn (W030) with truncated text and entities.
+ if "-" in biluo and missing != "-":
+ ent_str = str(entities)
+ warnings.warn(
+ Warnings.W030.format(
+ text=doc.text[:50] + "..." if len(doc.text) > 50 else doc.text,
+ entities=ent_str[:50] + "..." if len(ent_str) > 50 else ent_str,
+ )
+ )
+ return biluo
+
+
+def spans_from_biluo_tags(doc, tags):
+    """Encode per-token tags following the BILUO scheme into Span objects, e.g.
+    to overwrite the doc.ents.
+
+    doc (Doc): The document that the BILUO tags refer to.
+    tags (iterable): A sequence of BILUO tags with each tag describing one
+        token. Each tag string will be of the form of either "", "O" or
+        "{action}-{label}", where action is one of "B", "I", "L", "U".
+    RETURNS (list): A sequence of Span objects.
+    """
+    # tags_to_entities returns inclusive end indices, hence the + 1.
+    return [
+        Span(doc, start_idx, end_idx + 1, label=label)
+        for label, start_idx, end_idx in tags_to_entities(tags)
+    ]
+
+
+def offsets_from_biluo_tags(doc, tags):
+    """Encode per-token tags following the BILUO scheme into entity offsets.
+
+    doc (Doc): The document that the BILUO tags refer to.
+    tags (iterable): A sequence of BILUO tags with each tag describing one
+        token. Each tag string will be of the form of either "", "O" or
+        "{action}-{label}", where action is one of "B", "I", "L", "U".
+    RETURNS (list): A sequence of `(start, end, label)` triples. `start` and
+        `end` will be character-offset integers denoting the slice into the
+        original string.
+    """
+    offsets = []
+    for span in spans_from_biluo_tags(doc, tags):
+        offsets.append((span.start_char, span.end_char, span.label_))
+    return offsets
+
+
+def tags_to_entities(tags):
+ """ Note that the end index returned by this function is inclusive.
+ To use it for Span creation, increment the end by 1.
+
+ tags (iterable): Per-token tags; None and "-" entries are skipped.
+ RETURNS (list): `(label, start, end)` triples of token indices.
+ """
+ entities = []
+ # Token index of the currently open "B" tag, or None if no entity is open.
+ start = None
+ for i, tag in enumerate(tags):
+ if tag is None:
+ continue
+ if tag.startswith("O"):
+ # TODO: We shouldn't be getting these malformed inputs. Fix this.
+ if start is not None:
+ start = None
+ else:
+ # An "O" token is recorded as a single-token entity with an empty label.
+ entities.append(("", i, i))
+ continue
+ elif tag == "-":
+ continue
+ elif tag.startswith("I"):
+ # An "I" tag without an open entity raises ValueError (E067).
+ if start is None:
+ raise ValueError(Errors.E067.format(tags=tags[: i + 1]))
+ continue
+ if tag.startswith("U"):
+ entities.append((tag[2:], i, i))
+ elif tag.startswith("B"):
+ start = i
+ elif tag.startswith("L"):
+ # NOTE(review): `start` is not checked here, so an "L" tag without a
+ # preceding "B" would record None as the start index - confirm inputs.
+ entities.append((tag[2:], start, i))
+ start = None
+ else:
+ raise ValueError(Errors.E068.format(tag=tag))
+ return entities
diff --git a/spacy/gold/loggers.py b/spacy/gold/loggers.py
new file mode 100644
index 000000000..e071e5827
--- /dev/null
+++ b/spacy/gold/loggers.py
@@ -0,0 +1,103 @@
+from typing import Dict, Any, Tuple, Callable, List
+
+from ..util import registry
+from .. import util
+from ..errors import Errors
+from wasabi import msg
+
+
+@registry.loggers("spacy.ConsoleLogger.v1")
+def console_logger():
+ # Logger factory registered as "spacy.ConsoleLogger.v1". Returns
+ # `setup_printer`, which prints a table header for the given `nlp` and
+ # returns a (log_step, finalize) pair of callables.
+ def setup_printer(
+ nlp: "Language",
+ ) -> Tuple[Callable[[Dict[str, Any]], None], Callable]:
+ # Columns: epoch, step, one loss per pipe, one column per score weight
+ # from the training config, and the overall score.
+ score_cols = list(nlp.config["training"]["score_weights"])
+ score_widths = [max(len(col), 6) for col in score_cols]
+ loss_cols = [f"Loss {pipe}" for pipe in nlp.pipe_names]
+ loss_widths = [max(len(col), 8) for col in loss_cols]
+ table_header = ["E", "#"] + loss_cols + score_cols + ["Score"]
+ table_header = [col.upper() for col in table_header]
+ table_widths = [3, 6] + loss_widths + score_widths + [6]
+ table_aligns = ["r" for _ in table_widths]
+ msg.row(table_header, widths=table_widths)
+ msg.row(["-" * width for width in table_widths])
+
+ def log_step(info: Dict[str, Any]):
+ # Print one table row for `info`. A missing loss for any pipe is
+ # re-raised as KeyError with message E983.
+ try:
+ losses = [
+ "{0:.2f}".format(float(info["losses"][pipe_name]))
+ for pipe_name in nlp.pipe_names
+ ]
+ except KeyError as e:
+ raise KeyError(
+ Errors.E983.format(
+ dict="scores (losses)",
+ key=str(e),
+ keys=list(info["losses"].keys()),
+ )
+ ) from None
+
+ # Scores missing from "other_scores" are shown as 0.0; values are
+ # multiplied by 100 for display.
+ try:
+ scores = [
+ "{0:.2f}".format(float(info["other_scores"].get(col, 0.0)) * 100)
+ for col in score_cols
+ ]
+ except KeyError as e:
+ raise KeyError(
+ Errors.E983.format(
+ dict="scores (other)",
+ key=str(e),
+ keys=list(info["other_scores"].keys()),
+ )
+ ) from None
+ data = (
+ [info["epoch"], info["step"]]
+ + losses
+ + scores
+ + ["{0:.2f}".format(float(info["score"]))]
+ )
+ msg.row(data, widths=table_widths, aligns=table_aligns)
+
+ def finalize():
+ # Nothing to clean up for console output.
+ pass
+
+ return log_step, finalize
+
+ return setup_printer
+
+
+@registry.loggers("spacy.WandbLogger.v1")
+def wandb_logger(project_name: str, remove_config_values: List[str] = []):
+ # Logger factory registered as "spacy.WandbLogger.v1". Wraps the console
+ # logger and additionally sends each step to wandb.
+ # `remove_config_values` lists dot-notation config keys to drop before the
+ # config is passed to wandb.init; the default list is only read, never
+ # modified.
+ # wandb is imported here, when the factory is called, not at module import.
+ import wandb
+
+ console = console_logger()
+
+ def setup_logger(
+ nlp: "Language",
+ ) -> Tuple[Callable[[Dict[str, Any]], None], Callable]:
+ config = nlp.config.interpolate()
+ # Flatten to dot notation so single values can be removed by key.
+ config_dot = util.dict_to_dot(config)
+ for field in remove_config_values:
+ # NOTE(review): presumably raises KeyError if `field` is not in the
+ # config - confirm that this is intended.
+ del config_dot[field]
+ config = util.dot_to_dict(config_dot)
+ wandb.init(project=project_name, config=config)
+ console_log_step, console_finalize = console(nlp)
+
+ def log_step(info: Dict[str, Any]):
+ # Print to the console first, then log score, losses and other scores.
+ console_log_step(info)
+ score = info["score"]
+ other_scores = info["other_scores"]
+ losses = info["losses"]
+ wandb.log({"score": score})
+ if losses:
+ wandb.log({f"loss_{k}": v for k, v in losses.items()})
+ if isinstance(other_scores, dict):
+ wandb.log(other_scores)
+
+ def finalize():
+ console_finalize()
+ pass
+
+ return log_step, finalize
+
+ return setup_logger
diff --git a/spacy/kb.pxd b/spacy/kb.pxd
index 518ce0f4e..695693666 100644
--- a/spacy/kb.pxd
+++ b/spacy/kb.pxd
@@ -1,15 +1,15 @@
"""Knowledge-base for entity or concept linking."""
from cymem.cymem cimport Pool
from preshed.maps cimport PreshMap
-
from libcpp.vector cimport vector
from libc.stdint cimport int32_t, int64_t
from libc.stdio cimport FILE
from .vocab cimport Vocab
from .typedefs cimport hash_t
-
from .structs cimport KBEntryC, AliasC
+
+
ctypedef vector[KBEntryC] entry_vec
ctypedef vector[AliasC] alias_vec
ctypedef vector[float] float_vec
@@ -140,7 +140,7 @@ cdef class KnowledgeBase:
self._entries.push_back(entry)
self._aliases_table.push_back(alias)
- cpdef load_bulk(self, loc)
+ cpdef from_disk(self, loc)
cpdef set_entities(self, entity_list, freq_list, vector_list)
diff --git a/spacy/kb.pyx b/spacy/kb.pyx
index a187e63d6..b24ed3a20 100644
--- a/spacy/kb.pyx
+++ b/spacy/kb.pyx
@@ -1,6 +1,5 @@
-# cython: infer_types=True
-# cython: profile=True
-# coding: utf8
+# cython: infer_types=True, profile=True
+from typing import Iterator
from cymem.cymem cimport Pool
from preshed.maps cimport PreshMap
from cpython.exc cimport PyErr_SetFromErrno
@@ -8,12 +7,11 @@ from libc.stdio cimport fopen, fclose, fread, fwrite, feof, fseek
from libc.stdint cimport int32_t, int64_t
from libcpp.vector cimport vector
+from pathlib import Path
import warnings
from os import path
-from pathlib import Path
from .typedefs cimport hash_t
-
from .errors import Errors, Warnings
@@ -23,7 +21,7 @@ cdef class Candidate:
algorithm which will disambiguate the various candidates to the correct one.
Each candidate (alias, entity) pair is assigned to a certain prior probability.
- DOCS: https://spacy.io/api/kb/#candidate_init
+ DOCS: https://nightly.spacy.io/api/kb/#candidate_init
"""
def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob):
@@ -41,7 +39,7 @@ cdef class Candidate:
@property
def entity_(self):
- """RETURNS (unicode): ID/name of this entity in the KB"""
+ """RETURNS (str): ID/name of this entity in the KB"""
return self.kb.vocab.strings[self.entity_hash]
@property
@@ -51,7 +49,7 @@ cdef class Candidate:
@property
def alias_(self):
- """RETURNS (unicode): ID of the original alias"""
+ """RETURNS (str): ID of the original alias"""
return self.kb.vocab.strings[self.alias_hash]
@property
@@ -67,21 +65,30 @@ cdef class Candidate:
return self.prior_prob
+def get_candidates(KnowledgeBase kb, span) -> Iterator[Candidate]:
+ """
+ Return candidate entities for a given span by using the text of the span as the alias
+ and fetching appropriate entries from the index.
+ This particular function is optimized to work with the built-in KB functionality,
+ but any other custom candidate generation method can be used in combination with the KB as well.
+ """
+ return kb.get_alias_candidates(span.text)
+
+
cdef class KnowledgeBase:
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
to support entity linking of named entities to real-world concepts.
- DOCS: https://spacy.io/api/kb
+ DOCS: https://nightly.spacy.io/api/kb
"""
- def __init__(self, Vocab vocab, entity_vector_length=64):
- self.vocab = vocab
+ def __init__(self, Vocab vocab, entity_vector_length):
+ """Create a KnowledgeBase."""
self.mem = Pool()
self.entity_vector_length = entity_vector_length
-
self._entry_index = PreshMap()
self._alias_index = PreshMap()
-
+ self.vocab = vocab
self.vocab.strings.add("")
self._create_empty_vectors(dummy_hash=self.vocab.strings[""])
@@ -261,8 +268,7 @@ cdef class KnowledgeBase:
alias_entry.probs = probs
self._aliases_table[alias_index] = alias_entry
-
- def get_candidates(self, unicode alias):
+ def get_alias_candidates(self, unicode alias) -> Iterator[Candidate]:
"""
Return candidate entities for an alias. Each candidate defines the entity, the original alias,
and the prior probability of that alias resolving to that entity.
@@ -313,7 +319,7 @@ cdef class KnowledgeBase:
return 0.0
- def dump(self, loc):
+ def to_disk(self, loc):
cdef Writer writer = Writer(loc)
writer.write_header(self.get_size_entities(), self.entity_vector_length)
@@ -353,7 +359,7 @@ cdef class KnowledgeBase:
writer.close()
- cpdef load_bulk(self, loc):
+ cpdef from_disk(self, loc):
cdef hash_t entity_hash
cdef hash_t alias_hash
cdef int64_t entry_index
@@ -448,7 +454,8 @@ cdef class Writer:
if isinstance(loc, Path):
loc = bytes(loc)
if path.exists(loc):
- assert not path.isdir(loc), "%s is directory." % loc
+ if path.isdir(loc):
+ raise ValueError(Errors.E928.format(loc=loc))
cdef bytes bytes_loc = loc.encode('utf8') if type(loc) == unicode else loc
self._fp = fopen(bytes_loc, 'wb')
if not self._fp:
@@ -492,8 +499,10 @@ cdef class Reader:
def __init__(self, object loc):
if isinstance(loc, Path):
loc = bytes(loc)
- assert path.exists(loc)
- assert not path.isdir(loc)
+ if not path.exists(loc):
+ raise ValueError(Errors.E929.format(loc=loc))
+ if path.isdir(loc):
+ raise ValueError(Errors.E928.format(loc=loc))
cdef bytes bytes_loc = loc.encode('utf8') if type(loc) == unicode else loc
self._fp = fopen(bytes_loc, 'rb')
if not self._fp:
diff --git a/spacy/lang/af/__init__.py b/spacy/lang/af/__init__.py
index 90ea324f0..91917daee 100644
--- a/spacy/lang/af/__init__.py
+++ b/spacy/lang/af/__init__.py
@@ -1,14 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from ...language import Language
-from ...attrs import LANG
class AfrikaansDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "af"
stop_words = STOP_WORDS
diff --git a/spacy/lang/af/stop_words.py b/spacy/lang/af/stop_words.py
index 2b3bcc019..4b5a04a5e 100644
--- a/spacy/lang/af/stop_words.py
+++ b/spacy/lang/af/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Source: https://github.com/stopwords-iso/stopwords-af
STOP_WORDS = set(
diff --git a/spacy/lang/ar/__init__.py b/spacy/lang/ar/__init__.py
index c120703f6..6abb65efb 100644
--- a/spacy/lang/ar/__init__.py
+++ b/spacy/lang/ar/__init__.py
@@ -1,34 +1,21 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_SUFFIXES
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
class ArabicDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "ar"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
suffixes = TOKENIZER_SUFFIXES
+ stop_words = STOP_WORDS
+ lex_attr_getters = LEX_ATTRS
writing_system = {"direction": "rtl", "has_case": False, "has_letters": True}
class Arabic(Language):
- lang = "ar"
Defaults = ArabicDefaults
+ lang = "ar"
__all__ = ["Arabic"]
diff --git a/spacy/lang/ar/examples.py b/spacy/lang/ar/examples.py
index 2a10f4fcc..a51bb9ded 100644
--- a/spacy/lang/ar/examples.py
+++ b/spacy/lang/ar/examples.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/ar/lex_attrs.py b/spacy/lang/ar/lex_attrs.py
index 19e7aef8a..54ad7a8c3 100644
--- a/spacy/lang/ar/lex_attrs.py
+++ b/spacy/lang/ar/lex_attrs.py
@@ -1,5 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
from ...attrs import LIKE_NUM
_num_words = set(
diff --git a/spacy/lang/ar/punctuation.py b/spacy/lang/ar/punctuation.py
index 6625c5475..f30204c02 100644
--- a/spacy/lang/ar/punctuation.py
+++ b/spacy/lang/ar/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
from ..char_classes import UNITS, ALPHA_UPPER
diff --git a/spacy/lang/ar/stop_words.py b/spacy/lang/ar/stop_words.py
index de2fc7443..f4da54dda 100644
--- a/spacy/lang/ar/stop_words.py
+++ b/spacy/lang/ar/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
STOP_WORDS = set(
"""
من
diff --git a/spacy/lang/ar/tokenizer_exceptions.py b/spacy/lang/ar/tokenizer_exceptions.py
index 030daecd5..7c385bef8 100644
--- a/spacy/lang/ar/tokenizer_exceptions.py
+++ b/spacy/lang/ar/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
_exc = {}
@@ -9,41 +8,41 @@ _exc = {}
# Time
for exc_data in [
- {LEMMA: "قبل الميلاد", ORTH: "ق.م"},
- {LEMMA: "بعد الميلاد", ORTH: "ب. م"},
- {LEMMA: "ميلادي", ORTH: ".م"},
- {LEMMA: "هجري", ORTH: ".هـ"},
- {LEMMA: "توفي", ORTH: ".ت"},
+ {NORM: "قبل الميلاد", ORTH: "ق.م"},
+ {NORM: "بعد الميلاد", ORTH: "ب. م"},
+ {NORM: "ميلادي", ORTH: ".م"},
+ {NORM: "هجري", ORTH: ".هـ"},
+ {NORM: "توفي", ORTH: ".ت"},
]:
_exc[exc_data[ORTH]] = [exc_data]
# Scientific abv.
for exc_data in [
- {LEMMA: "صلى الله عليه وسلم", ORTH: "صلعم"},
- {LEMMA: "الشارح", ORTH: "الشـ"},
- {LEMMA: "الظاهر", ORTH: "الظـ"},
- {LEMMA: "أيضًا", ORTH: "أيضـ"},
- {LEMMA: "إلى آخره", ORTH: "إلخ"},
- {LEMMA: "انتهى", ORTH: "اهـ"},
- {LEMMA: "حدّثنا", ORTH: "ثنا"},
- {LEMMA: "حدثني", ORTH: "ثنى"},
- {LEMMA: "أنبأنا", ORTH: "أنا"},
- {LEMMA: "أخبرنا", ORTH: "نا"},
- {LEMMA: "مصدر سابق", ORTH: "م. س"},
- {LEMMA: "مصدر نفسه", ORTH: "م. ن"},
+ {NORM: "صلى الله عليه وسلم", ORTH: "صلعم"},
+ {NORM: "الشارح", ORTH: "الشـ"},
+ {NORM: "الظاهر", ORTH: "الظـ"},
+ {NORM: "أيضًا", ORTH: "أيضـ"},
+ {NORM: "إلى آخره", ORTH: "إلخ"},
+ {NORM: "انتهى", ORTH: "اهـ"},
+ {NORM: "حدّثنا", ORTH: "ثنا"},
+ {NORM: "حدثني", ORTH: "ثنى"},
+ {NORM: "أنبأنا", ORTH: "أنا"},
+ {NORM: "أخبرنا", ORTH: "نا"},
+ {NORM: "مصدر سابق", ORTH: "م. س"},
+ {NORM: "مصدر نفسه", ORTH: "م. ن"},
]:
_exc[exc_data[ORTH]] = [exc_data]
# Other abv.
for exc_data in [
- {LEMMA: "دكتور", ORTH: "د."},
- {LEMMA: "أستاذ دكتور", ORTH: "أ.د"},
- {LEMMA: "أستاذ", ORTH: "أ."},
- {LEMMA: "بروفيسور", ORTH: "ب."},
+ {NORM: "دكتور", ORTH: "د."},
+ {NORM: "أستاذ دكتور", ORTH: "أ.د"},
+ {NORM: "أستاذ", ORTH: "أ."},
+ {NORM: "بروفيسور", ORTH: "ب."},
]:
_exc[exc_data[ORTH]] = [exc_data]
-for exc_data in [{LEMMA: "تلفون", ORTH: "ت."}, {LEMMA: "صندوق بريد", ORTH: "ص.ب"}]:
+for exc_data in [{NORM: "تلفون", ORTH: "ت."}, {NORM: "صندوق بريد", ORTH: "ص.ب"}]:
_exc[exc_data[ORTH]] = [exc_data]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/bg/__init__.py b/spacy/lang/bg/__init__.py
index 9b4c647e3..a30f49ce7 100644
--- a/spacy/lang/bg/__init__.py
+++ b/spacy/lang/bg/__init__.py
@@ -1,14 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from ...language import Language
-from ...attrs import LANG
class BulgarianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "bg"
stop_words = STOP_WORDS
diff --git a/spacy/lang/bg/examples.py b/spacy/lang/bg/examples.py
index b08b8926d..a6d40da1a 100644
--- a/spacy/lang/bg/examples.py
+++ b/spacy/lang/bg/examples.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/bg/stop_words.py b/spacy/lang/bg/stop_words.py
index e7c65cbc2..aae7692a2 100644
--- a/spacy/lang/bg/stop_words.py
+++ b/spacy/lang/bg/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Source: https://github.com/Alir3z4/stop-words
STOP_WORDS = set(
diff --git a/spacy/lang/bn/__init__.py b/spacy/lang/bn/__init__.py
index 7da50ff2d..6c1d66cba 100644
--- a/spacy/lang/bn/__init__.py
+++ b/spacy/lang/bn/__init__.py
@@ -1,24 +1,15 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
class BengaliDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "bn"
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
suffixes = TOKENIZER_SUFFIXES
infixes = TOKENIZER_INFIXES
+ stop_words = STOP_WORDS
class Bengali(Language):
diff --git a/spacy/lang/bn/examples.py b/spacy/lang/bn/examples.py
index 2d5bdb238..c3be4c556 100644
--- a/spacy/lang/bn/examples.py
+++ b/spacy/lang/bn/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/bn/morph_rules.py b/spacy/lang/bn/morph_rules.py
deleted file mode 100644
index 21a76c7e6..000000000
--- a/spacy/lang/bn/morph_rules.py
+++ /dev/null
@@ -1,266 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import LEMMA, PRON_LEMMA
-
-
-MORPH_RULES = {
- "PRP": {
- "ঐ": {LEMMA: PRON_LEMMA, "PronType": "Dem"},
- "ওই": {LEMMA: PRON_LEMMA, "PronType": "Dem"},
- "আমাকে": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "One",
- "PronType": "Prs",
- "Case": "Acc",
- },
- "কি": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Gender": "Neut",
- "PronType": "Int",
- "Case": "Acc",
- },
- "সে": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Three",
- "PronType": "Prs",
- "Case": "Nom",
- },
- "কিসে": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Gender": "Neut",
- "PronType": "Int",
- "Case": "Acc",
- },
- "তাকে": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Three",
- "PronType": "Prs",
- "Case": "Acc",
- },
- "স্বয়ং": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"},
- "কোনগুলো": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Gender": "Neut",
- "PronType": "Int",
- "Case": "Acc",
- },
- "তুমি": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Two",
- "PronType": "Prs",
- "Case": "Nom",
- },
- "তুই": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Two",
- "PronType": "Prs",
- "Case": "Nom",
- },
- "তাদেরকে": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "Three",
- "PronType": "Prs",
- "Case": "Acc",
- },
- "আমরা": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "One ",
- "PronType": "Prs",
- "Case": "Nom",
- },
- "যিনি": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Rel", "Case": "Nom"},
- "আমাদেরকে": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "One",
- "PronType": "Prs",
- "Case": "Acc",
- },
- "কোন": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Int", "Case": "Acc"},
- "কারা": {LEMMA: PRON_LEMMA, "Number": "Plur", "PronType": "Int", "Case": "Acc"},
- "তোমাকে": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Two",
- "PronType": "Prs",
- "Case": "Acc",
- },
- "তোকে": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Two",
- "PronType": "Prs",
- "Case": "Acc",
- },
- "খোদ": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"},
- "কে": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Int", "Case": "Acc"},
- "যারা": {LEMMA: PRON_LEMMA, "Number": "Plur", "PronType": "Rel", "Case": "Nom"},
- "যে": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Rel", "Case": "Nom"},
- "তোমরা": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "Two",
- "PronType": "Prs",
- "Case": "Nom",
- },
- "তোরা": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "Two",
- "PronType": "Prs",
- "Case": "Nom",
- },
- "তোমাদেরকে": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "Two",
- "PronType": "Prs",
- "Case": "Acc",
- },
- "তোদেরকে": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "Two",
- "PronType": "Prs",
- "Case": "Acc",
- },
- "আপন": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"},
- "এ": {LEMMA: PRON_LEMMA, "PronType": "Dem"},
- "নিজ": {LEMMA: PRON_LEMMA, "Reflex": "Yes", "PronType": "Ref"},
- "কার": {LEMMA: PRON_LEMMA, "Number": "Sing", "PronType": "Int", "Case": "Acc"},
- "যা": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Gender": "Neut",
- "PronType": "Rel",
- "Case": "Nom",
- },
- "তারা": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "Three",
- "PronType": "Prs",
- "Case": "Nom",
- },
- "আমি": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "One",
- "PronType": "Prs",
- "Case": "Nom",
- },
- },
- "PRP$": {
- "আমার": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "One",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "মোর": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "One",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "মোদের": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "One",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "তার": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Three",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "তাহাার": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Three",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "তোমাদের": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "Two",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "আমাদের": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "One",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "তোমার": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Two",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "তোর": {
- LEMMA: PRON_LEMMA,
- "Number": "Sing",
- "Person": "Two",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "তাদের": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "Three",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "কাদের": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "PronType": "Int",
- "Case": "Acc",
- },
- "তোদের": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "Person": "Two",
- "PronType": "Prs",
- "Poss": "Yes",
- "Case": "Nom",
- },
- "যাদের": {
- LEMMA: PRON_LEMMA,
- "Number": "Plur",
- "PronType": "Int",
- "Case": "Acc",
- },
- },
-}
diff --git a/spacy/lang/bn/punctuation.py b/spacy/lang/bn/punctuation.py
index f624b4ba4..becfe8d2a 100644
--- a/spacy/lang/bn/punctuation.py
+++ b/spacy/lang/bn/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_ICONS
from ..char_classes import ALPHA_LOWER, ALPHA, HYPHENS, CONCAT_QUOTES, UNITS
diff --git a/spacy/lang/bn/stop_words.py b/spacy/lang/bn/stop_words.py
index 6c9967df8..bf38e3254 100644
--- a/spacy/lang/bn/stop_words.py
+++ b/spacy/lang/bn/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
অতএব অথচ অথবা অনুযায়ী অনেক অনেকে অনেকেই অন্তত অবধি অবশ্য অর্থাৎ অন্য অনুযায়ী অর্ধভাগে
diff --git a/spacy/lang/bn/tokenizer_exceptions.py b/spacy/lang/bn/tokenizer_exceptions.py
index 32acb1730..e666522b8 100644
--- a/spacy/lang/bn/tokenizer_exceptions.py
+++ b/spacy/lang/bn/tokenizer_exceptions.py
@@ -1,27 +1,26 @@
-# coding=utf-8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
_exc = {}
for exc_data in [
- {ORTH: "ডঃ", LEMMA: "ডক্টর"},
- {ORTH: "ডাঃ", LEMMA: "ডাক্তার"},
- {ORTH: "ড.", LEMMA: "ডক্টর"},
- {ORTH: "ডা.", LEMMA: "ডাক্তার"},
- {ORTH: "মোঃ", LEMMA: "মোহাম্মদ"},
- {ORTH: "মো.", LEMMA: "মোহাম্মদ"},
- {ORTH: "সে.", LEMMA: "সেলসিয়াস"},
- {ORTH: "কি.মি.", LEMMA: "কিলোমিটার"},
- {ORTH: "কি.মি", LEMMA: "কিলোমিটার"},
- {ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"},
- {ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"},
- {ORTH: "মি.লি.", LEMMA: "মিলিলিটার"},
+ {ORTH: "ডঃ", NORM: "ডক্টর"},
+ {ORTH: "ডাঃ", NORM: "ডাক্তার"},
+ {ORTH: "ড.", NORM: "ডক্টর"},
+ {ORTH: "ডা.", NORM: "ডাক্তার"},
+ {ORTH: "মোঃ", NORM: "মোহাম্মদ"},
+ {ORTH: "মো.", NORM: "মোহাম্মদ"},
+ {ORTH: "সে.", NORM: "সেলসিয়াস"},
+ {ORTH: "কি.মি.", NORM: "কিলোমিটার"},
+ {ORTH: "কি.মি", NORM: "কিলোমিটার"},
+ {ORTH: "সে.মি.", NORM: "সেন্টিমিটার"},
+ {ORTH: "সে.মি", NORM: "সেন্টিমিটার"},
+ {ORTH: "মি.লি.", NORM: "মিলিলিটার"},
]:
_exc[exc_data[ORTH]] = [exc_data]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/ca/__init__.py b/spacy/lang/ca/__init__.py
index 6d4c00a6b..970b23c1e 100644
--- a/spacy/lang/ca/__init__.py
+++ b/spacy/lang/ca/__init__.py
@@ -1,29 +1,15 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from .punctuation import TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
-
-from .punctuation import TOKENIZER_INFIXES
class CatalanDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "ca"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- lex_attr_getters.update(LEX_ATTRS)
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
infixes = TOKENIZER_INFIXES
+ stop_words = STOP_WORDS
+ lex_attr_getters = LEX_ATTRS
class Catalan(Language):
diff --git a/spacy/lang/ca/examples.py b/spacy/lang/ca/examples.py
index 3020ee707..ae6aa3e24 100644
--- a/spacy/lang/ca/examples.py
+++ b/spacy/lang/ca/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/ca/lex_attrs.py b/spacy/lang/ca/lex_attrs.py
index 6314efa92..be8b7a6ea 100644
--- a/spacy/lang/ca/lex_attrs.py
+++ b/spacy/lang/ca/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/ca/punctuation.py b/spacy/lang/ca/punctuation.py
index 4439376c8..d50b75589 100644
--- a/spacy/lang/ca/punctuation.py
+++ b/spacy/lang/ca/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..punctuation import TOKENIZER_INFIXES
from ..char_classes import ALPHA
diff --git a/spacy/lang/ca/stop_words.py b/spacy/lang/ca/stop_words.py
index a803db2a5..1a87b2f9d 100644
--- a/spacy/lang/ca/stop_words.py
+++ b/spacy/lang/ca/stop_words.py
@@ -1,7 +1,3 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
a abans ací ah així això al aleshores algun alguna algunes alguns alhora allà allí allò
diff --git a/spacy/lang/ca/tag_map.py b/spacy/lang/ca/tag_map.py
deleted file mode 100644
index 472e772ef..000000000
--- a/spacy/lang/ca/tag_map.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ..symbols import POS, ADV, NOUN, ADP, PRON, SCONJ, PROPN, DET, SYM, INTJ
-from ..symbols import PUNCT, NUM, AUX, X, CONJ, ADJ, VERB, PART, SPACE, CCONJ
-
-
-TAG_MAP = {
- "ADV": {POS: ADV},
- "NOUN": {POS: NOUN},
- "ADP": {POS: ADP},
- "PRON": {POS: PRON},
- "SCONJ": {POS: SCONJ},
- "PROPN": {POS: PROPN},
- "DET": {POS: DET},
- "SYM": {POS: SYM},
- "INTJ": {POS: INTJ},
- "PUNCT": {POS: PUNCT},
- "NUM": {POS: NUM},
- "AUX": {POS: AUX},
- "X": {POS: X},
- "CONJ": {POS: CONJ},
- "CCONJ": {POS: CCONJ},
- "ADJ": {POS: ADJ},
- "VERB": {POS: VERB},
- "PART": {POS: PART},
- "SP": {POS: SPACE},
-}
diff --git a/spacy/lang/ca/tokenizer_exceptions.py b/spacy/lang/ca/tokenizer_exceptions.py
index d95e5e626..b465e97ba 100644
--- a/spacy/lang/ca/tokenizer_exceptions.py
+++ b/spacy/lang/ca/tokenizer_exceptions.py
@@ -1,41 +1,40 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
_exc = {}
for exc_data in [
- {ORTH: "aprox.", LEMMA: "aproximadament"},
- {ORTH: "pàg.", LEMMA: "pàgina"},
- {ORTH: "p.ex.", LEMMA: "per exemple"},
- {ORTH: "gen.", LEMMA: "gener"},
- {ORTH: "feb.", LEMMA: "febrer"},
- {ORTH: "abr.", LEMMA: "abril"},
- {ORTH: "jul.", LEMMA: "juliol"},
- {ORTH: "set.", LEMMA: "setembre"},
- {ORTH: "oct.", LEMMA: "octubre"},
- {ORTH: "nov.", LEMMA: "novembre"},
- {ORTH: "dec.", LEMMA: "desembre"},
- {ORTH: "Dr.", LEMMA: "doctor"},
- {ORTH: "Sr.", LEMMA: "senyor"},
- {ORTH: "Sra.", LEMMA: "senyora"},
- {ORTH: "Srta.", LEMMA: "senyoreta"},
- {ORTH: "núm", LEMMA: "número"},
- {ORTH: "St.", LEMMA: "sant"},
- {ORTH: "Sta.", LEMMA: "santa"},
+ {ORTH: "aprox.", NORM: "aproximadament"},
+ {ORTH: "pàg.", NORM: "pàgina"},
+ {ORTH: "p.ex.", NORM: "per exemple"},
+ {ORTH: "gen.", NORM: "gener"},
+ {ORTH: "feb.", NORM: "febrer"},
+ {ORTH: "abr.", NORM: "abril"},
+ {ORTH: "jul.", NORM: "juliol"},
+ {ORTH: "set.", NORM: "setembre"},
+ {ORTH: "oct.", NORM: "octubre"},
+ {ORTH: "nov.", NORM: "novembre"},
+ {ORTH: "dec.", NORM: "desembre"},
+ {ORTH: "Dr.", NORM: "doctor"},
+ {ORTH: "Sr.", NORM: "senyor"},
+ {ORTH: "Sra.", NORM: "senyora"},
+ {ORTH: "Srta.", NORM: "senyoreta"},
+ {ORTH: "núm", NORM: "número"},
+ {ORTH: "St.", NORM: "sant"},
+ {ORTH: "Sta.", NORM: "santa"},
]:
_exc[exc_data[ORTH]] = [exc_data]
# Times
-_exc["12m."] = [{ORTH: "12"}, {ORTH: "m.", LEMMA: "p.m."}]
+_exc["12m."] = [{ORTH: "12"}, {ORTH: "m.", NORM: "p.m."}]
for h in range(1, 12 + 1):
for period in ["a.m.", "am"]:
- _exc["%d%s" % (h, period)] = [{ORTH: "%d" % h}, {ORTH: period, LEMMA: "a.m."}]
+ _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, NORM: "a.m."}]
for period in ["p.m.", "pm"]:
- _exc["%d%s" % (h, period)] = [{ORTH: "%d" % h}, {ORTH: period, LEMMA: "p.m."}]
+ _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, NORM: "p.m."}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/char_classes.py b/spacy/lang/char_classes.py
index bd0f7e437..b8094319f 100644
--- a/spacy/lang/char_classes.py
+++ b/spacy/lang/char_classes.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
split_chars = lambda char: list(char.strip().split(" "))
merge_chars = lambda char: char.strip().replace(" ", "|")
group_chars = lambda char: char.strip().replace(" ", "")
diff --git a/spacy/lang/cs/__init__.py b/spacy/lang/cs/__init__.py
index baaaa162b..0c35e2288 100644
--- a/spacy/lang/cs/__init__.py
+++ b/spacy/lang/cs/__init__.py
@@ -1,17 +1,11 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
-from ...language import Language
-from ...attrs import LANG
from .lex_attrs import LEX_ATTRS
+from ...language import Language
class CzechDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "cs"
stop_words = STOP_WORDS
+ lex_attr_getters = LEX_ATTRS
class Czech(Language):
diff --git a/spacy/lang/cs/stop_words.py b/spacy/lang/cs/stop_words.py
index 9277772fb..f61f424f6 100644
--- a/spacy/lang/cs/stop_words.py
+++ b/spacy/lang/cs/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
# Source: https://github.com/Alir3z4/stop-words
# Source: https://github.com/stopwords-iso/stopwords-cs/blob/master/stopwords-cs.txt
diff --git a/spacy/lang/da/__init__.py b/spacy/lang/da/__init__.py
index 0190656e5..8cac30b26 100644
--- a/spacy/lang/da/__init__.py
+++ b/spacy/lang/da/__init__.py
@@ -1,28 +1,15 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-from .morph_rules import MORPH_RULES
-from ..tag_map import TAG_MAP
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
class DanishDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "da"
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- morph_rules = MORPH_RULES
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
- tag_map = TAG_MAP
+ lex_attr_getters = LEX_ATTRS
stop_words = STOP_WORDS
diff --git a/spacy/lang/da/examples.py b/spacy/lang/da/examples.py
index 525c6519c..efa1a7c0e 100644
--- a/spacy/lang/da/examples.py
+++ b/spacy/lang/da/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/da/lex_attrs.py b/spacy/lang/da/lex_attrs.py
index 9fefc1eba..403af686c 100644
--- a/spacy/lang/da/lex_attrs.py
+++ b/spacy/lang/da/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/da/morph_rules.py b/spacy/lang/da/morph_rules.py
deleted file mode 100644
index 7ffe2ac6f..000000000
--- a/spacy/lang/da/morph_rules.py
+++ /dev/null
@@ -1,311 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import LEMMA, PRON_LEMMA
-
-# Source: Danish Universal Dependencies and http://fjern-uv.dk/pronom.php
-
-# Note: The Danish Universal Dependencies specify Case=Acc for all instances
-# of "den"/"det" even when the case is in fact "Nom". In the rules below, Case
-# is left unspecified for "den" and "det".
-
-MORPH_RULES = {
- "PRON": {
- "jeg": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Case": "Nom",
- "Gender": "Com",
- }, # Case=Nom|Gender=Com|Number=Sing|Person=1|PronType=Prs
- "mig": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Case": "Acc",
- "Gender": "Com",
- }, # Case=Acc|Gender=Com|Number=Sing|Person=1|PronType=Prs
- "min": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Com",
- }, # Gender=Com|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs
- "mit": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- }, # Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs
- "vor": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Com",
- }, # Gender=Com|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs|Style=Form
- "vort": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- }, # Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs|Style=Form
- "du": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Sing",
- "Case": "Nom",
- "Gender": "Com",
- }, # Case=Nom|Gender=Com|Number=Sing|Person=2|PronType=Prs
- "dig": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Sing",
- "Case": "Acc",
- "Gender": "Com",
- }, # Case=Acc|Gender=Com|Number=Sing|Person=2|PronType=Prs
- "din": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Com",
- }, # Gender=Com|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs
- "dit": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- }, # Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs
- "han": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Case": "Nom",
- "Gender": "Com",
- }, # Case=Nom|Gender=Com|Number=Sing|Person=3|PronType=Prs
- "hun": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Case": "Nom",
- "Gender": "Com",
- }, # Case=Nom|Gender=Com|Number=Sing|Person=3|PronType=Prs
- "den": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Com",
- }, # Case=Acc|Gender=Com|Number=Sing|Person=3|PronType=Prs, See note above.
- "det": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Neut",
- }, # Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs See note above.
- "ham": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Case": "Acc",
- "Gender": "Com",
- }, # Case=Acc|Gender=Com|Number=Sing|Person=3|PronType=Prs
- "hende": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Case": "Acc",
- "Gender": "Com",
- }, # Case=Acc|Gender=Com|Number=Sing|Person=3|PronType=Prs
- "sin": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Com",
- "Reflex": "Yes",
- }, # Gender=Com|Number=Sing|Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes
- "sit": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- "Reflex": "Yes",
- }, # Gender=Neut|Number=Sing|Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes
- "vi": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Case": "Nom",
- "Gender": "Com",
- }, # Case=Nom|Gender=Com|Number=Plur|Person=1|PronType=Prs
- "os": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Case": "Acc",
- "Gender": "Com",
- }, # Case=Acc|Gender=Com|Number=Plur|Person=1|PronType=Prs
- "mine": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Poss": "Yes",
- }, # Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs
- "vore": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Poss": "Yes",
- }, # Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs|Style=Form
- "I": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Plur",
- "Case": "Nom",
- "Gender": "Com",
- }, # Case=Nom|Gender=Com|Number=Plur|Person=2|PronType=Prs
- "jer": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Plur",
- "Case": "Acc",
- "Gender": "Com",
- }, # Case=Acc|Gender=Com|Number=Plur|Person=2|PronType=Prs
- "dine": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Plur",
- "Poss": "Yes",
- }, # Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs
- "de": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Plur",
- "Case": "Nom",
- }, # Case=Nom|Number=Plur|Person=3|PronType=Prs
- "dem": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Plur",
- "Case": "Acc",
- }, # Case=Acc|Number=Plur|Person=3|PronType=Prs
- "sine": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Plur",
- "Poss": "Yes",
- "Reflex": "Yes",
- }, # Number=Plur|Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs|Reflex=Yes
- "vores": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Poss": "Yes",
- }, # Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs
- "De": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Case": "Nom",
- "Gender": "Com",
- }, # Case=Nom|Gender=Com|Person=2|Polite=Form|PronType=Prs
- "Dem": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Case": "Acc",
- "Gender": "Com",
- }, # Case=Acc|Gender=Com|Person=2|Polite=Form|PronType=Prs
- "Deres": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Poss": "Yes",
- }, # Person=2|Polite=Form|Poss=Yes|PronType=Prs
- "jeres": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Poss": "Yes",
- }, # Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs
- "sig": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Case": "Acc",
- "Reflex": "Yes",
- }, # Case=Acc|Person=3|PronType=Prs|Reflex=Yes
- "hans": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Poss": "Yes",
- }, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs
- "hendes": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Poss": "Yes",
- }, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs
- "dens": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Poss": "Yes",
- }, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs
- "dets": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Poss": "Yes",
- }, # Number[psor]=Sing|Person=3|Poss=Yes|PronType=Prs
- "deres": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Poss": "Yes",
- }, # Number[psor]=Plur|Person=3|Poss=Yes|PronType=Prs
- },
- "VERB": {
- "er": {LEMMA: "være", "VerbForm": "Fin", "Tense": "Pres"},
- "var": {LEMMA: "være", "VerbForm": "Fin", "Tense": "Past"},
- },
-}
-
-for tag, rules in MORPH_RULES.items():
- for key, attrs in dict(rules).items():
- rules[key.title()] = attrs
diff --git a/spacy/lang/da/punctuation.py b/spacy/lang/da/punctuation.py
index b6b852c55..e050ab7aa 100644
--- a/spacy/lang/da/punctuation.py
+++ b/spacy/lang/da/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_ELLIPSES, LIST_ICONS
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
from ..punctuation import TOKENIZER_SUFFIXES
diff --git a/spacy/lang/da/stop_words.py b/spacy/lang/da/stop_words.py
index 48de0c7ca..05b2084dd 100644
--- a/spacy/lang/da/stop_words.py
+++ b/spacy/lang/da/stop_words.py
@@ -1,6 +1,3 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
# Source: Handpicked by Jens Dahl Møllerhøj.
STOP_WORDS = set(
diff --git a/spacy/lang/da/tokenizer_exceptions.py b/spacy/lang/da/tokenizer_exceptions.py
index 9e4637bfb..ce25c546b 100644
--- a/spacy/lang/da/tokenizer_exceptions.py
+++ b/spacy/lang/da/tokenizer_exceptions.py
@@ -1,12 +1,10 @@
-# encoding: utf8
"""
Tokenizer Exceptions.
Source: https://forkortelse.dk/ and various others.
"""
-
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA, NORM
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
_exc = {}
@@ -15,44 +13,44 @@ _exc = {}
# (for "torsdag") are left out because they are ambiguous. The same is the case
# for abbreviations "jul." and "Jul." ("juli").
for exc_data in [
- {ORTH: "Kbh.", LEMMA: "København", NORM: "København"},
- {ORTH: "jan.", LEMMA: "januar"},
- {ORTH: "febr.", LEMMA: "februar"},
- {ORTH: "feb.", LEMMA: "februar"},
- {ORTH: "mar.", LEMMA: "marts"},
- {ORTH: "apr.", LEMMA: "april"},
- {ORTH: "jun.", LEMMA: "juni"},
- {ORTH: "aug.", LEMMA: "august"},
- {ORTH: "sept.", LEMMA: "september"},
- {ORTH: "sep.", LEMMA: "september"},
- {ORTH: "okt.", LEMMA: "oktober"},
- {ORTH: "nov.", LEMMA: "november"},
- {ORTH: "dec.", LEMMA: "december"},
- {ORTH: "man.", LEMMA: "mandag"},
- {ORTH: "tirs.", LEMMA: "tirsdag"},
- {ORTH: "ons.", LEMMA: "onsdag"},
- {ORTH: "tor.", LEMMA: "torsdag"},
- {ORTH: "tors.", LEMMA: "torsdag"},
- {ORTH: "fre.", LEMMA: "fredag"},
- {ORTH: "lør.", LEMMA: "lørdag"},
- {ORTH: "Jan.", LEMMA: "januar"},
- {ORTH: "Febr.", LEMMA: "februar"},
- {ORTH: "Feb.", LEMMA: "februar"},
- {ORTH: "Mar.", LEMMA: "marts"},
- {ORTH: "Apr.", LEMMA: "april"},
- {ORTH: "Jun.", LEMMA: "juni"},
- {ORTH: "Aug.", LEMMA: "august"},
- {ORTH: "Sept.", LEMMA: "september"},
- {ORTH: "Sep.", LEMMA: "september"},
- {ORTH: "Okt.", LEMMA: "oktober"},
- {ORTH: "Nov.", LEMMA: "november"},
- {ORTH: "Dec.", LEMMA: "december"},
- {ORTH: "Man.", LEMMA: "mandag"},
- {ORTH: "Tirs.", LEMMA: "tirsdag"},
- {ORTH: "Ons.", LEMMA: "onsdag"},
- {ORTH: "Fre.", LEMMA: "fredag"},
- {ORTH: "Lør.", LEMMA: "lørdag"},
- {ORTH: "og/eller", LEMMA: "og/eller", NORM: "og/eller"},
+ {ORTH: "Kbh.", NORM: "København"},
+ {ORTH: "jan.", NORM: "januar"},
+ {ORTH: "febr.", NORM: "februar"},
+ {ORTH: "feb.", NORM: "februar"},
+ {ORTH: "mar.", NORM: "marts"},
+ {ORTH: "apr.", NORM: "april"},
+ {ORTH: "jun.", NORM: "juni"},
+ {ORTH: "aug.", NORM: "august"},
+ {ORTH: "sept.", NORM: "september"},
+ {ORTH: "sep.", NORM: "september"},
+ {ORTH: "okt.", NORM: "oktober"},
+ {ORTH: "nov.", NORM: "november"},
+ {ORTH: "dec.", NORM: "december"},
+ {ORTH: "man.", NORM: "mandag"},
+ {ORTH: "tirs.", NORM: "tirsdag"},
+ {ORTH: "ons.", NORM: "onsdag"},
+ {ORTH: "tor.", NORM: "torsdag"},
+ {ORTH: "tors.", NORM: "torsdag"},
+ {ORTH: "fre.", NORM: "fredag"},
+ {ORTH: "lør.", NORM: "lørdag"},
+ {ORTH: "Jan.", NORM: "januar"},
+ {ORTH: "Febr.", NORM: "februar"},
+ {ORTH: "Feb.", NORM: "februar"},
+ {ORTH: "Mar.", NORM: "marts"},
+ {ORTH: "Apr.", NORM: "april"},
+ {ORTH: "Jun.", NORM: "juni"},
+ {ORTH: "Aug.", NORM: "august"},
+ {ORTH: "Sept.", NORM: "september"},
+ {ORTH: "Sep.", NORM: "september"},
+ {ORTH: "Okt.", NORM: "oktober"},
+ {ORTH: "Nov.", NORM: "november"},
+ {ORTH: "Dec.", NORM: "december"},
+ {ORTH: "Man.", NORM: "mandag"},
+ {ORTH: "Tirs.", NORM: "tirsdag"},
+ {ORTH: "Ons.", NORM: "onsdag"},
+ {ORTH: "Fre.", NORM: "fredag"},
+ {ORTH: "Lør.", NORM: "lørdag"},
+ {ORTH: "og/eller", NORM: "og/eller"},
]:
_exc[exc_data[ORTH]] = [exc_data]
@@ -552,22 +550,22 @@ for orth in [
_exc[capitalized] = [{ORTH: capitalized}]
for exc_data in [
- {ORTH: "s'gu", LEMMA: "s'gu", NORM: "s'gu"},
- {ORTH: "S'gu", LEMMA: "s'gu", NORM: "s'gu"},
- {ORTH: "sgu'", LEMMA: "s'gu", NORM: "s'gu"},
- {ORTH: "Sgu'", LEMMA: "s'gu", NORM: "s'gu"},
- {ORTH: "sku'", LEMMA: "skal", NORM: "skulle"},
- {ORTH: "ku'", LEMMA: "kan", NORM: "kunne"},
- {ORTH: "Ku'", LEMMA: "kan", NORM: "kunne"},
- {ORTH: "ka'", LEMMA: "kan", NORM: "kan"},
- {ORTH: "Ka'", LEMMA: "kan", NORM: "kan"},
- {ORTH: "gi'", LEMMA: "give", NORM: "giv"},
- {ORTH: "Gi'", LEMMA: "give", NORM: "giv"},
- {ORTH: "li'", LEMMA: "lide", NORM: "lide"},
- {ORTH: "ha'", LEMMA: "have", NORM: "have"},
- {ORTH: "Ha'", LEMMA: "have", NORM: "have"},
- {ORTH: "ik'", LEMMA: "ikke", NORM: "ikke"},
- {ORTH: "Ik'", LEMMA: "ikke", NORM: "ikke"},
+ {ORTH: "s'gu", NORM: "s'gu"},
+ {ORTH: "S'gu", NORM: "s'gu"},
+ {ORTH: "sgu'", NORM: "s'gu"},
+ {ORTH: "Sgu'", NORM: "s'gu"},
+ {ORTH: "sku'", NORM: "skulle"},
+ {ORTH: "ku'", NORM: "kunne"},
+ {ORTH: "Ku'", NORM: "kunne"},
+ {ORTH: "ka'", NORM: "kan"},
+ {ORTH: "Ka'", NORM: "kan"},
+ {ORTH: "gi'", NORM: "giv"},
+ {ORTH: "Gi'", NORM: "giv"},
+ {ORTH: "li'", NORM: "lide"},
+ {ORTH: "ha'", NORM: "have"},
+ {ORTH: "Ha'", NORM: "have"},
+ {ORTH: "ik'", NORM: "ikke"},
+ {ORTH: "Ik'", NORM: "ikke"},
]:
_exc[exc_data[ORTH]] = [exc_data]
@@ -575,9 +573,9 @@ for exc_data in [
# Dates
for h in range(1, 31 + 1):
for period in ["."]:
- _exc["%d%s" % (h, period)] = [{ORTH: "%d." % h}]
+ _exc[f"{h}{period}"] = [{ORTH: f"{h}."}]
-_custom_base_exc = {"i.": [{ORTH: "i", LEMMA: "i", NORM: "i"}, {ORTH: "."}]}
+_custom_base_exc = {"i.": [{ORTH: "i", NORM: "i"}, {ORTH: "."}]}
_exc.update(_custom_base_exc)
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/de/__init__.py b/spacy/lang/de/__init__.py
index ca01428ba..b645d3480 100644
--- a/spacy/lang/de/__init__.py
+++ b/spacy/lang/de/__init__.py
@@ -1,43 +1,17 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
-from .punctuation import TOKENIZER_INFIXES
-from .tag_map import TAG_MAP
+from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
class GermanDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "de"
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
suffixes = TOKENIZER_SUFFIXES
infixes = TOKENIZER_INFIXES
- tag_map = TAG_MAP
- stop_words = STOP_WORDS
syntax_iterators = SYNTAX_ITERATORS
- single_orth_variants = [
- {"tags": ["$("], "variants": ["…", "..."]},
- {"tags": ["$("], "variants": ["-", "—", "–", "--", "---", "——"]},
- ]
- paired_orth_variants = [
- {
- "tags": ["$("],
- "variants": [("'", "'"), (",", "'"), ("‚", "‘"), ("›", "‹"), ("‹", "›")],
- },
- {
- "tags": ["$("],
- "variants": [("``", "''"), ('"', '"'), ("„", "“"), ("»", "«"), ("«", "»")],
- },
- ]
+ stop_words = STOP_WORDS
class German(Language):
diff --git a/spacy/lang/de/examples.py b/spacy/lang/de/examples.py
index 0c64a693a..735d1c316 100644
--- a/spacy/lang/de/examples.py
+++ b/spacy/lang/de/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/de/punctuation.py b/spacy/lang/de/punctuation.py
index 93454ffff..69d402237 100644
--- a/spacy/lang/de/punctuation.py
+++ b/spacy/lang/de/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
from ..char_classes import CURRENCY, UNITS, PUNCT
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
diff --git a/spacy/lang/de/stop_words.py b/spacy/lang/de/stop_words.py
index 0c8b375e0..f52687eb9 100644
--- a/spacy/lang/de/stop_words.py
+++ b/spacy/lang/de/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
á a ab aber ach acht achte achten achter achtes ag alle allein allem allen
diff --git a/spacy/lang/de/syntax_iterators.py b/spacy/lang/de/syntax_iterators.py
index 73c1b1a6e..bd495f792 100644
--- a/spacy/lang/de/syntax_iterators.py
+++ b/spacy/lang/de/syntax_iterators.py
@@ -1,42 +1,26 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Union, Iterator
from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...tokens import Doc, Span
-def noun_chunks(doclike):
- """
- Detect base noun phrases from a dependency parse. Works on both Doc and Span.
- """
+def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Span]:
+ """Detect base noun phrases from a dependency parse. Works on Doc and Span."""
# this iterator extracts spans headed by NOUNs starting from the left-most
# syntactic dependent until the NOUN itself for close apposition and
# measurement construction, the span is sometimes extended to the right of
# the NOUN. Example: "eine Tasse Tee" (a cup (of) tea) returns "eine Tasse Tee"
# and not just "eine Tasse", same for "das Thema Familie".
- labels = [
- "sb",
- "oa",
- "da",
- "nk",
- "mo",
- "ag",
- "ROOT",
- "root",
- "cj",
- "pd",
- "og",
- "app",
- ]
+ # fmt: off
+ labels = ["sb", "oa", "da", "nk", "mo", "ag", "ROOT", "root", "cj", "pd", "og", "app"]
+ # fmt: on
doc = doclike.doc # Ensure works on both Doc and Span.
-
if not doc.is_parsed:
raise ValueError(Errors.E029)
-
np_label = doc.vocab.strings.add("NP")
np_deps = set(doc.vocab.strings.add(label) for label in labels)
close_app = doc.vocab.strings.add("nk")
-
rbracket = 0
for i, word in enumerate(doclike):
if i < rbracket:
diff --git a/spacy/lang/de/tag_map.py b/spacy/lang/de/tag_map.py
deleted file mode 100644
index c169501a9..000000000
--- a/spacy/lang/de/tag_map.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, ADJ, CCONJ, SCONJ, NUM, DET, ADV, ADP, X
-from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, AUX, VERB
-
-
-TAG_MAP = {
- "$(": {POS: PUNCT, "PunctType": "brck"},
- "$,": {POS: PUNCT, "PunctType": "comm"},
- "$.": {POS: PUNCT, "PunctType": "peri"},
- "ADJA": {POS: ADJ},
- "ADJD": {POS: ADJ},
- "ADV": {POS: ADV},
- "APPO": {POS: ADP, "AdpType": "post"},
- "APPR": {POS: ADP, "AdpType": "prep"},
- "APPRART": {POS: ADP, "AdpType": "prep", "PronType": "art"},
- "APZR": {POS: ADP, "AdpType": "circ"},
- "ART": {POS: DET, "PronType": "art"},
- "CARD": {POS: NUM, "NumType": "card"},
- "FM": {POS: X, "Foreign": "yes"},
- "ITJ": {POS: INTJ},
- "KOKOM": {POS: CCONJ, "ConjType": "comp"},
- "KON": {POS: CCONJ},
- "KOUI": {POS: SCONJ},
- "KOUS": {POS: SCONJ},
- "NE": {POS: PROPN},
- "NNE": {POS: PROPN},
- "NN": {POS: NOUN},
- "PAV": {POS: ADV, "PronType": "dem"},
- "PROAV": {POS: ADV, "PronType": "dem"},
- "PDAT": {POS: DET, "PronType": "dem"},
- "PDS": {POS: PRON, "PronType": "dem"},
- "PIAT": {POS: DET, "PronType": "ind|neg|tot"},
- "PIDAT": {POS: DET, "PronType": "ind|neg|tot"},
- "PIS": {POS: PRON, "PronType": "ind|neg|tot"},
- "PPER": {POS: PRON, "PronType": "prs"},
- "PPOSAT": {POS: DET, "Poss": "yes", "PronType": "prs"},
- "PPOSS": {POS: PRON, "Poss": "yes", "PronType": "prs"},
- "PRELAT": {POS: DET, "PronType": "rel"},
- "PRELS": {POS: PRON, "PronType": "rel"},
- "PRF": {POS: PRON, "PronType": "prs", "Reflex": "yes"},
- "PTKA": {POS: PART},
- "PTKANT": {POS: PART, "PartType": "res"},
- "PTKNEG": {POS: PART, "Polarity": "neg"},
- "PTKVZ": {POS: ADP, "PartType": "vbp"},
- "PTKZU": {POS: PART, "PartType": "inf"},
- "PWAT": {POS: DET, "PronType": "int"},
- "PWAV": {POS: ADV, "PronType": "int"},
- "PWS": {POS: PRON, "PronType": "int"},
- "TRUNC": {POS: X, "Hyph": "yes"},
- "VAFIN": {POS: AUX, "Mood": "ind", "VerbForm": "fin"},
- "VAIMP": {POS: AUX, "Mood": "imp", "VerbForm": "fin"},
- "VAINF": {POS: AUX, "VerbForm": "inf"},
- "VAPP": {POS: AUX, "Aspect": "perf", "VerbForm": "part"},
- "VMFIN": {POS: VERB, "Mood": "ind", "VerbForm": "fin", "VerbType": "mod"},
- "VMINF": {POS: VERB, "VerbForm": "inf", "VerbType": "mod"},
- "VMPP": {POS: VERB, "Aspect": "perf", "VerbForm": "part", "VerbType": "mod"},
- "VVFIN": {POS: VERB, "Mood": "ind", "VerbForm": "fin"},
- "VVIMP": {POS: VERB, "Mood": "imp", "VerbForm": "fin"},
- "VVINF": {POS: VERB, "VerbForm": "inf"},
- "VVIZU": {POS: VERB, "VerbForm": "inf"},
- "VVPP": {POS: VERB, "Aspect": "perf", "VerbForm": "part"},
- "XY": {POS: X},
- "_SP": {POS: SPACE},
-}
diff --git a/spacy/lang/de/tokenizer_exceptions.py b/spacy/lang/de/tokenizer_exceptions.py
index ebbbfba8c..21d99cffe 100644
--- a/spacy/lang/de/tokenizer_exceptions.py
+++ b/spacy/lang/de/tokenizer_exceptions.py
@@ -1,160 +1,135 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA, TAG, NORM, PRON_LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
_exc = {
- "auf'm": [{ORTH: "auf", LEMMA: "auf"}, {ORTH: "'m", LEMMA: "der", NORM: "dem"}],
- "du's": [
- {ORTH: "du", LEMMA: PRON_LEMMA, TAG: "PPER"},
- {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"},
- ],
- "er's": [
- {ORTH: "er", LEMMA: PRON_LEMMA, TAG: "PPER"},
- {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"},
- ],
- "hinter'm": [
- {ORTH: "hinter", LEMMA: "hinter"},
- {ORTH: "'m", LEMMA: "der", NORM: "dem"},
- ],
- "ich's": [
- {ORTH: "ich", LEMMA: PRON_LEMMA, TAG: "PPER"},
- {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"},
- ],
- "ihr's": [
- {ORTH: "ihr", LEMMA: PRON_LEMMA, TAG: "PPER"},
- {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"},
- ],
- "sie's": [
- {ORTH: "sie", LEMMA: PRON_LEMMA, TAG: "PPER"},
- {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"},
- ],
- "unter'm": [
- {ORTH: "unter", LEMMA: "unter"},
- {ORTH: "'m", LEMMA: "der", NORM: "dem"},
- ],
- "vor'm": [{ORTH: "vor", LEMMA: "vor"}, {ORTH: "'m", LEMMA: "der", NORM: "dem"}],
- "wir's": [
- {ORTH: "wir", LEMMA: PRON_LEMMA, TAG: "PPER"},
- {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"},
- ],
- "über'm": [{ORTH: "über", LEMMA: "über"}, {ORTH: "'m", LEMMA: "der", NORM: "dem"}],
+ "auf'm": [{ORTH: "auf"}, {ORTH: "'m", NORM: "dem"}],
+ "du's": [{ORTH: "du"}, {ORTH: "'s", NORM: "es"}],
+ "er's": [{ORTH: "er"}, {ORTH: "'s", NORM: "es"}],
+ "hinter'm": [{ORTH: "hinter"}, {ORTH: "'m", NORM: "dem"}],
+ "ich's": [{ORTH: "ich"}, {ORTH: "'s", NORM: "es"}],
+ "ihr's": [{ORTH: "ihr"}, {ORTH: "'s", NORM: "es"}],
+ "sie's": [{ORTH: "sie"}, {ORTH: "'s", NORM: "es"}],
+ "unter'm": [{ORTH: "unter"}, {ORTH: "'m", NORM: "dem"}],
+ "vor'm": [{ORTH: "vor"}, {ORTH: "'m", NORM: "dem"}],
+ "wir's": [{ORTH: "wir"}, {ORTH: "'s", NORM: "es"}],
+ "über'm": [{ORTH: "über"}, {ORTH: "'m", NORM: "dem"}],
}
for exc_data in [
- {ORTH: "'S", LEMMA: PRON_LEMMA, NORM: "'s", TAG: "PPER"},
- {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "'s", TAG: "PPER"},
- {ORTH: "S'", LEMMA: PRON_LEMMA, NORM: "'s", TAG: "PPER"},
- {ORTH: "s'", LEMMA: PRON_LEMMA, NORM: "'s", TAG: "PPER"},
- {ORTH: "'n", LEMMA: "ein", NORM: "ein"},
- {ORTH: "'ne", LEMMA: "eine", NORM: "eine"},
- {ORTH: "'nen", LEMMA: "ein", NORM: "einen"},
- {ORTH: "'nem", LEMMA: "ein", NORM: "einem"},
- {ORTH: "Abb.", LEMMA: "Abbildung", NORM: "Abbildung"},
- {ORTH: "Abk.", LEMMA: "Abkürzung", NORM: "Abkürzung"},
- {ORTH: "Abt.", LEMMA: "Abteilung", NORM: "Abteilung"},
- {ORTH: "Apr.", LEMMA: "April", NORM: "April"},
- {ORTH: "Aug.", LEMMA: "August", NORM: "August"},
- {ORTH: "Bd.", LEMMA: "Band", NORM: "Band"},
- {ORTH: "Betr.", LEMMA: "Betreff", NORM: "Betreff"},
- {ORTH: "Bf.", LEMMA: "Bahnhof", NORM: "Bahnhof"},
- {ORTH: "Bhf.", LEMMA: "Bahnhof", NORM: "Bahnhof"},
- {ORTH: "Bsp.", LEMMA: "Beispiel", NORM: "Beispiel"},
- {ORTH: "Dez.", LEMMA: "Dezember", NORM: "Dezember"},
- {ORTH: "Di.", LEMMA: "Dienstag", NORM: "Dienstag"},
- {ORTH: "Do.", LEMMA: "Donnerstag", NORM: "Donnerstag"},
- {ORTH: "Fa.", LEMMA: "Firma", NORM: "Firma"},
- {ORTH: "Fam.", LEMMA: "Familie", NORM: "Familie"},
- {ORTH: "Feb.", LEMMA: "Februar", NORM: "Februar"},
- {ORTH: "Fr.", LEMMA: "Frau", NORM: "Frau"},
- {ORTH: "Frl.", LEMMA: "Fräulein", NORM: "Fräulein"},
- {ORTH: "Hbf.", LEMMA: "Hauptbahnhof", NORM: "Hauptbahnhof"},
- {ORTH: "Hr.", LEMMA: "Herr", NORM: "Herr"},
- {ORTH: "Hrn.", LEMMA: "Herr", NORM: "Herrn"},
- {ORTH: "Jan.", LEMMA: "Januar", NORM: "Januar"},
- {ORTH: "Jh.", LEMMA: "Jahrhundert", NORM: "Jahrhundert"},
- {ORTH: "Jhd.", LEMMA: "Jahrhundert", NORM: "Jahrhundert"},
- {ORTH: "Jul.", LEMMA: "Juli", NORM: "Juli"},
- {ORTH: "Jun.", LEMMA: "Juni", NORM: "Juni"},
- {ORTH: "Mi.", LEMMA: "Mittwoch", NORM: "Mittwoch"},
- {ORTH: "Mio.", LEMMA: "Million", NORM: "Million"},
- {ORTH: "Mo.", LEMMA: "Montag", NORM: "Montag"},
- {ORTH: "Mrd.", LEMMA: "Milliarde", NORM: "Milliarde"},
- {ORTH: "Mrz.", LEMMA: "März", NORM: "März"},
- {ORTH: "MwSt.", LEMMA: "Mehrwertsteuer", NORM: "Mehrwertsteuer"},
- {ORTH: "Mär.", LEMMA: "März", NORM: "März"},
- {ORTH: "Nov.", LEMMA: "November", NORM: "November"},
- {ORTH: "Nr.", LEMMA: "Nummer", NORM: "Nummer"},
- {ORTH: "Okt.", LEMMA: "Oktober", NORM: "Oktober"},
- {ORTH: "Orig.", LEMMA: "Original", NORM: "Original"},
- {ORTH: "Pkt.", LEMMA: "Punkt", NORM: "Punkt"},
- {ORTH: "Prof.", LEMMA: "Professor", NORM: "Professor"},
- {ORTH: "Red.", LEMMA: "Redaktion", NORM: "Redaktion"},
- {ORTH: "Sa.", LEMMA: "Samstag", NORM: "Samstag"},
- {ORTH: "Sep.", LEMMA: "September", NORM: "September"},
- {ORTH: "Sept.", LEMMA: "September", NORM: "September"},
- {ORTH: "So.", LEMMA: "Sonntag", NORM: "Sonntag"},
- {ORTH: "Std.", LEMMA: "Stunde", NORM: "Stunde"},
- {ORTH: "Str.", LEMMA: "Straße", NORM: "Straße"},
- {ORTH: "Tel.", LEMMA: "Telefon", NORM: "Telefon"},
- {ORTH: "Tsd.", LEMMA: "Tausend", NORM: "Tausend"},
- {ORTH: "Univ.", LEMMA: "Universität", NORM: "Universität"},
- {ORTH: "abzgl.", LEMMA: "abzüglich", NORM: "abzüglich"},
- {ORTH: "allg.", LEMMA: "allgemein", NORM: "allgemein"},
- {ORTH: "bspw.", LEMMA: "beispielsweise", NORM: "beispielsweise"},
- {ORTH: "bzgl.", LEMMA: "bezüglich", NORM: "bezüglich"},
- {ORTH: "bzw.", LEMMA: "beziehungsweise", NORM: "beziehungsweise"},
- {ORTH: "d.h.", LEMMA: "das heißt"},
- {ORTH: "dgl.", LEMMA: "dergleichen", NORM: "dergleichen"},
- {ORTH: "ebd.", LEMMA: "ebenda", NORM: "ebenda"},
- {ORTH: "eigtl.", LEMMA: "eigentlich", NORM: "eigentlich"},
- {ORTH: "engl.", LEMMA: "englisch", NORM: "englisch"},
- {ORTH: "evtl.", LEMMA: "eventuell", NORM: "eventuell"},
- {ORTH: "frz.", LEMMA: "französisch", NORM: "französisch"},
- {ORTH: "gegr.", LEMMA: "gegründet", NORM: "gegründet"},
- {ORTH: "ggf.", LEMMA: "gegebenenfalls", NORM: "gegebenenfalls"},
- {ORTH: "ggfs.", LEMMA: "gegebenenfalls", NORM: "gegebenenfalls"},
- {ORTH: "ggü.", LEMMA: "gegenüber", NORM: "gegenüber"},
- {ORTH: "i.O.", LEMMA: "in Ordnung"},
- {ORTH: "i.d.R.", LEMMA: "in der Regel"},
- {ORTH: "incl.", LEMMA: "inklusive", NORM: "inklusive"},
- {ORTH: "inkl.", LEMMA: "inklusive", NORM: "inklusive"},
- {ORTH: "insb.", LEMMA: "insbesondere", NORM: "insbesondere"},
- {ORTH: "kath.", LEMMA: "katholisch", NORM: "katholisch"},
- {ORTH: "lt.", LEMMA: "laut", NORM: "laut"},
- {ORTH: "max.", LEMMA: "maximal", NORM: "maximal"},
- {ORTH: "min.", LEMMA: "minimal", NORM: "minimal"},
- {ORTH: "mind.", LEMMA: "mindestens", NORM: "mindestens"},
- {ORTH: "mtl.", LEMMA: "monatlich", NORM: "monatlich"},
- {ORTH: "n.Chr.", LEMMA: "nach Christus"},
- {ORTH: "orig.", LEMMA: "original", NORM: "original"},
- {ORTH: "röm.", LEMMA: "römisch", NORM: "römisch"},
- {ORTH: "s.o.", LEMMA: "siehe oben"},
- {ORTH: "sog.", LEMMA: "so genannt"},
- {ORTH: "stellv.", LEMMA: "stellvertretend"},
- {ORTH: "tägl.", LEMMA: "täglich", NORM: "täglich"},
- {ORTH: "u.U.", LEMMA: "unter Umständen"},
- {ORTH: "u.s.w.", LEMMA: "und so weiter"},
- {ORTH: "u.v.m.", LEMMA: "und vieles mehr"},
- {ORTH: "usf.", LEMMA: "und so fort"},
- {ORTH: "usw.", LEMMA: "und so weiter"},
- {ORTH: "uvm.", LEMMA: "und vieles mehr"},
- {ORTH: "v.Chr.", LEMMA: "vor Christus"},
- {ORTH: "v.a.", LEMMA: "vor allem"},
- {ORTH: "v.l.n.r.", LEMMA: "von links nach rechts"},
- {ORTH: "vgl.", LEMMA: "vergleiche", NORM: "vergleiche"},
- {ORTH: "vllt.", LEMMA: "vielleicht", NORM: "vielleicht"},
- {ORTH: "vlt.", LEMMA: "vielleicht", NORM: "vielleicht"},
- {ORTH: "z.B.", LEMMA: "zum Beispiel"},
- {ORTH: "z.Bsp.", LEMMA: "zum Beispiel"},
- {ORTH: "z.T.", LEMMA: "zum Teil"},
- {ORTH: "z.Z.", LEMMA: "zur Zeit"},
- {ORTH: "z.Zt.", LEMMA: "zur Zeit"},
- {ORTH: "z.b.", LEMMA: "zum Beispiel"},
- {ORTH: "zzgl.", LEMMA: "zuzüglich"},
- {ORTH: "österr.", LEMMA: "österreichisch", NORM: "österreichisch"},
+ {ORTH: "'S", NORM: "'s"},
+ {ORTH: "'s", NORM: "'s"},
+ {ORTH: "S'", NORM: "'s"},
+ {ORTH: "s'", NORM: "'s"},
+ {ORTH: "'n", NORM: "ein"},
+ {ORTH: "'ne", NORM: "eine"},
+ {ORTH: "'nen", NORM: "einen"},
+ {ORTH: "'nem", NORM: "einem"},
+ {ORTH: "Abb.", NORM: "Abbildung"},
+ {ORTH: "Abk.", NORM: "Abkürzung"},
+ {ORTH: "Abt.", NORM: "Abteilung"},
+ {ORTH: "Apr.", NORM: "April"},
+ {ORTH: "Aug.", NORM: "August"},
+ {ORTH: "Bd.", NORM: "Band"},
+ {ORTH: "Betr.", NORM: "Betreff"},
+ {ORTH: "Bf.", NORM: "Bahnhof"},
+ {ORTH: "Bhf.", NORM: "Bahnhof"},
+ {ORTH: "Bsp.", NORM: "Beispiel"},
+ {ORTH: "Dez.", NORM: "Dezember"},
+ {ORTH: "Di.", NORM: "Dienstag"},
+ {ORTH: "Do.", NORM: "Donnerstag"},
+ {ORTH: "Fa.", NORM: "Firma"},
+ {ORTH: "Fam.", NORM: "Familie"},
+ {ORTH: "Feb.", NORM: "Februar"},
+ {ORTH: "Fr.", NORM: "Frau"},
+ {ORTH: "Frl.", NORM: "Fräulein"},
+ {ORTH: "Hbf.", NORM: "Hauptbahnhof"},
+ {ORTH: "Hr.", NORM: "Herr"},
+ {ORTH: "Hrn.", NORM: "Herrn"},
+ {ORTH: "Jan.", NORM: "Januar"},
+ {ORTH: "Jh.", NORM: "Jahrhundert"},
+ {ORTH: "Jhd.", NORM: "Jahrhundert"},
+ {ORTH: "Jul.", NORM: "Juli"},
+ {ORTH: "Jun.", NORM: "Juni"},
+ {ORTH: "Mi.", NORM: "Mittwoch"},
+ {ORTH: "Mio.", NORM: "Million"},
+ {ORTH: "Mo.", NORM: "Montag"},
+ {ORTH: "Mrd.", NORM: "Milliarde"},
+ {ORTH: "Mrz.", NORM: "März"},
+ {ORTH: "MwSt.", NORM: "Mehrwertsteuer"},
+ {ORTH: "Mär.", NORM: "März"},
+ {ORTH: "Nov.", NORM: "November"},
+ {ORTH: "Nr.", NORM: "Nummer"},
+ {ORTH: "Okt.", NORM: "Oktober"},
+ {ORTH: "Orig.", NORM: "Original"},
+ {ORTH: "Pkt.", NORM: "Punkt"},
+ {ORTH: "Prof.", NORM: "Professor"},
+ {ORTH: "Red.", NORM: "Redaktion"},
+ {ORTH: "Sa.", NORM: "Samstag"},
+ {ORTH: "Sep.", NORM: "September"},
+ {ORTH: "Sept.", NORM: "September"},
+ {ORTH: "So.", NORM: "Sonntag"},
+ {ORTH: "Std.", NORM: "Stunde"},
+ {ORTH: "Str.", NORM: "Straße"},
+ {ORTH: "Tel.", NORM: "Telefon"},
+ {ORTH: "Tsd.", NORM: "Tausend"},
+ {ORTH: "Univ.", NORM: "Universität"},
+ {ORTH: "abzgl.", NORM: "abzüglich"},
+ {ORTH: "allg.", NORM: "allgemein"},
+ {ORTH: "bspw.", NORM: "beispielsweise"},
+ {ORTH: "bzgl.", NORM: "bezüglich"},
+ {ORTH: "bzw.", NORM: "beziehungsweise"},
+ {ORTH: "d.h."},
+ {ORTH: "dgl.", NORM: "dergleichen"},
+ {ORTH: "ebd.", NORM: "ebenda"},
+ {ORTH: "eigtl.", NORM: "eigentlich"},
+ {ORTH: "engl.", NORM: "englisch"},
+ {ORTH: "evtl.", NORM: "eventuell"},
+ {ORTH: "frz.", NORM: "französisch"},
+ {ORTH: "gegr.", NORM: "gegründet"},
+ {ORTH: "ggf.", NORM: "gegebenenfalls"},
+ {ORTH: "ggfs.", NORM: "gegebenenfalls"},
+ {ORTH: "ggü.", NORM: "gegenüber"},
+ {ORTH: "i.O."},
+ {ORTH: "i.d.R."},
+ {ORTH: "incl.", NORM: "inklusive"},
+ {ORTH: "inkl.", NORM: "inklusive"},
+ {ORTH: "insb.", NORM: "insbesondere"},
+ {ORTH: "kath.", NORM: "katholisch"},
+ {ORTH: "lt.", NORM: "laut"},
+ {ORTH: "max.", NORM: "maximal"},
+ {ORTH: "min.", NORM: "minimal"},
+ {ORTH: "mind.", NORM: "mindestens"},
+ {ORTH: "mtl.", NORM: "monatlich"},
+ {ORTH: "n.Chr."},
+ {ORTH: "orig.", NORM: "original"},
+ {ORTH: "röm.", NORM: "römisch"},
+ {ORTH: "s.o."},
+ {ORTH: "sog."},
+ {ORTH: "stellv."},
+ {ORTH: "tägl.", NORM: "täglich"},
+ {ORTH: "u.U."},
+ {ORTH: "u.s.w."},
+ {ORTH: "u.v.m."},
+ {ORTH: "usf."},
+ {ORTH: "usw."},
+ {ORTH: "uvm."},
+ {ORTH: "v.Chr."},
+ {ORTH: "v.a."},
+ {ORTH: "v.l.n.r."},
+ {ORTH: "vgl.", NORM: "vergleiche"},
+ {ORTH: "vllt.", NORM: "vielleicht"},
+ {ORTH: "vlt.", NORM: "vielleicht"},
+ {ORTH: "z.B."},
+ {ORTH: "z.Bsp."},
+ {ORTH: "z.T."},
+ {ORTH: "z.Z."},
+ {ORTH: "z.Zt."},
+ {ORTH: "z.b."},
+ {ORTH: "zzgl."},
+ {ORTH: "österr.", NORM: "österreichisch"},
]:
_exc[exc_data[ORTH]] = [exc_data]
@@ -257,4 +232,4 @@ for orth in [
_exc[orth] = [{ORTH: orth}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/el/__init__.py b/spacy/lang/el/__init__.py
index d03a42da9..0c5e0672b 100644
--- a/spacy/lang/el/__init__.py
+++ b/spacy/lang/el/__init__.py
@@ -1,43 +1,47 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import unicode_literals
+from typing import Optional
+from thinc.api import Model
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from ..tag_map import TAG_MAP
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-from .lemmatizer import GreekLemmatizer
from .syntax_iterators import SYNTAX_ITERATORS
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ...language import Language
+from .lemmatizer import GreekLemmatizer
from ...lookups import Lookups
-from ...attrs import LANG
-from ...util import update_exc
+from ...language import Language
class GreekDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "el"
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
- tag_map = TAG_MAP
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
suffixes = TOKENIZER_SUFFIXES
infixes = TOKENIZER_INFIXES
+ lex_attr_getters = LEX_ATTRS
+ stop_words = STOP_WORDS
syntax_iterators = SYNTAX_ITERATORS
- @classmethod
- def create_lemmatizer(cls, nlp=None, lookups=None):
- if lookups is None:
- lookups = Lookups()
- return GreekLemmatizer(lookups)
-
class Greek(Language):
lang = "el"
Defaults = GreekDefaults
+@Greek.factory(
+ "lemmatizer",
+ assigns=["token.lemma"],
+ default_config={"model": None, "mode": "rule", "lookups": None},
+ scores=["lemma_acc"],
+ default_score_weights={"lemma_acc": 1.0},
+)
+def make_lemmatizer(
+ nlp: Language,
+ model: Optional[Model],
+ name: str,
+ mode: str,
+ lookups: Optional[Lookups],
+):
+ lookups = GreekLemmatizer.load_lookups(nlp.lang, mode, lookups)
+ return GreekLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+
+
__all__ = ["Greek"]
diff --git a/spacy/lang/el/examples.py b/spacy/lang/el/examples.py
index 521e7b30d..62515c07a 100644
--- a/spacy/lang/el/examples.py
+++ b/spacy/lang/el/examples.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import unicode_literals
-
"""
Example sentences to test spaCy and its language models.
>>> from spacy.lang.el.examples import sentences
diff --git a/spacy/lang/el/get_pos_from_wiktionary.py b/spacy/lang/el/get_pos_from_wiktionary.py
index f41833974..369973cc0 100644
--- a/spacy/lang/el/get_pos_from_wiktionary.py
+++ b/spacy/lang/el/get_pos_from_wiktionary.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
def get_pos_from_wiktionary():
import re
from gensim.corpora.wikicorpus import extract_pages
diff --git a/spacy/lang/el/lemmatizer.py b/spacy/lang/el/lemmatizer.py
index 6f5b3999b..a049601dc 100644
--- a/spacy/lang/el/lemmatizer.py
+++ b/spacy/lang/el/lemmatizer.py
@@ -1,7 +1,7 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import List
-from ...lemmatizer import Lemmatizer
+from ...pipeline import Lemmatizer
+from ...tokens import Token
class GreekLemmatizer(Lemmatizer):
@@ -15,7 +15,27 @@ class GreekLemmatizer(Lemmatizer):
not applicable for Greek language.
"""
- def lemmatize(self, string, index, exceptions, rules):
+ def rule_lemmatize(self, token: Token) -> List[str]:
+ """Lemmatize using a rule-based approach.
+
+ token (Token): The token to lemmatize.
+ RETURNS (list): The available lemmas for the string.
+ """
+ cache_key = (token.lower, token.pos)
+ if cache_key in self.cache:
+ return self.cache[cache_key]
+ string = token.text
+ univ_pos = token.pos_.lower()
+ if univ_pos in ("", "eol", "space"):
+ return [string.lower()]
+
+ index_table = self.lookups.get_table("lemma_index", {})
+ exc_table = self.lookups.get_table("lemma_exc", {})
+ rules_table = self.lookups.get_table("lemma_rules", {})
+ index = index_table.get(univ_pos, {})
+ exceptions = exc_table.get(univ_pos, {})
+ rules = rules_table.get(univ_pos, {})
+
string = string.lower()
forms = []
if string in index:
@@ -37,4 +57,6 @@ class GreekLemmatizer(Lemmatizer):
forms.extend(oov_forms)
if not forms:
forms.append(string)
- return list(set(forms))
+ forms = list(set(forms))
+ self.cache[cache_key] = forms
+ return forms
diff --git a/spacy/lang/el/lex_attrs.py b/spacy/lang/el/lex_attrs.py
index cf32fe12c..5c8f96848 100644
--- a/spacy/lang/el/lex_attrs.py
+++ b/spacy/lang/el/lex_attrs.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
_num_words = [
diff --git a/spacy/lang/el/punctuation.py b/spacy/lang/el/punctuation.py
index fbf773f4d..2d5690407 100644
--- a/spacy/lang/el/punctuation.py
+++ b/spacy/lang/el/punctuation.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import unicode_literals
-
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_CURRENCY
from ..char_classes import LIST_ICONS, ALPHA_LOWER, ALPHA_UPPER, ALPHA, HYPHENS
from ..char_classes import CONCAT_QUOTES, CURRENCY
diff --git a/spacy/lang/el/stop_words.py b/spacy/lang/el/stop_words.py
index f13c47ec2..7c436219f 100644
--- a/spacy/lang/el/stop_words.py
+++ b/spacy/lang/el/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Stop words
# Link to greek stop words: https://www.translatum.gr/forum/index.php?topic=3550.0?topic=3550.0
STOP_WORDS = set(
diff --git a/spacy/lang/el/syntax_iterators.py b/spacy/lang/el/syntax_iterators.py
index 4a40e28c2..0a13edcc0 100644
--- a/spacy/lang/el/syntax_iterators.py
+++ b/spacy/lang/el/syntax_iterators.py
@@ -1,24 +1,20 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Union, Iterator
from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...tokens import Doc, Span
-def noun_chunks(doclike):
- """
- Detect base noun phrases. Works on both Doc and Span.
- """
+def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Span]:
+ """Detect base noun phrases from a dependency parse. Works on Doc and Span."""
# It follows the logic of the noun chunks finder of English language,
# adjusted to some Greek language special characteristics.
# obj tag corrects some DEP tagger mistakes.
# Further improvement of the models will eliminate the need for this tag.
labels = ["nsubj", "obj", "iobj", "appos", "ROOT", "obl"]
doc = doclike.doc # Ensure works on both Doc and Span.
-
if not doc.is_parsed:
raise ValueError(Errors.E029)
-
np_deps = [doc.vocab.strings.add(label) for label in labels]
conj = doc.vocab.strings.add("conj")
nmod = doc.vocab.strings.add("nmod")
diff --git a/spacy/lang/el/tag_map_fine.py b/spacy/lang/el/tag_map_fine.py
deleted file mode 100644
index b346299bc..000000000
--- a/spacy/lang/el/tag_map_fine.py
+++ /dev/null
@@ -1,4268 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, SCONJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import NOUN, PROPN, PART, INTJ, PRON, AUX
-
-
-TAG_MAP = {
- "ABBR": {POS: NOUN, "Abbr": "Yes"},
- "AdXxBa": {POS: ADV, "Degree": ""},
- "AdXxCp": {POS: ADV, "Degree": "Cmp"},
- "AdXxSu": {POS: ADV, "Degree": "Sup"},
- "AjBaFePlAc": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Acc",
- },
- "AjBaFePlDa": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Dat",
- },
- "AjBaFePlGe": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Gen",
- },
- "AjBaFePlNm": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Nom",
- },
- "AjBaFePlVo": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Voc",
- },
- "AjBaFeSgAc": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- },
- "AjBaFeSgDa": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Dat",
- },
- "AjBaFeSgGe": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- },
- "AjBaFeSgNm": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- },
- "AjBaFeSgVo": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Voc",
- },
- "AjBaMaPlAc": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Acc",
- },
- "AjBaMaPlDa": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Dat",
- },
- "AjBaMaPlGe": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Gen",
- },
- "AjBaMaPlNm": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Nom",
- },
- "AjBaMaPlVo": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Voc",
- },
- "AjBaMaSgAc": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Acc",
- },
- "AjBaMaSgDa": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Dat",
- },
- "AjBaMaSgGe": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Gen",
- },
- "AjBaMaSgNm": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Nom",
- },
- "AjBaMaSgVo": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Voc",
- },
- "AjBaNePlAc": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Acc",
- },
- "AjBaNePlDa": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Dat",
- },
- "AjBaNePlGe": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Gen",
- },
- "AjBaNePlNm": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Nom",
- },
- "AjBaNePlVo": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Voc",
- },
- "AjBaNeSgAc": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Acc",
- },
- "AjBaNeSgDa": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Dat",
- },
- "AjBaNeSgGe": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Gen",
- },
- "AjBaNeSgNm": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Nom",
- },
- "AjBaNeSgVo": {
- POS: ADJ,
- "Degree": "",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Voc",
- },
- "AjCpFePlAc": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Acc",
- },
- "AjCpFePlDa": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Dat",
- },
- "AjCpFePlGe": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Gen",
- },
- "AjCpFePlNm": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Nom",
- },
- "AjCpFePlVo": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Voc",
- },
- "AjCpFeSgAc": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- },
- "AjCpFeSgDa": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Dat",
- },
- "AjCpFeSgGe": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- },
- "AjCpFeSgNm": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- },
- "AjCpFeSgVo": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Voc",
- },
- "AjCpMaPlAc": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Acc",
- },
- "AjCpMaPlDa": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Dat",
- },
- "AjCpMaPlGe": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Gen",
- },
- "AjCpMaPlNm": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Nom",
- },
- "AjCpMaPlVo": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Voc",
- },
- "AjCpMaSgAc": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Acc",
- },
- "AjCpMaSgDa": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Dat",
- },
- "AjCpMaSgGe": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Gen",
- },
- "AjCpMaSgNm": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Nom",
- },
- "AjCpMaSgVo": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Voc",
- },
- "AjCpNePlAc": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Acc",
- },
- "AjCpNePlDa": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Dat",
- },
- "AjCpNePlGe": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Gen",
- },
- "AjCpNePlNm": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Nom",
- },
- "AjCpNePlVo": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Voc",
- },
- "AjCpNeSgAc": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Acc",
- },
- "AjCpNeSgDa": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Dat",
- },
- "AjCpNeSgGe": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Gen",
- },
- "AjCpNeSgNm": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Nom",
- },
- "AjCpNeSgVo": {
- POS: ADJ,
- "Degree": "Cmp",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Voc",
- },
- "AjSuFePlAc": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Acc",
- },
- "AjSuFePlDa": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Dat",
- },
- "AjSuFePlGe": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Gen",
- },
- "AjSuFePlNm": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Nom",
- },
- "AjSuFePlVo": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Voc",
- },
- "AjSuFeSgAc": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- },
- "AjSuFeSgDa": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Dat",
- },
- "AjSuFeSgGe": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- },
- "AjSuFeSgNm": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- },
- "AjSuFeSgVo": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Voc",
- },
- "AjSuMaPlAc": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Acc",
- },
- "AjSuMaPlDa": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Dat",
- },
- "AjSuMaPlGe": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Gen",
- },
- "AjSuMaPlNm": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Nom",
- },
- "AjSuMaPlVo": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Voc",
- },
- "AjSuMaSgAc": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Acc",
- },
- "AjSuMaSgDa": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Dat",
- },
- "AjSuMaSgGe": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Gen",
- },
- "AjSuMaSgNm": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Nom",
- },
- "AjSuMaSgVo": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Voc",
- },
- "AjSuNePlAc": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Acc",
- },
- "AjSuNePlDa": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Dat",
- },
- "AjSuNePlGe": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Gen",
- },
- "AjSuNePlNm": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Nom",
- },
- "AjSuNePlVo": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Voc",
- },
- "AjSuNeSgAc": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Acc",
- },
- "AjSuNeSgDa": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Dat",
- },
- "AjSuNeSgGe": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Gen",
- },
- "AjSuNeSgNm": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Nom",
- },
- "AjSuNeSgVo": {
- POS: ADJ,
- "Degree": "Sup",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Voc",
- },
- "AsPpPaFePlAc": {POS: ADP, "Gender": "Fem", "Number": "Plur", "Case": "Acc"},
- "AsPpPaFePlGe": {POS: ADP, "Gender": "Fem", "Number": "Plur", "Case": "Gen"},
- "AsPpPaFeSgAc": {POS: ADP, "Gender": "Fem", "Number": "Sing", "Case": "Acc"},
- "AsPpPaFeSgGe": {POS: ADP, "Gender": "Fem", "Number": "Sing", "Case": "Gen"},
- "AsPpPaMaPlAc": {POS: ADP, "Gender": "Masc", "Number": "Plur", "Case": "Acc"},
- "AsPpPaMaPlGe": {POS: ADP, "Gender": "Masc", "Number": "Plur", "Case": "Gen"},
- "AsPpPaMaSgAc": {POS: ADP, "Gender": "Masc", "Number": "Sing", "Case": "Acc"},
- "AsPpPaMaSgGe": {POS: ADP, "Gender": "Masc", "Number": "Sing", "Case": "Gen"},
- "AsPpPaNePlAc": {POS: ADP, "Gender": "Neut", "Number": "Plur", "Case": "Acc"},
- "AsPpPaNePlGe": {POS: ADP, "Gender": "Neut", "Number": "Plur", "Case": "Gen"},
- "AsPpPaNeSgAc": {POS: ADP, "Gender": "Neut", "Number": "Sing", "Case": "Acc"},
- "AsPpPaNeSgGe": {POS: ADP, "Gender": "Neut", "Number": "Sing", "Case": "Gen"},
- "AsPpSp": {POS: ADP},
- "AtDfFePlAc": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Acc",
- "Other": {"Definite": "Def"},
- },
- "AtDfFePlGe": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Gen",
- "Other": {"Definite": "Def"},
- },
- "AtDfFePlNm": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Nom",
- "Other": {"Definite": "Def"},
- },
- "AtDfFeSgAc": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- "Other": {"Definite": "Def"},
- },
- "AtDfFeSgDa": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Dat",
- "Other": {"Definite": "Def"},
- },
- "AtDfFeSgGe": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- "Other": {"Definite": "Def"},
- },
- "AtDfFeSgNm": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- "Other": {"Definite": "Def"},
- },
- "AtDfMaPlAc": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Acc",
- "Other": {"Definite": "Def"},
- },
- "AtDfMaPlGe": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Gen",
- "Other": {"Definite": "Def"},
- },
- "AtDfMaPlNm": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Nom",
- "Other": {"Definite": "Def"},
- },
- "AtDfMaSgAc": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Acc",
- "Other": {"Definite": "Def"},
- },
- "AtDfMaSgDa": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Dat",
- "Other": {"Definite": "Def"},
- },
- "AtDfMaSgGe": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Gen",
- "Other": {"Definite": "Def"},
- },
- "AtDfMaSgNm": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Nom",
- "Other": {"Definite": "Def"},
- },
- "AtDfNePlAc": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Acc",
- "Other": {"Definite": "Def"},
- },
- "AtDfNePlDa": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Dat",
- "Other": {"Definite": "Def"},
- },
- "AtDfNePlGe": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Gen",
- "Other": {"Definite": "Def"},
- },
- "AtDfNePlNm": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Nom",
- "Other": {"Definite": "Def"},
- },
- "AtDfNeSgAc": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Acc",
- "Other": {"Definite": "Def"},
- },
- "AtDfNeSgDa": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Dat",
- "Other": {"Definite": "Def"},
- },
- "AtDfNeSgGe": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Gen",
- "Other": {"Definite": "Def"},
- },
- "AtDfNeSgNm": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Nom",
- "Other": {"Definite": "Def"},
- },
- "AtIdFeSgAc": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- "Other": {"Definite": "Ind"},
- },
- "AtIdFeSgDa": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Dat",
- "Other": {"Definite": "Ind"},
- },
- "AtIdFeSgGe": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- "Other": {"Definite": "Ind"},
- },
- "AtIdFeSgNm": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- "Other": {"Definite": "Ind"},
- },
- "AtIdMaSgAc": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Acc",
- "Other": {"Definite": "Ind"},
- },
- "AtIdMaSgGe": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Gen",
- "Other": {"Definite": "Ind"},
- },
- "AtIdMaSgNm": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Nom",
- "Other": {"Definite": "Ind"},
- },
- "AtIdNeSgAc": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Acc",
- "Other": {"Definite": "Ind"},
- },
- "AtIdNeSgGe": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Gen",
- "Other": {"Definite": "Ind"},
- },
- "AtIdNeSgNm": {
- POS: DET,
- "PronType": "Art",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Nom",
- "Other": {"Definite": "Ind"},
- },
- "CjCo": {POS: CCONJ},
- "CjSb": {POS: SCONJ},
- "CPUNCT": {POS: PUNCT},
- "DATE": {POS: NUM},
- "DIG": {POS: NUM},
- "ENUM": {POS: NUM},
- "Ij": {POS: INTJ},
- "INIT": {POS: SYM},
- "NBABBR": {POS: NOUN, "Abbr": "Yes"},
- "NmAnFePlAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmAnFePlGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmAnFePlNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmAnFePlVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmAnFeSgAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmAnFeSgGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmAnFeSgNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmAnFeSgVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NmAnMaPlAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmAnMaPlGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmAnMaPlNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmAnMaPlVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmAnMaSgAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmAnMaSgGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmAnMaSgNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmAnMaSgVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NmAnNePlAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmAnNePlGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmAnNePlNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmAnNePlVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmAnNeSgAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmAnNeSgGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmAnNeSgNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmAnNeSgVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NmAnXxXxXxAd": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc|Fem|Neut",
- "Number": "Sing|Plur",
- "Case": "Acc|Gen|Nom|Voc",
- },
- "NmCdFePlAcAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmCdFePlGeAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmCdFePlNmAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmCdFePlVoAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmCdFeSgAcAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmCdFeSgDaAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Dat",
- },
- "NmCdFeSgGeAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmCdFeSgNmAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmCdMaPlAcAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmCdMaPlGeAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmCdMaPlNmAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmCdMaPlVoAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmCdMaSgAcAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmCdMaSgGeAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmCdMaSgNmAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmCdNePlAcAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmCdNePlDaAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Dat",
- },
- "NmCdNePlGeAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmCdNePlNmAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmCdNePlVoAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmCdNeSgAcAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmCdNeSgGeAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmCdNeSgNmAj": {
- POS: NUM,
- "NumType": "Card",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmCtFePlAcNo": {
- POS: NUM,
- "NumType": "Sets",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmCtFePlGeNo": {
- POS: NUM,
- "NumType": "Sets",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmCtFePlNmNo": {
- POS: NUM,
- "NumType": "Sets",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmCtFePlVoNo": {
- POS: NUM,
- "NumType": "Sets",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmCtFeSgAcNo": {
- POS: NUM,
- "NumType": "Sets",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmCtFeSgGeNo": {
- POS: NUM,
- "NumType": "Sets",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmCtFeSgNmNo": {
- POS: NUM,
- "NumType": "Sets",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmCtFeSgVoNo": {
- POS: NUM,
- "NumType": "Sets",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NmMlFePlAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmMlFePlGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmMlFePlNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmMlFePlVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmMlFeSgAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmMlFeSgGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmMlFeSgNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmMlFeSgVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NmMlMaPlAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmMlMaPlGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmMlMaPlNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmMlMaPlVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmMlMaSgAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmMlMaSgGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmMlMaSgNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmMlMaSgVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NmMlNePlAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmMlNePlGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmMlNePlNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmMlNePlVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmMlNeSgAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmMlNeSgGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmMlNeSgNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmMlNeSgVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NmMlXxXxXxAd": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Masc|Fem|Neut",
- "Number": "Sing|Plur",
- "Case": "Acc|Gen|Nom|Voc",
- },
- "NmOdFePlAcAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmOdFePlGeAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmOdFePlNmAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmOdFePlVoAj": {
- POS: NUM,
- "NumType": "Mult",
- "Gender": "Fem",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmOdFeSgAcAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmOdFeSgGeAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmOdFeSgNmAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmOdFeSgVoAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Fem",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NmOdMaPlAcAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmOdMaPlGeAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmOdMaPlNmAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmOdMaPlVoAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Masc",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmOdMaSgAcAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmOdMaSgGeAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmOdMaSgNmAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmOdMaSgVoAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Masc",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NmOdNePlAcAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Acc",
- },
- "NmOdNePlGeAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Gen",
- },
- "NmOdNePlNmAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Nom",
- },
- "NmOdNePlVoAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Neut",
- "Number": "Plur",
- "Case": "Voc",
- },
- "NmOdNeSgAcAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Acc",
- },
- "NmOdNeSgGeAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Gen",
- },
- "NmOdNeSgNmAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Nom",
- },
- "NmOdNeSgVoAj": {
- POS: NUM,
- "NumType": "Ord",
- "Gender": "Neut",
- "Number": "Sing",
- "Case": "Voc",
- },
- "NoCmFePlAc": {POS: NOUN, "Gender": "Fem", "Number": "Plur", "Case": "Acc"},
- "NoCmFePlDa": {POS: NOUN, "Gender": "Fem", "Number": "Plur", "Case": "Dat"},
- "NoCmFePlGe": {POS: NOUN, "Gender": "Fem", "Number": "Plur", "Case": "Gen"},
- "NoCmFePlNm": {POS: NOUN, "Gender": "Fem", "Number": "Plur", "Case": "Nom"},
- "NoCmFePlVo": {POS: NOUN, "Gender": "Fem", "Number": "Plur", "Case": "Voc"},
- "NoCmFeSgAc": {POS: NOUN, "Gender": "Fem", "Number": "Sing", "Case": "Acc"},
- "NoCmFeSgDa": {POS: NOUN, "Gender": "Fem", "Number": "Sing", "Case": "Dat"},
- "NoCmFeSgGe": {POS: NOUN, "Gender": "Fem", "Number": "Sing", "Case": "Gen"},
- "NoCmFeSgNm": {POS: NOUN, "Gender": "Fem", "Number": "Sing", "Case": "Nom"},
- "NoCmFeSgVo": {POS: NOUN, "Gender": "Fem", "Number": "Sing", "Case": "Voc"},
- "NoCmMaPlAc": {POS: NOUN, "Gender": "Masc", "Number": "Plur", "Case": "Acc"},
- "NoCmMaPlDa": {POS: NOUN, "Gender": "Masc", "Number": "Plur", "Case": "Dat"},
- "NoCmMaPlGe": {POS: NOUN, "Gender": "Masc", "Number": "Plur", "Case": "Gen"},
- "NoCmMaPlNm": {POS: NOUN, "Gender": "Masc", "Number": "Plur", "Case": "Nom"},
- "NoCmMaPlVo": {POS: NOUN, "Gender": "Masc", "Number": "Plur", "Case": "Voc"},
- "NoCmMaSgAc": {POS: NOUN, "Gender": "Masc", "Number": "Sing", "Case": "Acc"},
- "NoCmMaSgDa": {POS: NOUN, "Gender": "Masc", "Number": "Sing", "Case": "Dat"},
- "NoCmMaSgGe": {POS: NOUN, "Gender": "Masc", "Number": "Sing", "Case": "Gen"},
- "NoCmMaSgNm": {POS: NOUN, "Gender": "Masc", "Number": "Sing", "Case": "Nom"},
- "NoCmMaSgVo": {POS: NOUN, "Gender": "Masc", "Number": "Sing", "Case": "Voc"},
- "NoCmNePlAc": {POS: NOUN, "Gender": "Neut", "Number": "Plur", "Case": "Acc"},
- "NoCmNePlDa": {POS: NOUN, "Gender": "Neut", "Number": "Plur", "Case": "Dat"},
- "NoCmNePlGe": {POS: NOUN, "Gender": "Neut", "Number": "Plur", "Case": "Gen"},
- "NoCmNePlNm": {POS: NOUN, "Gender": "Neut", "Number": "Plur", "Case": "Nom"},
- "NoCmNePlVo": {POS: NOUN, "Gender": "Neut", "Number": "Plur", "Case": "Voc"},
- "NoCmNeSgAc": {POS: NOUN, "Gender": "Neut", "Number": "Sing", "Case": "Acc"},
- "NoCmNeSgDa": {POS: NOUN, "Gender": "Neut", "Number": "Sing", "Case": "Dat"},
- "NoCmNeSgGe": {POS: NOUN, "Gender": "Neut", "Number": "Sing", "Case": "Gen"},
- "NoCmNeSgNm": {POS: NOUN, "Gender": "Neut", "Number": "Sing", "Case": "Nom"},
- "NoCmNeSgVo": {POS: NOUN, "Gender": "Neut", "Number": "Sing", "Case": "Voc"},
- "NoPrFePlAc": {POS: PROPN, "Gender": "Fem", "Number": "Plur", "Case": "Acc"},
- "NoPrFePlDa": {POS: PROPN, "Gender": "Fem", "Number": "Plur", "Case": "Dat"},
- "NoPrFePlGe": {POS: PROPN, "Gender": "Fem", "Number": "Plur", "Case": "Gen"},
- "NoPrFePlNm": {POS: PROPN, "Gender": "Fem", "Number": "Plur", "Case": "Nom"},
- "NoPrFePlVo": {POS: PROPN, "Gender": "Fem", "Number": "Plur", "Case": "Voc"},
- "NoPrFeSgAc": {POS: PROPN, "Gender": "Fem", "Number": "Sing", "Case": "Acc"},
- "NoPrFeSgDa": {POS: PROPN, "Gender": "Fem", "Number": "Sing", "Case": "Dat"},
- "NoPrFeSgGe": {POS: PROPN, "Gender": "Fem", "Number": "Sing", "Case": "Gen"},
- "NoPrFeSgNm": {POS: PROPN, "Gender": "Fem", "Number": "Sing", "Case": "Nom"},
- "NoPrFeSgVo": {POS: PROPN, "Gender": "Fem", "Number": "Sing", "Case": "Voc"},
- "NoPrMaPlAc": {POS: PROPN, "Gender": "Masc", "Number": "Plur", "Case": "Acc"},
- "NoPrMaPlGe": {POS: PROPN, "Gender": "Masc", "Number": "Plur", "Case": "Gen"},
- "NoPrMaPlNm": {POS: PROPN, "Gender": "Masc", "Number": "Plur", "Case": "Nom"},
- "NoPrMaPlVo": {POS: PROPN, "Gender": "Masc", "Number": "Plur", "Case": "Voc"},
- "NoPrMaSgAc": {POS: PROPN, "Gender": "Masc", "Number": "Sing", "Case": "Acc"},
- "NoPrMaSgDa": {POS: PROPN, "Gender": "Masc", "Number": "Sing", "Case": "Dat"},
- "NoPrMaSgGe": {POS: PROPN, "Gender": "Masc", "Number": "Sing", "Case": "Gen"},
- "NoPrMaSgNm": {POS: PROPN, "Gender": "Masc", "Number": "Sing", "Case": "Nom"},
- "NoPrMaSgVo": {POS: PROPN, "Gender": "Masc", "Number": "Sing", "Case": "Voc"},
- "NoPrNePlAc": {POS: PROPN, "Gender": "Neut", "Number": "Plur", "Case": "Acc"},
- "NoPrNePlGe": {POS: PROPN, "Gender": "Neut", "Number": "Plur", "Case": "Gen"},
- "NoPrNePlNm": {POS: PROPN, "Gender": "Neut", "Number": "Plur", "Case": "Nom"},
- "NoPrNeSgAc": {POS: PROPN, "Gender": "Neut", "Number": "Sing", "Case": "Acc"},
- "NoPrNeSgGe": {POS: PROPN, "Gender": "Neut", "Number": "Sing", "Case": "Gen"},
- "NoPrNeSgNm": {POS: PROPN, "Gender": "Neut", "Number": "Sing", "Case": "Nom"},
- "OPUNCT": {POS: PUNCT},
- "PnDfFe03PlAcXx": {
- POS: PRON,
- "PronType": "",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnDfFe03SgAcXx": {
- POS: PRON,
- "PronType": "",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnDfMa03PlGeXx": {
- POS: PRON,
- "PronType": "",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnDmFe03PlAcXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnDmFe03PlGeXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnDmFe03PlNmXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnDmFe03SgAcXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnDmFe03SgDaXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Dat",
- },
- "PnDmFe03SgGeXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnDmFe03SgNmXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnDmMa03PlAcXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnDmMa03PlDaXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Dat",
- },
- "PnDmMa03PlGeXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnDmMa03PlNmXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnDmMa03SgAcXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnDmMa03SgGeXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnDmMa03SgNmXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnDmNe03PlAcXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnDmNe03PlDaXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Dat",
- },
- "PnDmNe03PlGeXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnDmNe03PlNmXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnDmNe03SgAcXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnDmNe03SgDaXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Dat",
- },
- "PnDmNe03SgGeXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnDmNe03SgNmXx": {
- POS: PRON,
- "PronType": "Dem",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnIdFe03PlAcXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnIdFe03PlGeXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnIdFe03PlNmXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnIdFe03SgAcXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnIdFe03SgGeXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnIdFe03SgNmXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnIdMa03PlAcXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnIdMa03PlGeXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnIdMa03PlNmXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnIdMa03SgAcXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnIdMa03SgGeXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnIdMa03SgNmXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnIdNe03PlAcXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnIdNe03PlGeXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnIdNe03PlNmXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnIdNe03SgAcXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnIdNe03SgDaXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Dat",
- },
- "PnIdNe03SgGeXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnIdNe03SgNmXx": {
- POS: PRON,
- "PronType": "Ind",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnIrFe03PlAcXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnIrFe03PlGeXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnIrFe03PlNmXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnIrFe03SgAcXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnIrFe03SgGeXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnIrFe03SgNmXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnIrMa03PlAcXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnIrMa03PlGeXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnIrMa03PlNmXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnIrMa03SgAcXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnIrMa03SgGeXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnIrMa03SgNmXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnIrNe03PlAcXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnIrNe03PlGeXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnIrNe03PlNmXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnIrNe03SgAcXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnIrNe03SgGeXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnIrNe03SgNmXx": {
- POS: PRON,
- "PronType": "Int",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnPeFe01PlAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeFe01PlAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeFe01PlGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeFe01PlNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnPeFe01SgAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeFe01SgAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeFe01SgGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeFe01SgGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeFe01SgNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnPeFe02PlAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeFe02PlAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeFe02PlGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeFe02PlGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeFe02PlNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnPeFe02SgAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeFe02SgAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeFe02SgGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeFe02SgNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnPeFe03PlAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeFe03PlAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeFe03PlGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeFe03PlGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeFe03PlNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnPeFe03SgAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeFe03SgAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeFe03SgGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeFe03SgGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeMa01PlAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeMa01PlAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeMa01PlDaSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Plur",
- "Case": "Dat",
- },
- "PnPeMa01PlGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeMa01PlGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeMa01PlNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnPeMa01SgAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeMa01SgAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeMa01SgGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeMa01SgGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeMa01SgNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnPeMa02PlAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeMa02PlAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeMa02PlGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeMa02PlNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnPeMa02PlVoSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Plur",
- "Case": "Voc",
- },
- "PnPeMa02SgAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeMa02SgAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeMa02SgGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeMa02SgNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnPeMa03PlAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeMa03PlGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeMa03PlGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeMa03PlNmSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnPeMa03SgAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeMa03SgAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeMa03SgGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeMa03SgGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeMa03SgNmWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnPeNe03PlAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnPeNe03PlGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeNe03PlGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPeNe03SgAcSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeNe03SgAcWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnPeNe03SgGeSt": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPeNe03SgGeWe": {
- POS: PRON,
- "PronType": "Prs",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPoFe01PlGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPoFe01SgGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Fem",
- "Person": "1",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPoFe02PlGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPoFe02SgGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Fem",
- "Person": "2",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPoFe03PlGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPoFe03SgGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPoMa01PlGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPoMa01SgGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Masc",
- "Person": "1",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPoMa02PlGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPoMa02SgGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Masc",
- "Person": "2",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPoMa03PlGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPoMa03SgGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnPoNe03PlGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnPoNe03SgGeXx": {
- POS: PRON,
- "Poss": "Yes",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnReFe03PlAcXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnReFe03PlGeXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnReFe03PlNmXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnReFe03SgAcXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnReFe03SgGeXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnReFe03SgNmXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnReMa03PlAcXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnReMa03PlGeXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnReMa03PlNmXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnReMa03SgAcXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnReMa03SgGeXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnReMa03SgNmXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnReNe03PlAcXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnReNe03PlGeXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnReNe03PlNmXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnReNe03SgAcXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnReNe03SgGeXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnReNe03SgNmXx": {
- POS: PRON,
- "PronType": "Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnRiFe03PlAcXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnRiFe03PlGeXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnRiFe03PlNmXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnRiFe03SgAcXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnRiFe03SgGeXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnRiFe03SgNmXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Fem",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnRiMa03PlAcXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnRiMa03PlGeXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnRiMa03PlNmXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnRiMa03SgAcXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnRiMa03SgGeXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnRiMa03SgNmXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Masc",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PnRiNe03PlAcXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Acc",
- },
- "PnRiNe03PlGeXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Gen",
- },
- "PnRiNe03PlNmXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Plur",
- "Case": "Nom",
- },
- "PnRiNe03SgAcXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Acc",
- },
- "PnRiNe03SgGeXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Gen",
- },
- "PnRiNe03SgNmXx": {
- POS: PRON,
- "PronType": "Ind,Rel",
- "Gender": "Neut",
- "Person": "3",
- "Number": "Sing",
- "Case": "Nom",
- },
- "PTERM_P": {POS: PUNCT},
- "PtFu": {POS: PART},
- "PtNg": {POS: PART},
- "PtOt": {POS: PART},
- "PtSj": {POS: PART},
- "Pu": {POS: SYM},
- "PUNCT": {POS: PUNCT},
- "RgAbXx": {POS: X},
- "RgAnXx": {POS: X},
- "RgFwOr": {POS: X, "Foreign": "Yes"},
- "RgFwTr": {POS: X, "Foreign": "Yes"},
- "RgSyXx": {POS: SYM},
- "VbIsIdPa03SgXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbIsIdPa03SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbIsIdPa03SgXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbIsIdPa03SgXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbIsIdPr03SgXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbIsIdPr03SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbIsIdXx03SgXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbIsIdXx03SgXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbIsNfXxXxXxXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Inf",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing|Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa01PlXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "1",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa01PlXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "1",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa01PlXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "1",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa01PlXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "1",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa01SgXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "1",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa01SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "1",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa01SgXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa01SgXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "1",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa02PlXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa02PlXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa02PlXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa02PlXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa02SgXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa02SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa02SgXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa02SgXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa03PlXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa03PlXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa03PlXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa03PlXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa03SgXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa03SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa03SgXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPa03SgXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr01PlXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "1",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr01PlXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "1",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr01SgXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "1",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr01SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "1",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr02PlXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr02PlXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr02SgXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr02SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr03PlXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "3",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr03PlXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "3",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr03SgXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdPr03SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx01PlXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "1",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx01PlXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "1",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx01SgXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "1",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx01SgXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "1",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx02PlXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx02PlXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx02SgXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx02SgXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx03PlXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "3",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx03PlXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "3",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx03SgXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnIdXx03SgXxPePvXx": {
- POS: VERB,
- "VerbForm": "Fin",
- "Mood": "Ind",
- "Tense": "Pres|Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnMpXx02PlXxIpAvXx": {
- POS: VERB,
- "VerbForm": "",
- "Mood": "Imp",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnMpXx02PlXxIpPvXx": {
- POS: VERB,
- "VerbForm": "",
- "Mood": "Imp",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnMpXx02PlXxPeAvXx": {
- POS: VERB,
- "VerbForm": "",
- "Mood": "Imp",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnMpXx02PlXxPePvXx": {
- POS: VERB,
- "VerbForm": "",
- "Mood": "Imp",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnMpXx02SgXxIpAvXx": {
- POS: VERB,
- "VerbForm": "",
- "Mood": "Imp",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnMpXx02SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "",
- "Mood": "Imp",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnMpXx02SgXxPeAvXx": {
- POS: VERB,
- "VerbForm": "",
- "Mood": "Imp",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnMpXx02SgXxPePvXx": {
- POS: VERB,
- "VerbForm": "",
- "Mood": "Imp",
- "Tense": "Pres|Past",
- "Person": "2",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnMpXx03SgXxIpPvXx": {
- POS: VERB,
- "VerbForm": "",
- "Mood": "Imp",
- "Tense": "Pres|Past",
- "Person": "3",
- "Number": "Sing",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnNfXxXxXxXxPeAvXx": {
- POS: VERB,
- "VerbForm": "Inf",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing|Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnNfXxXxXxXxPePvXx": {
- POS: VERB,
- "VerbForm": "Inf",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing|Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnPpPrXxXxXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Conv",
- "Mood": "",
- "Tense": "Pres",
- "Person": "1|2|3",
- "Number": "Sing|Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "VbMnPpXxXxPlFePePvAc": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Fem",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Acc",
- },
- "VbMnPpXxXxPlFePePvGe": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Fem",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Gen",
- },
- "VbMnPpXxXxPlFePePvNm": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Fem",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom",
- },
- "VbMnPpXxXxPlFePePvVo": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Fem",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Voc",
- },
- "VbMnPpXxXxPlMaPePvAc": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Masc",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Acc",
- },
- "VbMnPpXxXxPlMaPePvGe": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Masc",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Gen",
- },
- "VbMnPpXxXxPlMaPePvNm": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Masc",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom",
- },
- "VbMnPpXxXxPlMaPePvVo": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Masc",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Voc",
- },
- "VbMnPpXxXxPlNePePvAc": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Acc",
- },
- "VbMnPpXxXxPlNePePvGe": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Gen",
- },
- "VbMnPpXxXxPlNePePvNm": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom",
- },
- "VbMnPpXxXxPlNePePvVo": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Plur",
- "Gender": "Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Voc",
- },
- "VbMnPpXxXxSgFePePvAc": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Fem",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Acc",
- },
- "VbMnPpXxXxSgFePePvGe": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Fem",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Gen",
- },
- "VbMnPpXxXxSgFePePvNm": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Fem",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom",
- },
- "VbMnPpXxXxSgFePePvVo": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Fem",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Voc",
- },
- "VbMnPpXxXxSgMaPePvAc": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Masc",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Acc",
- },
- "VbMnPpXxXxSgMaPePvGe": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Masc",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Gen",
- },
- "VbMnPpXxXxSgMaPePvNm": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Masc",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom",
- },
- "VbMnPpXxXxSgMaPePvVo": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Masc",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Voc",
- },
- "VbMnPpXxXxSgNePePvAc": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Acc",
- },
- "VbMnPpXxXxSgNePePvGe": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Gen",
- },
- "VbMnPpXxXxSgNePePvNm": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Nom",
- },
- "VbMnPpXxXxSgNePePvVo": {
- POS: VERB,
- "VerbForm": "Part",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing",
- "Gender": "Neut",
- "Aspect": "Perf",
- "Voice": "Pass",
- "Case": "Voc",
- },
- "VbMnPpXxXxXxXxIpAvXx": {
- POS: VERB,
- "VerbForm": "Conv",
- "Mood": "",
- "Tense": "Pres|Past",
- "Person": "1|2|3",
- "Number": "Sing|Plur",
- "Gender": "Masc|Fem|Neut",
- "Aspect": "Imp",
- "Voice": "Act",
- "Case": "Nom|Gen|Dat|Acc|Voc",
- },
- "ADJ": {POS: ADJ},
- "ADP": {POS: ADP},
- "ADV": {POS: ADV},
- "AtDf": {POS: DET},
- "AUX": {POS: AUX},
- "CCONJ": {POS: CCONJ},
- "DET": {POS: DET},
- "NOUN": {POS: NOUN},
- "NUM": {POS: NUM},
- "PART": {POS: PART},
- "PRON": {POS: PRON},
- "PROPN": {POS: PROPN},
- "SCONJ": {POS: SCONJ},
- "SYM": {POS: SYM},
- "VERB": {POS: VERB},
- "X": {POS: X},
-}
diff --git a/spacy/lang/el/tokenizer_exceptions.py b/spacy/lang/el/tokenizer_exceptions.py
index a3c36542e..0a36d5d2b 100644
--- a/spacy/lang/el/tokenizer_exceptions.py
+++ b/spacy/lang/el/tokenizer_exceptions.py
@@ -1,132 +1,128 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA, NORM
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
_exc = {}
for token in ["Απ'", "ΑΠ'", "αφ'", "Αφ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "από", NORM: "από"}]
+ _exc[token] = [{ORTH: token, NORM: "από"}]
for token in ["Αλλ'", "αλλ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "αλλά", NORM: "αλλά"}]
+ _exc[token] = [{ORTH: token, NORM: "αλλά"}]
for token in ["παρ'", "Παρ'", "ΠΑΡ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "παρά", NORM: "παρά"}]
+ _exc[token] = [{ORTH: token, NORM: "παρά"}]
for token in ["καθ'", "Καθ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "κάθε", NORM: "κάθε"}]
+ _exc[token] = [{ORTH: token, NORM: "κάθε"}]
for token in ["κατ'", "Κατ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "κατά", NORM: "κατά"}]
+ _exc[token] = [{ORTH: token, NORM: "κατά"}]
for token in ["'ΣΟΥΝ", "'ναι", "'ταν", "'τανε", "'μαστε", "'μουνα", "'μουν"]:
- _exc[token] = [{ORTH: token, LEMMA: "είμαι", NORM: "είμαι"}]
+ _exc[token] = [{ORTH: token, NORM: "είμαι"}]
for token in ["Επ'", "επ'", "εφ'", "Εφ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "επί", NORM: "επί"}]
+ _exc[token] = [{ORTH: token, NORM: "επί"}]
for token in ["Δι'", "δι'"]:
- _exc[token] = [{ORTH: token, LEMMA: "δια", NORM: "δια"}]
+ _exc[token] = [{ORTH: token, NORM: "δια"}]
for token in ["'χουν", "'χουμε", "'χαμε", "'χα", "'χε", "'χεις", "'χει"]:
- _exc[token] = [{ORTH: token, LEMMA: "έχω", NORM: "έχω"}]
+ _exc[token] = [{ORTH: token, NORM: "έχω"}]
for token in ["υπ'", "Υπ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "υπό", NORM: "υπό"}]
+ _exc[token] = [{ORTH: token, NORM: "υπό"}]
for token in ["Μετ'", "ΜΕΤ'", "'μετ"]:
- _exc[token] = [{ORTH: token, LEMMA: "μετά", NORM: "μετά"}]
+ _exc[token] = [{ORTH: token, NORM: "μετά"}]
for token in ["Μ'", "μ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "με", NORM: "με"}]
+ _exc[token] = [{ORTH: token, NORM: "με"}]
for token in ["Γι'", "ΓΙ'", "γι'"]:
- _exc[token] = [{ORTH: token, LEMMA: "για", NORM: "για"}]
+ _exc[token] = [{ORTH: token, NORM: "για"}]
for token in ["Σ'", "σ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "σε", NORM: "σε"}]
+ _exc[token] = [{ORTH: token, NORM: "σε"}]
for token in ["Θ'", "θ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "θα", NORM: "θα"}]
+ _exc[token] = [{ORTH: token, NORM: "θα"}]
for token in ["Ν'", "ν'"]:
- _exc[token] = [{ORTH: token, LEMMA: "να", NORM: "να"}]
+ _exc[token] = [{ORTH: token, NORM: "να"}]
for token in ["Τ'", "τ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "να", NORM: "να"}]
+ _exc[token] = [{ORTH: token, NORM: "να"}]
for token in ["'γω", "'σένα", "'μεις"]:
- _exc[token] = [{ORTH: token, LEMMA: "εγώ", NORM: "εγώ"}]
+ _exc[token] = [{ORTH: token, NORM: "εγώ"}]
for token in ["Τ'", "τ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "το", NORM: "το"}]
+ _exc[token] = [{ORTH: token, NORM: "το"}]
for token in ["Φέρ'", "Φερ'", "φέρ'", "φερ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "φέρνω", NORM: "φέρνω"}]
+ _exc[token] = [{ORTH: token, NORM: "φέρνω"}]
for token in ["'ρθούνε", "'ρθουν", "'ρθει", "'ρθεί", "'ρθε", "'ρχεται"]:
- _exc[token] = [{ORTH: token, LEMMA: "έρχομαι", NORM: "έρχομαι"}]
+ _exc[token] = [{ORTH: token, NORM: "έρχομαι"}]
for token in ["'πανε", "'λεγε", "'λεγαν", "'πε", "'λεγα"]:
- _exc[token] = [{ORTH: token, LEMMA: "λέγω", NORM: "λέγω"}]
+ _exc[token] = [{ORTH: token, NORM: "λέγω"}]
for token in ["Πάρ'", "πάρ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "παίρνω", NORM: "παίρνω"}]
+ _exc[token] = [{ORTH: token, NORM: "παίρνω"}]
for token in ["μέσ'", "Μέσ'", "μεσ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "μέσα", NORM: "μέσα"}]
+ _exc[token] = [{ORTH: token, NORM: "μέσα"}]
for token in ["Δέσ'", "Δεσ'", "δεσ'"]:
- _exc[token] = [{ORTH: token, LEMMA: "δένω", NORM: "δένω"}]
+ _exc[token] = [{ORTH: token, NORM: "δένω"}]
for token in ["'κανε", "Κάν'"]:
- _exc[token] = [{ORTH: token, LEMMA: "κάνω", NORM: "κάνω"}]
+ _exc[token] = [{ORTH: token, NORM: "κάνω"}]
_other_exc = {
- "κι": [{ORTH: "κι", LEMMA: "και", NORM: "και"}],
- "Παίξ'": [{ORTH: "Παίξ'", LEMMA: "παίζω", NORM: "παίζω"}],
- "Αντ'": [{ORTH: "Αντ'", LEMMA: "αντί", NORM: "αντί"}],
- "ολ'": [{ORTH: "ολ'", LEMMA: "όλος", NORM: "όλος"}],
- "ύστερ'": [{ORTH: "ύστερ'", LEMMA: "ύστερα", NORM: "ύστερα"}],
- "'πρεπε": [{ORTH: "'πρεπε", LEMMA: "πρέπει", NORM: "πρέπει"}],
- "Δύσκολ'": [{ORTH: "Δύσκολ'", LEMMA: "δύσκολος", NORM: "δύσκολος"}],
- "'θελα": [{ORTH: "'θελα", LEMMA: "θέλω", NORM: "θέλω"}],
- "'γραφα": [{ORTH: "'γραφα", LEMMA: "γράφω", NORM: "γράφω"}],
- "'παιρνα": [{ORTH: "'παιρνα", LEMMA: "παίρνω", NORM: "παίρνω"}],
- "'δειξε": [{ORTH: "'δειξε", LEMMA: "δείχνω", NORM: "δείχνω"}],
- "όμουρφ'": [{ORTH: "όμουρφ'", LEMMA: "όμορφος", NORM: "όμορφος"}],
- "κ'τσή": [{ORTH: "κ'τσή", LEMMA: "κουτσός", NORM: "κουτσός"}],
- "μηδ'": [{ORTH: "μηδ'", LEMMA: "μήδε", NORM: "μήδε"}],
- "'ξομολογήθηκε": [
- {ORTH: "'ξομολογήθηκε", LEMMA: "εξομολογούμαι", NORM: "εξομολογούμαι"}
- ],
- "'μας": [{ORTH: "'μας", LEMMA: "εμάς", NORM: "εμάς"}],
- "'ξερες": [{ORTH: "'ξερες", LEMMA: "ξέρω", NORM: "ξέρω"}],
- "έφθασ'": [{ORTH: "έφθασ'", LEMMA: "φθάνω", NORM: "φθάνω"}],
- "εξ'": [{ORTH: "εξ'", LEMMA: "εκ", NORM: "εκ"}],
- "δώσ'": [{ORTH: "δώσ'", LEMMA: "δίνω", NORM: "δίνω"}],
- "τίποτ'": [{ORTH: "τίποτ'", LEMMA: "τίποτα", NORM: "τίποτα"}],
- "Λήξ'": [{ORTH: "Λήξ'", LEMMA: "λήγω", NORM: "λήγω"}],
- "άσ'": [{ORTH: "άσ'", LEMMA: "αφήνω", NORM: "αφήνω"}],
- "Στ'": [{ORTH: "Στ'", LEMMA: "στο", NORM: "στο"}],
- "Δωσ'": [{ORTH: "Δωσ'", LEMMA: "δίνω", NORM: "δίνω"}],
- "Βάψ'": [{ORTH: "Βάψ'", LEMMA: "βάφω", NORM: "βάφω"}],
- "Αλλ'": [{ORTH: "Αλλ'", LEMMA: "αλλά", NORM: "αλλά"}],
- "Αμ'": [{ORTH: "Αμ'", LEMMA: "άμα", NORM: "άμα"}],
- "Αγόρασ'": [{ORTH: "Αγόρασ'", LEMMA: "αγοράζω", NORM: "αγοράζω"}],
- "'φύγε": [{ORTH: "'φύγε", LEMMA: "φεύγω", NORM: "φεύγω"}],
- "'φερε": [{ORTH: "'φερε", LEMMA: "φέρνω", NORM: "φέρνω"}],
- "'φαγε": [{ORTH: "'φαγε", LEMMA: "τρώω", NORM: "τρώω"}],
- "'σπαγαν": [{ORTH: "'σπαγαν", LEMMA: "σπάω", NORM: "σπάω"}],
- "'σκασε": [{ORTH: "'σκασε", LEMMA: "σκάω", NORM: "σκάω"}],
- "'σβηνε": [{ORTH: "'σβηνε", LEMMA: "σβήνω", NORM: "σβήνω"}],
- "'ριξε": [{ORTH: "'ριξε", LEMMA: "ρίχνω", NORM: "ρίχνω"}],
- "'κλεβε": [{ORTH: "'κλεβε", LEMMA: "κλέβω", NORM: "κλέβω"}],
- "'κει": [{ORTH: "'κει", LEMMA: "εκεί", NORM: "εκεί"}],
- "'βλεπε": [{ORTH: "'βλεπε", LEMMA: "βλέπω", NORM: "βλέπω"}],
- "'βγαινε": [{ORTH: "'βγαινε", LEMMA: "βγαίνω", NORM: "βγαίνω"}],
+ "κι": [{ORTH: "κι", NORM: "και"}],
+ "Παίξ'": [{ORTH: "Παίξ'", NORM: "παίζω"}],
+ "Αντ'": [{ORTH: "Αντ'", NORM: "αντί"}],
+ "ολ'": [{ORTH: "ολ'", NORM: "όλος"}],
+ "ύστερ'": [{ORTH: "ύστερ'", NORM: "ύστερα"}],
+ "'πρεπε": [{ORTH: "'πρεπε", NORM: "πρέπει"}],
+ "Δύσκολ'": [{ORTH: "Δύσκολ'", NORM: "δύσκολος"}],
+ "'θελα": [{ORTH: "'θελα", NORM: "θέλω"}],
+ "'γραφα": [{ORTH: "'γραφα", NORM: "γράφω"}],
+ "'παιρνα": [{ORTH: "'παιρνα", NORM: "παίρνω"}],
+ "'δειξε": [{ORTH: "'δειξε", NORM: "δείχνω"}],
+ "όμουρφ'": [{ORTH: "όμουρφ'", NORM: "όμορφος"}],
+ "κ'τσή": [{ORTH: "κ'τσή", NORM: "κουτσός"}],
+ "μηδ'": [{ORTH: "μηδ'", NORM: "μήδε"}],
+ "'ξομολογήθηκε": [{ORTH: "'ξομολογήθηκε", NORM: "εξομολογούμαι"}],
+ "'μας": [{ORTH: "'μας", NORM: "εμάς"}],
+ "'ξερες": [{ORTH: "'ξερες", NORM: "ξέρω"}],
+ "έφθασ'": [{ORTH: "έφθασ'", NORM: "φθάνω"}],
+ "εξ'": [{ORTH: "εξ'", NORM: "εκ"}],
+ "δώσ'": [{ORTH: "δώσ'", NORM: "δίνω"}],
+ "τίποτ'": [{ORTH: "τίποτ'", NORM: "τίποτα"}],
+ "Λήξ'": [{ORTH: "Λήξ'", NORM: "λήγω"}],
+ "άσ'": [{ORTH: "άσ'", NORM: "αφήνω"}],
+ "Στ'": [{ORTH: "Στ'", NORM: "στο"}],
+ "Δωσ'": [{ORTH: "Δωσ'", NORM: "δίνω"}],
+ "Βάψ'": [{ORTH: "Βάψ'", NORM: "βάφω"}],
+ "Αλλ'": [{ORTH: "Αλλ'", NORM: "αλλά"}],
+ "Αμ'": [{ORTH: "Αμ'", NORM: "άμα"}],
+ "Αγόρασ'": [{ORTH: "Αγόρασ'", NORM: "αγοράζω"}],
+ "'φύγε": [{ORTH: "'φύγε", NORM: "φεύγω"}],
+ "'φερε": [{ORTH: "'φερε", NORM: "φέρνω"}],
+ "'φαγε": [{ORTH: "'φαγε", NORM: "τρώω"}],
+ "'σπαγαν": [{ORTH: "'σπαγαν", NORM: "σπάω"}],
+ "'σκασε": [{ORTH: "'σκασε", NORM: "σκάω"}],
+ "'σβηνε": [{ORTH: "'σβηνε", NORM: "σβήνω"}],
+ "'ριξε": [{ORTH: "'ριξε", NORM: "ρίχνω"}],
+ "'κλεβε": [{ORTH: "'κλεβε", NORM: "κλέβω"}],
+ "'κει": [{ORTH: "'κει", NORM: "εκεί"}],
+ "'βλεπε": [{ORTH: "'βλεπε", NORM: "βλέπω"}],
+ "'βγαινε": [{ORTH: "'βγαινε", NORM: "βγαίνω"}],
}
_exc.update(_other_exc)
@@ -134,37 +130,37 @@ _exc.update(_other_exc)
for h in range(1, 12 + 1):
for period in ["π.μ.", "πμ"]:
- _exc["%d%s" % (h, period)] = [
- {ORTH: "%d" % h},
- {ORTH: period, LEMMA: "π.μ.", NORM: "π.μ."},
+ _exc[f"{h}{period}"] = [
+ {ORTH: f"{h}"},
+ {ORTH: period, NORM: "π.μ."},
]
for period in ["μ.μ.", "μμ"]:
- _exc["%d%s" % (h, period)] = [
- {ORTH: "%d" % h},
- {ORTH: period, LEMMA: "μ.μ.", NORM: "μ.μ."},
+ _exc[f"{h}{period}"] = [
+ {ORTH: f"{h}"},
+ {ORTH: period, NORM: "μ.μ."},
]
for exc_data in [
- {ORTH: "ΑΓΡ.", LEMMA: "Αγροτικός", NORM: "Αγροτικός"},
- {ORTH: "Αγ. Γρ.", LEMMA: "Αγία Γραφή", NORM: "Αγία Γραφή"},
- {ORTH: "Αθ.", LEMMA: "Αθανάσιος", NORM: "Αθανάσιος"},
- {ORTH: "Αλεξ.", LEMMA: "Αλέξανδρος", NORM: "Αλέξανδρος"},
- {ORTH: "Απρ.", LEMMA: "Απρίλιος", NORM: "Απρίλιος"},
- {ORTH: "Αύγ.", LEMMA: "Αύγουστος", NORM: "Αύγουστος"},
- {ORTH: "Δεκ.", LEMMA: "Δεκέμβριος", NORM: "Δεκέμβριος"},
- {ORTH: "Δημ.", LEMMA: "Δήμος", NORM: "Δήμος"},
- {ORTH: "Ιαν.", LEMMA: "Ιανουάριος", NORM: "Ιανουάριος"},
- {ORTH: "Ιούλ.", LEMMA: "Ιούλιος", NORM: "Ιούλιος"},
- {ORTH: "Ιούν.", LEMMA: "Ιούνιος", NORM: "Ιούνιος"},
- {ORTH: "Ιωαν.", LEMMA: "Ιωάννης", NORM: "Ιωάννης"},
- {ORTH: "Μ. Ασία", LEMMA: "Μικρά Ασία", NORM: "Μικρά Ασία"},
- {ORTH: "Μάρτ.", LEMMA: "Μάρτιος", NORM: "Μάρτιος"},
- {ORTH: "Μάρτ'", LEMMA: "Μάρτιος", NORM: "Μάρτιος"},
- {ORTH: "Νοέμβρ.", LEMMA: "Νοέμβριος", NORM: "Νοέμβριος"},
- {ORTH: "Οκτ.", LEMMA: "Οκτώβριος", NORM: "Οκτώβριος"},
- {ORTH: "Σεπτ.", LEMMA: "Σεπτέμβριος", NORM: "Σεπτέμβριος"},
- {ORTH: "Φεβρ.", LEMMA: "Φεβρουάριος", NORM: "Φεβρουάριος"},
+ {ORTH: "ΑΓΡ.", NORM: "Αγροτικός"},
+ {ORTH: "Αγ. Γρ.", NORM: "Αγία Γραφή"},
+ {ORTH: "Αθ.", NORM: "Αθανάσιος"},
+ {ORTH: "Αλεξ.", NORM: "Αλέξανδρος"},
+ {ORTH: "Απρ.", NORM: "Απρίλιος"},
+ {ORTH: "Αύγ.", NORM: "Αύγουστος"},
+ {ORTH: "Δεκ.", NORM: "Δεκέμβριος"},
+ {ORTH: "Δημ.", NORM: "Δήμος"},
+ {ORTH: "Ιαν.", NORM: "Ιανουάριος"},
+ {ORTH: "Ιούλ.", NORM: "Ιούλιος"},
+ {ORTH: "Ιούν.", NORM: "Ιούνιος"},
+ {ORTH: "Ιωαν.", NORM: "Ιωάννης"},
+ {ORTH: "Μ. Ασία", NORM: "Μικρά Ασία"},
+ {ORTH: "Μάρτ.", NORM: "Μάρτιος"},
+ {ORTH: "Μάρτ'", NORM: "Μάρτιος"},
+ {ORTH: "Νοέμβρ.", NORM: "Νοέμβριος"},
+ {ORTH: "Οκτ.", NORM: "Οκτώβριος"},
+ {ORTH: "Σεπτ.", NORM: "Σεπτέμβριος"},
+ {ORTH: "Φεβρ.", NORM: "Φεβρουάριος"},
]:
_exc[exc_data[ORTH]] = [exc_data]
@@ -395,4 +391,4 @@ for orth in [
]:
_exc[orth] = [{ORTH: orth}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/en/__init__.py b/spacy/lang/en/__init__.py
index f58ae4a4e..1a595b6e7 100644
--- a/spacy/lang/en/__init__.py
+++ b/spacy/lang/en/__init__.py
@@ -1,75 +1,23 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Optional
+
+from thinc.api import Model
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-from .morph_rules import MORPH_RULES
from .syntax_iterators import SYNTAX_ITERATORS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from .punctuation import TOKENIZER_INFIXES
+from .lemmatizer import EnglishLemmatizer
from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
-
-
-def _return_en(_):
- return "en"
+from ...lookups import Lookups
class EnglishDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = _return_en
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- tag_map = TAG_MAP
- stop_words = STOP_WORDS
- morph_rules = MORPH_RULES
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
+ infixes = TOKENIZER_INFIXES
+ lex_attr_getters = LEX_ATTRS
syntax_iterators = SYNTAX_ITERATORS
- single_orth_variants = [
- {"tags": ["NFP"], "variants": ["…", "..."]},
- {"tags": [":"], "variants": ["-", "—", "–", "--", "---", "——"]},
- ]
- paired_orth_variants = [
- {"tags": ["``", "''"], "variants": [("'", "'"), ("‘", "’")]},
- {"tags": ["``", "''"], "variants": [('"', '"'), ("“", "”")]},
- ]
-
- @classmethod
- def is_base_form(cls, univ_pos, morphology=None):
- """
- Check whether we're dealing with an uninflected paradigm, so we can
- avoid lemmatization entirely.
-
- univ_pos (unicode / int): The token's universal part-of-speech tag.
- morphology (dict): The token's morphological features following the
- Universal Dependencies scheme.
- """
- if morphology is None:
- morphology = {}
- if univ_pos == "noun" and morphology.get("Number") == "sing":
- return True
- elif univ_pos == "verb" and morphology.get("VerbForm") == "inf":
- return True
- # This maps 'VBP' to base form -- probably just need 'IS_BASE'
- # morphology
- elif univ_pos == "verb" and (
- morphology.get("VerbForm") == "fin"
- and morphology.get("Tense") == "pres"
- and morphology.get("Number") is None
- ):
- return True
- elif univ_pos == "adj" and morphology.get("Degree") == "pos":
- return True
- elif morphology.get("VerbForm") == "inf":
- return True
- elif morphology.get("VerbForm") == "none":
- return True
- elif morphology.get("Degree") == "pos":
- return True
- else:
- return False
+ stop_words = STOP_WORDS
class English(Language):
@@ -77,4 +25,22 @@ class English(Language):
Defaults = EnglishDefaults
+@English.factory(
+ "lemmatizer",
+ assigns=["token.lemma"],
+ default_config={"model": None, "mode": "rule", "lookups": None},
+ scores=["lemma_acc"],
+ default_score_weights={"lemma_acc": 1.0},
+)
+def make_lemmatizer(
+ nlp: Language,
+ model: Optional[Model],
+ name: str,
+ mode: str,
+ lookups: Optional[Lookups],
+):
+ lookups = EnglishLemmatizer.load_lookups(nlp.lang, mode, lookups)
+ return EnglishLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+
+
__all__ = ["English"]
diff --git a/spacy/lang/en/examples.py b/spacy/lang/en/examples.py
index 946289c7c..2cca9e05f 100644
--- a/spacy/lang/en/examples.py
+++ b/spacy/lang/en/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/en/lemmatizer.py b/spacy/lang/en/lemmatizer.py
new file mode 100644
index 000000000..be389f117
--- /dev/null
+++ b/spacy/lang/en/lemmatizer.py
@@ -0,0 +1,41 @@
+from ...pipeline import Lemmatizer
+from ...tokens import Token
+
+
+class EnglishLemmatizer(Lemmatizer):
+ """English lemmatizer. Only overrides is_base_form.
+ """
+
+ def is_base_form(self, token: Token) -> bool:
+ """
+ Check whether we're dealing with an uninflected paradigm, so we can
+ avoid lemmatization entirely.
+
+ token (Token): The token to check; its coarse part-of-speech tag
+ (token.pos_) and morphological features (token.morph) are inspected.
+ RETURNS (bool): Whether the token is considered to be in its base form.
+ """
+ univ_pos = token.pos_.lower()
+ morphology = token.morph.to_dict()
+ if univ_pos == "noun" and morphology.get("Number") == "Sing":
+ return True
+ elif univ_pos == "verb" and morphology.get("VerbForm") == "Inf":
+ return True
+ # This maps 'VBP' to base form -- probably just need 'IS_BASE'
+ # morphology
+ elif univ_pos == "verb" and (
+ morphology.get("VerbForm") == "Fin"
+ and morphology.get("Tense") == "Pres"
+ and morphology.get("Number") is None
+ ):
+ return True
+ elif univ_pos == "adj" and morphology.get("Degree") == "Pos":
+ return True
+ elif morphology.get("VerbForm") == "Inf":
+ return True
+ elif morphology.get("VerbForm") == "None":
+ return True
+ elif morphology.get("Degree") == "Pos":
+ return True
+ else:
+ return False
diff --git a/spacy/lang/en/lex_attrs.py b/spacy/lang/en/lex_attrs.py
index 4f6988bd5..fcc7c6bf2 100644
--- a/spacy/lang/en/lex_attrs.py
+++ b/spacy/lang/en/lex_attrs.py
@@ -1,87 +1,25 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
+# fmt: off
_num_words = [
- "zero",
- "one",
- "two",
- "three",
- "four",
- "five",
- "six",
- "seven",
- "eight",
- "nine",
- "ten",
- "eleven",
- "twelve",
- "thirteen",
- "fourteen",
- "fifteen",
- "sixteen",
- "seventeen",
- "eighteen",
- "nineteen",
- "twenty",
- "thirty",
- "forty",
- "fifty",
- "sixty",
- "seventy",
- "eighty",
- "ninety",
- "hundred",
- "thousand",
- "million",
- "billion",
- "trillion",
- "quadrillion",
- "gajillion",
- "bazillion",
+ "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
+ "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+ "sixteen", "seventeen", "eighteen", "nineteen", "twenty", "thirty", "forty",
+ "fifty", "sixty", "seventy", "eighty", "ninety", "hundred", "thousand",
+ "million", "billion", "trillion", "quadrillion", "gajillion", "bazillion"
]
-
-
_ordinal_words = [
- "first",
- "second",
- "third",
- "fourth",
- "fifth",
- "sixth",
- "seventh",
- "eighth",
- "ninth",
- "tenth",
- "eleventh",
- "twelfth",
- "thirteenth",
- "fourteenth",
- "fifteenth",
- "sixteenth",
- "seventeenth",
- "eighteenth",
- "nineteenth",
- "twentieth",
- "thirtieth",
- "fortieth",
- "fiftieth",
- "sixtieth",
- "seventieth",
- "eightieth",
- "ninetieth",
- "hundredth",
- "thousandth",
- "millionth",
- "billionth",
- "trillionth",
- "quadrillionth",
- "gajillionth",
- "bazillionth",
+ "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth",
+ "ninth", "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth",
+ "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth",
+ "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth", "seventieth",
+ "eightieth", "ninetieth", "hundredth", "thousandth", "millionth", "billionth",
+ "trillionth", "quadrillionth", "gajillionth", "bazillionth"
]
+# fmt: on
-def like_num(text):
+
+def like_num(text: str) -> bool:
if text.startswith(("+", "-", "±", "~")):
text = text[1:]
text = text.replace(",", "").replace(".", "")
@@ -91,18 +29,15 @@ def like_num(text):
num, denom = text.split("/")
if num.isdigit() and denom.isdigit():
return True
-
text_lower = text.lower()
if text_lower in _num_words:
return True
-
- # CHeck ordinal number
+ # Check ordinal number
if text_lower in _ordinal_words:
return True
if text_lower.endswith("th"):
if text_lower[:-2].isdigit():
- return True
-
+ return True
return False
diff --git a/spacy/lang/en/morph_rules.py b/spacy/lang/en/morph_rules.py
deleted file mode 100644
index 5ed4eac59..000000000
--- a/spacy/lang/en/morph_rules.py
+++ /dev/null
@@ -1,493 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import LEMMA, PRON_LEMMA
-
-# Several entries here look pretty suspicious. These will get the POS SCONJ
-# given the tag IN, when an adpositional reading seems much more likely for
-# a lot of these prepositions. I'm not sure what I was running in 04395ffa4
-# when I did this? It doesn't seem right.
-_subordinating_conjunctions = [
- "that",
- "if",
- "as",
- "because",
- # "of",
- # "for",
- # "before",
- # "in",
- "while",
- # "after",
- "since",
- "like",
- # "with",
- "so",
- # "to",
- # "by",
- # "on",
- # "about",
- "than",
- "whether",
- "although",
- # "from",
- "though",
- # "until",
- "unless",
- "once",
- # "without",
- # "at",
- # "into",
- "cause",
- # "over",
- "upon",
- "till",
- "whereas",
- # "beyond",
- "whilst",
- "except",
- "despite",
- "wether",
- # "then",
- "but",
- "becuse",
- "whie",
- # "below",
- # "against",
- "it",
- "w/out",
- # "toward",
- "albeit",
- "save",
- "besides",
- "becouse",
- "coz",
- "til",
- "ask",
- "i'd",
- "out",
- "near",
- "seince",
- # "towards",
- "tho",
- "sice",
- "will",
-]
-
-# This seems kind of wrong too?
-# _relative_pronouns = ["this", "that", "those", "these"]
-
-MORPH_RULES = {
- # "DT": {word: {"POS": "PRON"} for word in _relative_pronouns},
- "IN": {word: {"POS": "SCONJ"} for word in _subordinating_conjunctions},
- "NN": {
- "something": {"POS": "PRON"},
- "anyone": {"POS": "PRON"},
- "anything": {"POS": "PRON"},
- "nothing": {"POS": "PRON"},
- "someone": {"POS": "PRON"},
- "everything": {"POS": "PRON"},
- "everyone": {"POS": "PRON"},
- "everybody": {"POS": "PRON"},
- "nobody": {"POS": "PRON"},
- "somebody": {"POS": "PRON"},
- "anybody": {"POS": "PRON"},
- "any1": {"POS": "PRON"},
- },
- "PRP": {
- "I": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Case": "Nom",
- },
- "me": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Case": "Acc",
- },
- "you": {LEMMA: PRON_LEMMA, "POS": "PRON", "PronType": "Prs", "Person": "Two"},
- "he": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Masc",
- "Case": "Nom",
- },
- "him": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Masc",
- "Case": "Acc",
- },
- "she": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Fem",
- "Case": "Nom",
- },
- "her": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Fem",
- "Case": "Acc",
- },
- "it": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Neut",
- },
- "we": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Case": "Nom",
- },
- "us": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Case": "Acc",
- },
- "they": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Plur",
- "Case": "Nom",
- },
- "them": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Plur",
- "Case": "Acc",
- },
- "mine": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "his": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Masc",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "hers": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Fem",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "its": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Neut",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "ours": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "yours": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Plur",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "theirs": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Plur",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "myself": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Case": "Acc",
- "Reflex": "Yes",
- },
- "yourself": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Two",
- "Case": "Acc",
- "Reflex": "Yes",
- },
- "himself": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Case": "Acc",
- "Gender": "Masc",
- "Reflex": "Yes",
- },
- "herself": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Case": "Acc",
- "Gender": "Fem",
- "Reflex": "Yes",
- },
- "itself": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Case": "Acc",
- "Gender": "Neut",
- "Reflex": "Yes",
- },
- "themself": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Case": "Acc",
- "Reflex": "Yes",
- },
- "ourselves": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Case": "Acc",
- "Reflex": "Yes",
- },
- "yourselves": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Two",
- "Case": "Acc",
- "Reflex": "Yes",
- },
- "themselves": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Plur",
- "Case": "Acc",
- "Reflex": "Yes",
- },
- },
- "PRP$": {
- "my": {
- LEMMA: PRON_LEMMA,
- "Person": "One",
- "Number": "Sing",
- "PronType": "Prs",
- "Poss": "Yes",
- },
- "your": {LEMMA: PRON_LEMMA, "Person": "Two", "PronType": "Prs", "Poss": "Yes"},
- "his": {
- LEMMA: PRON_LEMMA,
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Masc",
- "PronType": "Prs",
- "Poss": "Yes",
- },
- "her": {
- LEMMA: PRON_LEMMA,
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Fem",
- "PronType": "Prs",
- "Poss": "Yes",
- },
- "its": {
- LEMMA: PRON_LEMMA,
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Neut",
- "PronType": "Prs",
- "Poss": "Yes",
- },
- "our": {
- LEMMA: PRON_LEMMA,
- "Person": "One",
- "Number": "Plur",
- "PronType": "Prs",
- "Poss": "Yes",
- },
- "their": {
- LEMMA: PRON_LEMMA,
- "Person": "Three",
- "Number": "Plur",
- "PronType": "Prs",
- "Poss": "Yes",
- },
- },
- "RB": {word: {"POS": "PART"} for word in ["not", "n't", "nt", "n’t"]},
- "VB": {
- word: {"POS": "AUX"}
- for word in ["be", "have", "do", "get", "of", "am", "are", "'ve"]
- },
- "VBN": {"been": {LEMMA: "be", "POS": "AUX"}},
- "VBG": {"being": {LEMMA: "be", "POS": "AUX"}},
- "VBZ": {
- "am": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Person": "One",
- "Tense": "Pres",
- "Mood": "Ind",
- },
- "are": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Person": "Two",
- "Tense": "Pres",
- "Mood": "Ind",
- },
- "is": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Person": "Three",
- "Tense": "Pres",
- "Mood": "Ind",
- },
- "'re": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Person": "Two",
- "Tense": "Pres",
- "Mood": "Ind",
- },
- "'s": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Person": "Three",
- "Tense": "Pres",
- "Mood": "Ind",
- },
- "has": {LEMMA: "have", "POS": "AUX"},
- "does": {LEMMA: "do", "POS": "AUX"},
- },
- "VBP": {
- "are": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Tense": "Pres",
- "Mood": "Ind",
- },
- "'re": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Tense": "Pres",
- "Mood": "Ind",
- },
- "am": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Person": "One",
- "Tense": "Pres",
- "Mood": "Ind",
- },
- "do": {"POS": "AUX"},
- "have": {"POS": "AUX"},
- "'m": {"POS": "AUX", LEMMA: "be"},
- "'ve": {"POS": "AUX"},
- "'s": {"POS": "AUX"},
- "is": {"POS": "AUX"},
- "'d": {"POS": "AUX"},
- },
- "VBD": {
- "was": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Tense": "Past",
- "Number": "Sing",
- },
- "were": {
- LEMMA: "be",
- "POS": "AUX",
- "VerbForm": "Fin",
- "Tense": "Past",
- "Number": "Plur",
- },
- "did": {LEMMA: "do", "POS": "AUX"},
- "had": {LEMMA: "have", "POS": "AUX"},
- "'d": {LEMMA: "have", "POS": "AUX"},
- },
-}
-
-
-for tag, rules in MORPH_RULES.items():
- for key, attrs in dict(rules).items():
- rules[key.title()] = attrs
diff --git a/spacy/lang/en/punctuation.py b/spacy/lang/en/punctuation.py
new file mode 100644
index 000000000..5d3eb792e
--- /dev/null
+++ b/spacy/lang/en/punctuation.py
@@ -0,0 +1,19 @@
+from ..char_classes import LIST_ELLIPSES, LIST_ICONS, HYPHENS
+from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
+
+_infixes = (
+ LIST_ELLIPSES
+ + LIST_ICONS
+ + [
+ r"(?<=[0-9])[+\-\*^](?=[0-9-])",
+ r"(?<=[{al}{q}])\.(?=[{au}{q}])".format(
+ al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES
+ ),
+ r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
+ r"(?<=[{a}0-9])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
+ r"(?<=[{a}0-9])[:<>=/](?=[{a}])".format(a=ALPHA),
+ ]
+)
+
+
+TOKENIZER_INFIXES = _infixes
diff --git a/spacy/lang/en/stop_words.py b/spacy/lang/en/stop_words.py
index 3505b13bf..1ca5cbc16 100644
--- a/spacy/lang/en/stop_words.py
+++ b/spacy/lang/en/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Stop words
STOP_WORDS = set(
"""
diff --git a/spacy/lang/en/syntax_iterators.py b/spacy/lang/en/syntax_iterators.py
index 0f2b28b58..59ae733bd 100644
--- a/spacy/lang/en/syntax_iterators.py
+++ b/spacy/lang/en/syntax_iterators.py
@@ -1,30 +1,18 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Union, Iterator
from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...tokens import Doc, Span
-def noun_chunks(doclike):
- """
- Detect base noun phrases from a dependency parse. Works on both Doc and Span.
- """
- labels = [
- "nsubj",
- "dobj",
- "nsubjpass",
- "pcomp",
- "pobj",
- "dative",
- "appos",
- "attr",
- "ROOT",
- ]
+def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Span]:
+ """Detect base noun phrases from a dependency parse. Works on Doc and Span."""
+ # fmt: off
+ labels = ["nsubj", "dobj", "nsubjpass", "pcomp", "pobj", "dative", "appos", "attr", "ROOT"]
+ # fmt: on
doc = doclike.doc # Ensure works on both Doc and Span.
-
if not doc.is_parsed:
raise ValueError(Errors.E029)
-
np_deps = [doc.vocab.strings.add(label) for label in labels]
conj = doc.vocab.strings.add("conj")
np_label = doc.vocab.strings.add("NP")
diff --git a/spacy/lang/en/tag_map.py b/spacy/lang/en/tag_map.py
deleted file mode 100644
index ecb3103cc..000000000
--- a/spacy/lang/en/tag_map.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, SYM, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON
-
-
-TAG_MAP = {
- ".": {POS: PUNCT, "PunctType": "peri"},
- ",": {POS: PUNCT, "PunctType": "comm"},
- "-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"},
- "-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"},
- "``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"},
- '""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
- "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
- ":": {POS: PUNCT},
- "$": {POS: SYM},
- "#": {POS: SYM},
- "AFX": {POS: ADJ, "Hyph": "yes"},
- "CC": {POS: CCONJ, "ConjType": "comp"},
- "CD": {POS: NUM, "NumType": "card"},
- "DT": {POS: DET},
- "EX": {POS: PRON, "AdvType": "ex"},
- "FW": {POS: X, "Foreign": "yes"},
- "HYPH": {POS: PUNCT, "PunctType": "dash"},
- "IN": {POS: ADP},
- "JJ": {POS: ADJ, "Degree": "pos"},
- "JJR": {POS: ADJ, "Degree": "comp"},
- "JJS": {POS: ADJ, "Degree": "sup"},
- "LS": {POS: X, "NumType": "ord"},
- "MD": {POS: VERB, "VerbType": "mod"},
- "NIL": {POS: X},
- "NN": {POS: NOUN, "Number": "sing"},
- "NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
- "NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
- "NNS": {POS: NOUN, "Number": "plur"},
- "PDT": {POS: DET},
- "POS": {POS: PART, "Poss": "yes"},
- "PRP": {POS: PRON, "PronType": "prs"},
- "PRP$": {POS: DET, "PronType": "prs", "Poss": "yes"},
- "RB": {POS: ADV, "Degree": "pos"},
- "RBR": {POS: ADV, "Degree": "comp"},
- "RBS": {POS: ADV, "Degree": "sup"},
- "RP": {POS: ADP},
- "SP": {POS: SPACE},
- "SYM": {POS: SYM},
- "TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"},
- "UH": {POS: INTJ},
- "VB": {POS: VERB, "VerbForm": "inf"},
- "VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"},
- "VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"},
- "VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"},
- "VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"},
- "VBZ": {
- POS: VERB,
- "VerbForm": "fin",
- "Tense": "pres",
- "Number": "sing",
- "Person": "three",
- },
- "WDT": {POS: DET},
- "WP": {POS: PRON},
- "WP$": {POS: DET, "Poss": "yes"},
- "WRB": {POS: ADV},
- "ADD": {POS: X},
- "NFP": {POS: PUNCT},
- "GW": {POS: X},
- "XX": {POS: X},
- "BES": {POS: VERB},
- "HVS": {POS: VERB},
- "_SP": {POS: SPACE},
-}
diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py
index 964a714ae..c210e1a19 100644
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA, TAG, NORM, PRON_LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
_exc = {}
@@ -29,258 +28,270 @@ _exclude = [
for pron in ["i"]:
for orth in [pron, pron.title()]:
_exc[orth + "'m"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "'m", LEMMA: "be", NORM: "am", TAG: "VBP"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "'m", NORM: "am"},
]
_exc[orth + "m"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "m", LEMMA: "be", TAG: "VBP", "tenspect": 1, "number": 1},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "m", "tenspect": 1, "number": 1},
]
_exc[orth + "'ma"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "'m", LEMMA: "be", NORM: "am"},
- {ORTH: "a", LEMMA: "going to", NORM: "gonna"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "'m", NORM: "am"},
+ {ORTH: "a", NORM: "gonna"},
]
_exc[orth + "ma"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "m", LEMMA: "be", NORM: "am"},
- {ORTH: "a", LEMMA: "going to", NORM: "gonna"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "m", NORM: "am"},
+ {ORTH: "a", NORM: "gonna"},
]
for pron in ["i", "you", "he", "she", "it", "we", "they"]:
for orth in [pron, pron.title()]:
_exc[orth + "'ll"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "'ll", NORM: "will"},
]
_exc[orth + "ll"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "ll", NORM: "will"},
]
_exc[orth + "'ll've"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"},
- {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "'ll", NORM: "will"},
+ {ORTH: "'ve", NORM: "have"},
]
_exc[orth + "llve"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"},
- {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "ll", NORM: "will"},
+ {ORTH: "ve", NORM: "have"},
]
_exc[orth + "'d"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+ {ORTH: orth, NORM: pron},
{ORTH: "'d", NORM: "'d"},
]
_exc[orth + "d"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+ {ORTH: orth, NORM: pron},
{ORTH: "d", NORM: "'d"},
]
_exc[orth + "'d've"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "'d", LEMMA: "would", NORM: "would", TAG: "MD"},
- {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "'d", NORM: "would"},
+ {ORTH: "'ve", NORM: "have"},
]
_exc[orth + "dve"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "d", LEMMA: "would", NORM: "would", TAG: "MD"},
- {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "d", NORM: "would"},
+ {ORTH: "ve", NORM: "have"},
]
for pron in ["i", "you", "we", "they"]:
for orth in [pron, pron.title()]:
_exc[orth + "'ve"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "'ve", NORM: "have"},
]
_exc[orth + "ve"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "ve", NORM: "have"},
]
for pron in ["you", "we", "they"]:
for orth in [pron, pron.title()]:
_exc[orth + "'re"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "'re", LEMMA: "be", NORM: "are"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "'re", NORM: "are"},
]
_exc[orth + "re"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
- {ORTH: "re", LEMMA: "be", NORM: "are", TAG: "VBZ"},
+ {ORTH: orth, NORM: pron},
+ {ORTH: "re", NORM: "are"},
]
for pron in ["he", "she", "it"]:
for orth in [pron, pron.title()]:
_exc[orth + "'s"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+ {ORTH: orth, NORM: pron},
{ORTH: "'s", NORM: "'s"},
]
_exc[orth + "s"] = [
- {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+ {ORTH: orth, NORM: pron},
{ORTH: "s"},
]
# W-words, relative pronouns, prepositions etc.
-for word in ["who", "what", "when", "where", "why", "how", "there", "that", "this", "these", "those"]:
+for word in [
+ "who",
+ "what",
+ "when",
+ "where",
+ "why",
+ "how",
+ "there",
+ "that",
+ "this",
+ "these",
+ "those",
+]:
for orth in [word, word.title()]:
_exc[orth + "'s"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
+ {ORTH: orth, NORM: word},
{ORTH: "'s", NORM: "'s"},
]
- _exc[orth + "s"] = [{ORTH: orth, LEMMA: word, NORM: word}, {ORTH: "s"}]
+ _exc[orth + "s"] = [{ORTH: orth, NORM: word}, {ORTH: "s"}]
_exc[orth + "'ll"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
- {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"},
+ {ORTH: orth, NORM: word},
+ {ORTH: "'ll", NORM: "will"},
]
_exc[orth + "ll"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
- {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"},
+ {ORTH: orth, NORM: word},
+ {ORTH: "ll", NORM: "will"},
]
_exc[orth + "'ll've"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
- {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"},
- {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: word},
+ {ORTH: "'ll", NORM: "will"},
+ {ORTH: "'ve", NORM: "have"},
]
_exc[orth + "llve"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
- {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"},
- {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: word},
+ {ORTH: "ll", NORM: "will"},
+ {ORTH: "ve", NORM: "have"},
]
_exc[orth + "'re"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
- {ORTH: "'re", LEMMA: "be", NORM: "are"},
+ {ORTH: orth, NORM: word},
+ {ORTH: "'re", NORM: "are"},
]
_exc[orth + "re"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
- {ORTH: "re", LEMMA: "be", NORM: "are"},
+ {ORTH: orth, NORM: word},
+ {ORTH: "re", NORM: "are"},
]
_exc[orth + "'ve"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
- {ORTH: "'ve", LEMMA: "have", TAG: "VB"},
+ {ORTH: orth, NORM: word},
+ {ORTH: "'ve"},
]
_exc[orth + "ve"] = [
- {ORTH: orth, LEMMA: word},
- {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth},
+ {ORTH: "ve", NORM: "have"},
]
_exc[orth + "'d"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
+ {ORTH: orth, NORM: word},
{ORTH: "'d", NORM: "'d"},
]
_exc[orth + "d"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
+ {ORTH: orth, NORM: word},
{ORTH: "d", NORM: "'d"},
]
_exc[orth + "'d've"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
- {ORTH: "'d", LEMMA: "would", NORM: "would", TAG: "MD"},
- {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: word},
+ {ORTH: "'d", NORM: "would"},
+ {ORTH: "'ve", NORM: "have"},
]
_exc[orth + "dve"] = [
- {ORTH: orth, LEMMA: word, NORM: word},
- {ORTH: "d", LEMMA: "would", NORM: "would", TAG: "MD"},
- {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: orth, NORM: word},
+ {ORTH: "d", NORM: "would"},
+ {ORTH: "ve", NORM: "have"},
]
# Verbs
for verb_data in [
- {ORTH: "ca", LEMMA: "can", NORM: "can", TAG: "MD"},
- {ORTH: "could", NORM: "could", TAG: "MD"},
- {ORTH: "do", LEMMA: "do", NORM: "do"},
- {ORTH: "does", LEMMA: "do", NORM: "does"},
- {ORTH: "did", LEMMA: "do", NORM: "do", TAG: "VBD"},
- {ORTH: "had", LEMMA: "have", NORM: "have", TAG: "VBD"},
- {ORTH: "may", NORM: "may", TAG: "MD"},
- {ORTH: "might", NORM: "might", TAG: "MD"},
- {ORTH: "must", NORM: "must", TAG: "MD"},
+ {ORTH: "ca", NORM: "can"},
+ {ORTH: "could", NORM: "could"},
+ {ORTH: "do", NORM: "do"},
+ {ORTH: "does", NORM: "does"},
+ {ORTH: "did", NORM: "do"},
+ {ORTH: "had", NORM: "have"},
+ {ORTH: "may", NORM: "may"},
+ {ORTH: "might", NORM: "might"},
+ {ORTH: "must", NORM: "must"},
{ORTH: "need", NORM: "need"},
- {ORTH: "ought", NORM: "ought", TAG: "MD"},
- {ORTH: "sha", LEMMA: "shall", NORM: "shall", TAG: "MD"},
- {ORTH: "should", NORM: "should", TAG: "MD"},
- {ORTH: "wo", LEMMA: "will", NORM: "will", TAG: "MD"},
- {ORTH: "would", NORM: "would", TAG: "MD"},
+ {ORTH: "ought", NORM: "ought"},
+ {ORTH: "sha", NORM: "shall"},
+ {ORTH: "should", NORM: "should"},
+ {ORTH: "wo", NORM: "will"},
+ {ORTH: "would", NORM: "would"},
]:
verb_data_tc = dict(verb_data)
verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
for data in [verb_data, verb_data_tc]:
_exc[data[ORTH] + "n't"] = [
dict(data),
- {ORTH: "n't", LEMMA: "not", NORM: "not", TAG: "RB"},
+ {ORTH: "n't", NORM: "not"},
]
_exc[data[ORTH] + "nt"] = [
dict(data),
- {ORTH: "nt", LEMMA: "not", NORM: "not", TAG: "RB"},
+ {ORTH: "nt", NORM: "not"},
]
_exc[data[ORTH] + "n't've"] = [
dict(data),
- {ORTH: "n't", LEMMA: "not", NORM: "not", TAG: "RB"},
- {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: "n't", NORM: "not"},
+ {ORTH: "'ve", NORM: "have"},
]
_exc[data[ORTH] + "ntve"] = [
dict(data),
- {ORTH: "nt", LEMMA: "not", NORM: "not", TAG: "RB"},
- {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"},
+ {ORTH: "nt", NORM: "not"},
+ {ORTH: "ve", NORM: "have"},
]
for verb_data in [
- {ORTH: "could", NORM: "could", TAG: "MD"},
- {ORTH: "might", NORM: "might", TAG: "MD"},
- {ORTH: "must", NORM: "must", TAG: "MD"},
- {ORTH: "should", NORM: "should", TAG: "MD"},
- {ORTH: "would", NORM: "would", TAG: "MD"},
+ {ORTH: "could", NORM: "could"},
+ {ORTH: "might", NORM: "might"},
+ {ORTH: "must", NORM: "must"},
+ {ORTH: "should", NORM: "should"},
+ {ORTH: "would", NORM: "would"},
]:
verb_data_tc = dict(verb_data)
verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
for data in [verb_data, verb_data_tc]:
- _exc[data[ORTH] + "'ve"] = [dict(data), {ORTH: "'ve", LEMMA: "have", TAG: "VB"}]
+ _exc[data[ORTH] + "'ve"] = [dict(data), {ORTH: "'ve"}]
- _exc[data[ORTH] + "ve"] = [dict(data), {ORTH: "ve", LEMMA: "have", TAG: "VB"}]
+ _exc[data[ORTH] + "ve"] = [dict(data), {ORTH: "ve"}]
for verb_data in [
- {ORTH: "ai", LEMMA: "be", TAG: "VBP", "number": 2},
- {ORTH: "are", LEMMA: "be", NORM: "are", TAG: "VBP", "number": 2},
- {ORTH: "is", LEMMA: "be", NORM: "is", TAG: "VBZ"},
- {ORTH: "was", LEMMA: "be", NORM: "was"},
- {ORTH: "were", LEMMA: "be", NORM: "were"},
+ {ORTH: "ai", "number": 2},
+ {ORTH: "are", NORM: "are", "number": 2},
+ {ORTH: "is", NORM: "is"},
+ {ORTH: "was", NORM: "was"},
+ {ORTH: "were", NORM: "were"},
{ORTH: "have", NORM: "have"},
- {ORTH: "has", LEMMA: "have", NORM: "has"},
+ {ORTH: "has", NORM: "has"},
{ORTH: "dare", NORM: "dare"},
]:
verb_data_tc = dict(verb_data)
@@ -288,24 +299,24 @@ for verb_data in [
for data in [verb_data, verb_data_tc]:
_exc[data[ORTH] + "n't"] = [
dict(data),
- {ORTH: "n't", LEMMA: "not", NORM: "not", TAG: "RB"},
+ {ORTH: "n't", NORM: "not"},
]
_exc[data[ORTH] + "nt"] = [
dict(data),
- {ORTH: "nt", LEMMA: "not", NORM: "not", TAG: "RB"},
+ {ORTH: "nt", NORM: "not"},
]
# Other contractions with trailing apostrophe
for exc_data in [
- {ORTH: "doin", LEMMA: "do", NORM: "doing"},
- {ORTH: "goin", LEMMA: "go", NORM: "going"},
- {ORTH: "nothin", LEMMA: "nothing", NORM: "nothing"},
- {ORTH: "nuthin", LEMMA: "nothing", NORM: "nothing"},
- {ORTH: "ol", LEMMA: "old", NORM: "old"},
- {ORTH: "somethin", LEMMA: "something", NORM: "something"},
+ {ORTH: "doin", NORM: "doing"},
+ {ORTH: "goin", NORM: "going"},
+ {ORTH: "nothin", NORM: "nothing"},
+ {ORTH: "nuthin", NORM: "nothing"},
+ {ORTH: "ol", NORM: "old"},
+ {ORTH: "somethin", NORM: "something"},
]:
exc_data_tc = dict(exc_data)
exc_data_tc[ORTH] = exc_data_tc[ORTH].title()
@@ -320,9 +331,9 @@ for exc_data in [
for exc_data in [
{ORTH: "cause", NORM: "because"},
- {ORTH: "em", LEMMA: PRON_LEMMA, NORM: "them"},
- {ORTH: "ll", LEMMA: "will", NORM: "will"},
- {ORTH: "nuff", LEMMA: "enough", NORM: "enough"},
+ {ORTH: "em", NORM: "them"},
+ {ORTH: "ll", NORM: "will"},
+ {ORTH: "nuff", NORM: "enough"},
]:
exc_data_apos = dict(exc_data)
exc_data_apos[ORTH] = "'" + exc_data_apos[ORTH]
@@ -334,174 +345,133 @@ for exc_data in [
for h in range(1, 12 + 1):
for period in ["a.m.", "am"]:
- _exc["%d%s" % (h, period)] = [
- {ORTH: "%d" % h},
- {ORTH: period, LEMMA: "a.m.", NORM: "a.m."},
+ _exc[f"{h}{period}"] = [
+ {ORTH: f"{h}"},
+ {ORTH: period, NORM: "a.m."},
]
for period in ["p.m.", "pm"]:
- _exc["%d%s" % (h, period)] = [
- {ORTH: "%d" % h},
- {ORTH: period, LEMMA: "p.m.", NORM: "p.m."},
+ _exc[f"{h}{period}"] = [
+ {ORTH: f"{h}"},
+ {ORTH: period, NORM: "p.m."},
]
# Rest
_other_exc = {
- "y'all": [{ORTH: "y'", LEMMA: PRON_LEMMA, NORM: "you"}, {ORTH: "all"}],
- "yall": [{ORTH: "y", LEMMA: PRON_LEMMA, NORM: "you"}, {ORTH: "all"}],
- "how'd'y": [
- {ORTH: "how", LEMMA: "how"},
- {ORTH: "'d", LEMMA: "do"},
- {ORTH: "'y", LEMMA: PRON_LEMMA, NORM: "you"},
- ],
- "How'd'y": [
- {ORTH: "How", LEMMA: "how", NORM: "how"},
- {ORTH: "'d", LEMMA: "do"},
- {ORTH: "'y", LEMMA: PRON_LEMMA, NORM: "you"},
- ],
- "not've": [
- {ORTH: "not", LEMMA: "not", TAG: "RB"},
- {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"},
- ],
- "notve": [
- {ORTH: "not", LEMMA: "not", TAG: "RB"},
- {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"},
- ],
- "Not've": [
- {ORTH: "Not", LEMMA: "not", NORM: "not", TAG: "RB"},
- {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"},
- ],
- "Notve": [
- {ORTH: "Not", LEMMA: "not", NORM: "not", TAG: "RB"},
- {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"},
- ],
- "cannot": [
- {ORTH: "can", LEMMA: "can", TAG: "MD"},
- {ORTH: "not", LEMMA: "not", TAG: "RB"},
- ],
- "Cannot": [
- {ORTH: "Can", LEMMA: "can", NORM: "can", TAG: "MD"},
- {ORTH: "not", LEMMA: "not", TAG: "RB"},
- ],
- "gonna": [
- {ORTH: "gon", LEMMA: "go", NORM: "going"},
- {ORTH: "na", LEMMA: "to", NORM: "to"},
- ],
- "Gonna": [
- {ORTH: "Gon", LEMMA: "go", NORM: "going"},
- {ORTH: "na", LEMMA: "to", NORM: "to"},
- ],
- "gotta": [{ORTH: "got"}, {ORTH: "ta", LEMMA: "to", NORM: "to"}],
- "Gotta": [{ORTH: "Got", NORM: "got"}, {ORTH: "ta", LEMMA: "to", NORM: "to"}],
- "let's": [{ORTH: "let"}, {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "us"}],
- "Let's": [
- {ORTH: "Let", LEMMA: "let", NORM: "let"},
- {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "us"},
- ],
- "c'mon": [
- {ORTH: "c'm", NORM: "come", LEMMA: "come"},
- {ORTH: "on"}
- ],
- "C'mon": [
- {ORTH: "C'm", NORM: "come", LEMMA: "come"},
- {ORTH: "on"}
- ]
+ "y'all": [{ORTH: "y'", NORM: "you"}, {ORTH: "all"}],
+ "yall": [{ORTH: "y", NORM: "you"}, {ORTH: "all"}],
+ "how'd'y": [{ORTH: "how"}, {ORTH: "'d"}, {ORTH: "'y", NORM: "you"}],
+ "How'd'y": [{ORTH: "How", NORM: "how"}, {ORTH: "'d"}, {ORTH: "'y", NORM: "you"}],
+ "not've": [{ORTH: "not"}, {ORTH: "'ve", NORM: "have"}],
+ "notve": [{ORTH: "not"}, {ORTH: "ve", NORM: "have"}],
+ "Not've": [{ORTH: "Not", NORM: "not"}, {ORTH: "'ve", NORM: "have"}],
+ "Notve": [{ORTH: "Not", NORM: "not"}, {ORTH: "ve", NORM: "have"}],
+ "cannot": [{ORTH: "can"}, {ORTH: "not"}],
+ "Cannot": [{ORTH: "Can", NORM: "can"}, {ORTH: "not"}],
+ "gonna": [{ORTH: "gon", NORM: "going"}, {ORTH: "na", NORM: "to"}],
+ "Gonna": [{ORTH: "Gon", NORM: "going"}, {ORTH: "na", NORM: "to"}],
+ "gotta": [{ORTH: "got"}, {ORTH: "ta", NORM: "to"}],
+ "Gotta": [{ORTH: "Got", NORM: "got"}, {ORTH: "ta", NORM: "to"}],
+ "let's": [{ORTH: "let"}, {ORTH: "'s", NORM: "us"}],
+ "Let's": [{ORTH: "Let", NORM: "let"}, {ORTH: "'s", NORM: "us"}],
+ "c'mon": [{ORTH: "c'm", NORM: "come"}, {ORTH: "on"}],
+ "C'mon": [{ORTH: "C'm", NORM: "come"}, {ORTH: "on"}],
}
_exc.update(_other_exc)
for exc_data in [
- {ORTH: "'S", LEMMA: "'s", NORM: "'s"},
- {ORTH: "'s", LEMMA: "'s", NORM: "'s"},
- {ORTH: "\u2018S", LEMMA: "'s", NORM: "'s"},
- {ORTH: "\u2018s", LEMMA: "'s", NORM: "'s"},
- {ORTH: "and/or", LEMMA: "and/or", NORM: "and/or", TAG: "CC"},
- {ORTH: "w/o", LEMMA: "without", NORM: "without"},
- {ORTH: "'re", LEMMA: "be", NORM: "are"},
- {ORTH: "'Cause", LEMMA: "because", NORM: "because"},
- {ORTH: "'cause", LEMMA: "because", NORM: "because"},
- {ORTH: "'cos", LEMMA: "because", NORM: "because"},
- {ORTH: "'Cos", LEMMA: "because", NORM: "because"},
- {ORTH: "'coz", LEMMA: "because", NORM: "because"},
- {ORTH: "'Coz", LEMMA: "because", NORM: "because"},
- {ORTH: "'cuz", LEMMA: "because", NORM: "because"},
- {ORTH: "'Cuz", LEMMA: "because", NORM: "because"},
- {ORTH: "'bout", LEMMA: "about", NORM: "about"},
- {ORTH: "ma'am", LEMMA: "madam", NORM: "madam"},
- {ORTH: "Ma'am", LEMMA: "madam", NORM: "madam"},
- {ORTH: "o'clock", LEMMA: "o'clock", NORM: "o'clock"},
- {ORTH: "O'clock", LEMMA: "o'clock", NORM: "o'clock"},
- {ORTH: "lovin'", LEMMA: "love", NORM: "loving"},
- {ORTH: "Lovin'", LEMMA: "love", NORM: "loving"},
- {ORTH: "lovin", LEMMA: "love", NORM: "loving"},
- {ORTH: "Lovin", LEMMA: "love", NORM: "loving"},
- {ORTH: "havin'", LEMMA: "have", NORM: "having"},
- {ORTH: "Havin'", LEMMA: "have", NORM: "having"},
- {ORTH: "havin", LEMMA: "have", NORM: "having"},
- {ORTH: "Havin", LEMMA: "have", NORM: "having"},
- {ORTH: "doin'", LEMMA: "do", NORM: "doing"},
- {ORTH: "Doin'", LEMMA: "do", NORM: "doing"},
- {ORTH: "doin", LEMMA: "do", NORM: "doing"},
- {ORTH: "Doin", LEMMA: "do", NORM: "doing"},
- {ORTH: "goin'", LEMMA: "go", NORM: "going"},
- {ORTH: "Goin'", LEMMA: "go", NORM: "going"},
- {ORTH: "goin", LEMMA: "go", NORM: "going"},
- {ORTH: "Goin", LEMMA: "go", NORM: "going"},
- {ORTH: "Mt.", LEMMA: "Mount", NORM: "Mount"},
- {ORTH: "Ak.", LEMMA: "Alaska", NORM: "Alaska"},
- {ORTH: "Ala.", LEMMA: "Alabama", NORM: "Alabama"},
- {ORTH: "Apr.", LEMMA: "April", NORM: "April"},
- {ORTH: "Ariz.", LEMMA: "Arizona", NORM: "Arizona"},
- {ORTH: "Ark.", LEMMA: "Arkansas", NORM: "Arkansas"},
- {ORTH: "Aug.", LEMMA: "August", NORM: "August"},
- {ORTH: "Calif.", LEMMA: "California", NORM: "California"},
- {ORTH: "Colo.", LEMMA: "Colorado", NORM: "Colorado"},
- {ORTH: "Conn.", LEMMA: "Connecticut", NORM: "Connecticut"},
- {ORTH: "Dec.", LEMMA: "December", NORM: "December"},
- {ORTH: "Del.", LEMMA: "Delaware", NORM: "Delaware"},
- {ORTH: "Feb.", LEMMA: "February", NORM: "February"},
- {ORTH: "Fla.", LEMMA: "Florida", NORM: "Florida"},
- {ORTH: "Ga.", LEMMA: "Georgia", NORM: "Georgia"},
- {ORTH: "Ia.", LEMMA: "Iowa", NORM: "Iowa"},
- {ORTH: "Id.", LEMMA: "Idaho", NORM: "Idaho"},
- {ORTH: "Ill.", LEMMA: "Illinois", NORM: "Illinois"},
- {ORTH: "Ind.", LEMMA: "Indiana", NORM: "Indiana"},
- {ORTH: "Jan.", LEMMA: "January", NORM: "January"},
- {ORTH: "Jul.", LEMMA: "July", NORM: "July"},
- {ORTH: "Jun.", LEMMA: "June", NORM: "June"},
- {ORTH: "Kan.", LEMMA: "Kansas", NORM: "Kansas"},
- {ORTH: "Kans.", LEMMA: "Kansas", NORM: "Kansas"},
- {ORTH: "Ky.", LEMMA: "Kentucky", NORM: "Kentucky"},
- {ORTH: "La.", LEMMA: "Louisiana", NORM: "Louisiana"},
- {ORTH: "Mar.", LEMMA: "March", NORM: "March"},
- {ORTH: "Mass.", LEMMA: "Massachusetts", NORM: "Massachusetts"},
- {ORTH: "May.", LEMMA: "May", NORM: "May"},
- {ORTH: "Mich.", LEMMA: "Michigan", NORM: "Michigan"},
- {ORTH: "Minn.", LEMMA: "Minnesota", NORM: "Minnesota"},
- {ORTH: "Miss.", LEMMA: "Mississippi", NORM: "Mississippi"},
- {ORTH: "N.C.", LEMMA: "North Carolina", NORM: "North Carolina"},
- {ORTH: "N.D.", LEMMA: "North Dakota", NORM: "North Dakota"},
- {ORTH: "N.H.", LEMMA: "New Hampshire", NORM: "New Hampshire"},
- {ORTH: "N.J.", LEMMA: "New Jersey", NORM: "New Jersey"},
- {ORTH: "N.M.", LEMMA: "New Mexico", NORM: "New Mexico"},
- {ORTH: "N.Y.", LEMMA: "New York", NORM: "New York"},
- {ORTH: "Neb.", LEMMA: "Nebraska", NORM: "Nebraska"},
- {ORTH: "Nebr.", LEMMA: "Nebraska", NORM: "Nebraska"},
- {ORTH: "Nev.", LEMMA: "Nevada", NORM: "Nevada"},
- {ORTH: "Nov.", LEMMA: "November", NORM: "November"},
- {ORTH: "Oct.", LEMMA: "October", NORM: "October"},
- {ORTH: "Okla.", LEMMA: "Oklahoma", NORM: "Oklahoma"},
- {ORTH: "Ore.", LEMMA: "Oregon", NORM: "Oregon"},
- {ORTH: "Pa.", LEMMA: "Pennsylvania", NORM: "Pennsylvania"},
- {ORTH: "S.C.", LEMMA: "South Carolina", NORM: "South Carolina"},
- {ORTH: "Sep.", LEMMA: "September", NORM: "September"},
- {ORTH: "Sept.", LEMMA: "September", NORM: "September"},
- {ORTH: "Tenn.", LEMMA: "Tennessee", NORM: "Tennessee"},
- {ORTH: "Va.", LEMMA: "Virginia", NORM: "Virginia"},
- {ORTH: "Wash.", LEMMA: "Washington", NORM: "Washington"},
- {ORTH: "Wis.", LEMMA: "Wisconsin", NORM: "Wisconsin"},
+ {ORTH: "'S", NORM: "'s"},
+ {ORTH: "'s", NORM: "'s"},
+ {ORTH: "\u2018S", NORM: "'s"},
+ {ORTH: "\u2018s", NORM: "'s"},
+ {ORTH: "and/or", NORM: "and/or"},
+ {ORTH: "w/o", NORM: "without"},
+ {ORTH: "'re", NORM: "are"},
+ {ORTH: "'Cause", NORM: "because"},
+ {ORTH: "'cause", NORM: "because"},
+ {ORTH: "'cos", NORM: "because"},
+ {ORTH: "'Cos", NORM: "because"},
+ {ORTH: "'coz", NORM: "because"},
+ {ORTH: "'Coz", NORM: "because"},
+ {ORTH: "'cuz", NORM: "because"},
+ {ORTH: "'Cuz", NORM: "because"},
+ {ORTH: "'bout", NORM: "about"},
+ {ORTH: "ma'am", NORM: "madam"},
+ {ORTH: "Ma'am", NORM: "madam"},
+ {ORTH: "o'clock", NORM: "o'clock"},
+ {ORTH: "O'clock", NORM: "o'clock"},
+ {ORTH: "lovin'", NORM: "loving"},
+ {ORTH: "Lovin'", NORM: "loving"},
+ {ORTH: "lovin", NORM: "loving"},
+ {ORTH: "Lovin", NORM: "loving"},
+ {ORTH: "havin'", NORM: "having"},
+ {ORTH: "Havin'", NORM: "having"},
+ {ORTH: "havin", NORM: "having"},
+ {ORTH: "Havin", NORM: "having"},
+ {ORTH: "doin'", NORM: "doing"},
+ {ORTH: "Doin'", NORM: "doing"},
+ {ORTH: "doin", NORM: "doing"},
+ {ORTH: "Doin", NORM: "doing"},
+ {ORTH: "goin'", NORM: "going"},
+ {ORTH: "Goin'", NORM: "going"},
+ {ORTH: "goin", NORM: "going"},
+ {ORTH: "Goin", NORM: "going"},
+ {ORTH: "Mt.", NORM: "Mount"},
+ {ORTH: "Ak.", NORM: "Alaska"},
+ {ORTH: "Ala.", NORM: "Alabama"},
+ {ORTH: "Apr.", NORM: "April"},
+ {ORTH: "Ariz.", NORM: "Arizona"},
+ {ORTH: "Ark.", NORM: "Arkansas"},
+ {ORTH: "Aug.", NORM: "August"},
+ {ORTH: "Calif.", NORM: "California"},
+ {ORTH: "Colo.", NORM: "Colorado"},
+ {ORTH: "Conn.", NORM: "Connecticut"},
+ {ORTH: "Dec.", NORM: "December"},
+ {ORTH: "Del.", NORM: "Delaware"},
+ {ORTH: "Feb.", NORM: "February"},
+ {ORTH: "Fla.", NORM: "Florida"},
+ {ORTH: "Ga.", NORM: "Georgia"},
+ {ORTH: "Ia.", NORM: "Iowa"},
+ {ORTH: "Id.", NORM: "Idaho"},
+ {ORTH: "Ill.", NORM: "Illinois"},
+ {ORTH: "Ind.", NORM: "Indiana"},
+ {ORTH: "Jan.", NORM: "January"},
+ {ORTH: "Jul.", NORM: "July"},
+ {ORTH: "Jun.", NORM: "June"},
+ {ORTH: "Kan.", NORM: "Kansas"},
+ {ORTH: "Kans.", NORM: "Kansas"},
+ {ORTH: "Ky.", NORM: "Kentucky"},
+ {ORTH: "La.", NORM: "Louisiana"},
+ {ORTH: "Mar.", NORM: "March"},
+ {ORTH: "Mass.", NORM: "Massachusetts"},
+ {ORTH: "May.", NORM: "May"},
+ {ORTH: "Mich.", NORM: "Michigan"},
+ {ORTH: "Minn.", NORM: "Minnesota"},
+ {ORTH: "Miss.", NORM: "Mississippi"},
+ {ORTH: "N.C.", NORM: "North Carolina"},
+ {ORTH: "N.D.", NORM: "North Dakota"},
+ {ORTH: "N.H.", NORM: "New Hampshire"},
+ {ORTH: "N.J.", NORM: "New Jersey"},
+ {ORTH: "N.M.", NORM: "New Mexico"},
+ {ORTH: "N.Y.", NORM: "New York"},
+ {ORTH: "Neb.", NORM: "Nebraska"},
+ {ORTH: "Nebr.", NORM: "Nebraska"},
+ {ORTH: "Nev.", NORM: "Nevada"},
+ {ORTH: "Nov.", NORM: "November"},
+ {ORTH: "Oct.", NORM: "October"},
+ {ORTH: "Okla.", NORM: "Oklahoma"},
+ {ORTH: "Ore.", NORM: "Oregon"},
+ {ORTH: "Pa.", NORM: "Pennsylvania"},
+ {ORTH: "S.C.", NORM: "South Carolina"},
+ {ORTH: "Sep.", NORM: "September"},
+ {ORTH: "Sept.", NORM: "September"},
+ {ORTH: "Tenn.", NORM: "Tennessee"},
+ {ORTH: "Va.", NORM: "Virginia"},
+ {ORTH: "Wash.", NORM: "Washington"},
+ {ORTH: "Wis.", NORM: "Wisconsin"},
]:
_exc[exc_data[ORTH]] = [exc_data]
@@ -552,4 +522,4 @@ for string in _exclude:
_exc.pop(string)
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/es/__init__.py b/spacy/lang/es/__init__.py
index 249748a17..9a47855b1 100644
--- a/spacy/lang/es/__init__.py
+++ b/spacy/lang/es/__init__.py
@@ -1,33 +1,18 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .syntax_iterators import SYNTAX_ITERATORS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
class SpanishDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "es"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- tag_map = TAG_MAP
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
- stop_words = STOP_WORDS
+ lex_attr_getters = LEX_ATTRS
syntax_iterators = SYNTAX_ITERATORS
+ stop_words = STOP_WORDS
class Spanish(Language):
diff --git a/spacy/lang/es/examples.py b/spacy/lang/es/examples.py
index 7ab0a7dfe..2bcbd8740 100644
--- a/spacy/lang/es/examples.py
+++ b/spacy/lang/es/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/es/lex_attrs.py b/spacy/lang/es/lex_attrs.py
index 632a638fc..988dbaba1 100644
--- a/spacy/lang/es/lex_attrs.py
+++ b/spacy/lang/es/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/es/punctuation.py b/spacy/lang/es/punctuation.py
index f989221c2..e9552371e 100644
--- a/spacy/lang/es/punctuation.py
+++ b/spacy/lang/es/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES
from ..char_classes import LIST_ICONS, CURRENCY, LIST_UNITS, PUNCT
from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
diff --git a/spacy/lang/es/stop_words.py b/spacy/lang/es/stop_words.py
index 20e929b48..004df4fca 100644
--- a/spacy/lang/es/stop_words.py
+++ b/spacy/lang/es/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
actualmente acuerdo adelante ademas además adrede afirmó agregó ahi ahora ahí
diff --git a/spacy/lang/es/syntax_iterators.py b/spacy/lang/es/syntax_iterators.py
index d4572b682..427f1f203 100644
--- a/spacy/lang/es/syntax_iterators.py
+++ b/spacy/lang/es/syntax_iterators.py
@@ -1,16 +1,15 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Union, Iterator, Optional, List, Tuple
from ...symbols import NOUN, PROPN, PRON, VERB, AUX
from ...errors import Errors
+from ...tokens import Doc, Span, Token
-def noun_chunks(doclike):
+def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Span]:
+ """Detect base noun phrases from a dependency parse. Works on Doc and Span."""
doc = doclike.doc
-
if not doc.is_parsed:
raise ValueError(Errors.E029)
-
if not len(doc):
return
np_label = doc.vocab.strings.add("NP")
@@ -30,18 +29,24 @@ def noun_chunks(doclike):
token = next_token(token)
-def is_verb_token(token):
+def is_verb_token(token: Token) -> bool:
return token.pos in [VERB, AUX]
-def next_token(token):
+def next_token(token: Token) -> Optional[Token]:
try:
return token.nbor()
except IndexError:
return None
-def noun_bounds(doc, root, np_left_deps, np_right_deps, stop_deps):
+def noun_bounds(
+ doc: Doc,
+ root: Token,
+ np_left_deps: List[str],
+ np_right_deps: List[str],
+ stop_deps: List[str],
+) -> Tuple[Token, Token]:
left_bound = root
for token in reversed(list(root.lefts)):
if token.dep in np_left_deps:
@@ -52,12 +57,8 @@ def noun_bounds(doc, root, np_left_deps, np_right_deps, stop_deps):
left, right = noun_bounds(
doc, token, np_left_deps, np_right_deps, stop_deps
)
- if list(
- filter(
- lambda t: is_verb_token(t) or t.dep in stop_deps,
- doc[left_bound.i : right.i],
- )
- ):
+ filter_func = lambda t: is_verb_token(t) or t.dep in stop_deps
+ if list(filter(filter_func, doc[left_bound.i : right.i],)):
break
else:
right_bound = right
diff --git a/spacy/lang/es/tag_map.py b/spacy/lang/es/tag_map.py
deleted file mode 100644
index 7a7c9d549..000000000
--- a/spacy/lang/es/tag_map.py
+++ /dev/null
@@ -1,313 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, SYM, ADJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, SCONJ, AUX, CONJ
-
-# fmt: off
-TAG_MAP = {
- "ADJ___": {"morph": "_", POS: ADJ},
- "ADJ__AdpType=Prep": {"morph": "AdpType=Prep", POS: ADJ},
- "ADJ__AdpType=Preppron|Gender=Masc|Number=Sing": {"morph": "AdpType=Preppron|Gender=Masc|Number=Sing", POS: ADV},
- "ADJ__AdvType=Tim": {"morph": "AdvType=Tim", POS: ADJ},
- "ADJ__Gender=Fem|Number=Plur": {"morph": "Gender=Fem|Number=Plur", POS: ADJ},
- "ADJ__Gender=Fem|Number=Plur|NumType=Ord": {"morph": "Gender=Fem|Number=Plur|NumType=Ord", POS: ADJ},
- "ADJ__Gender=Fem|Number=Plur|VerbForm=Part": {"morph": "Gender=Fem|Number=Plur|VerbForm=Part", POS: ADJ},
- "ADJ__Gender=Fem|Number=Sing": {"morph": "Gender=Fem|Number=Sing", POS: ADJ},
- "ADJ__Gender=Fem|Number=Sing|NumType=Ord": {"morph": "Gender=Fem|Number=Sing|NumType=Ord", POS: ADJ},
- "ADJ__Gender=Fem|Number=Sing|VerbForm=Part": {"morph": "Gender=Fem|Number=Sing|VerbForm=Part", POS: ADJ},
- "ADJ__Gender=Masc": {"morph": "Gender=Masc", POS: ADJ},
- "ADJ__Gender=Masc|Number=Plur": {"morph": "Gender=Masc|Number=Plur", POS: ADJ},
- "ADJ__Gender=Masc|Number=Plur|NumType=Ord": {"morph": "Gender=Masc|Number=Plur|NumType=Ord", POS: ADJ},
- "ADJ__Gender=Masc|Number=Plur|VerbForm=Part": {"morph": "Gender=Masc|Number=Plur|VerbForm=Part", POS: ADJ},
- "ADJ__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", POS: ADJ},
- "ADJ__Gender=Masc|Number=Sing|NumType=Ord": {"morph": "Gender=Masc|Number=Sing|NumType=Ord", POS: ADJ},
- "ADJ__Gender=Masc|Number=Sing|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|VerbForm=Part", POS: ADJ},
- "ADJ__Number=Plur": {"morph": "Number=Plur", POS: ADJ},
- "ADJ__Number=Sing": {"morph": "Number=Sing", POS: ADJ},
- "ADP__AdpType=Prep": {"morph": "AdpType=Prep", POS: ADP},
- "ADP__AdpType=Preppron|Gender=Fem|Number=Sing": {"morph": "AdpType=Preppron|Gender=Fem|Number=Sing", POS: ADP},
- "ADP__AdpType=Preppron|Gender=Masc|Number=Plur": {"morph": "AdpType=Preppron|Gender=Masc|Number=Plur", POS: ADP},
- "ADP__AdpType=Preppron|Gender=Masc|Number=Sing": {"morph": "AdpType=Preppron|Gender=Masc|Number=Sing", POS: ADP},
- "ADP": {POS: ADP},
- "ADV___": {"morph": "_", POS: ADV},
- "ADV__AdpType=Prep": {"morph": "AdpType=Prep", POS: ADV},
- "ADV__AdpType=Preppron|Gender=Masc|Number=Sing": {"morph": "AdpType=Preppron|Gender=Masc|Number=Sing", POS: ADV},
- "ADV__AdvType=Tim": {"morph": "AdvType=Tim", POS: ADV},
- "ADV__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", POS: ADV},
- "ADV__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", POS: ADV},
- "ADV__Negative=Neg": {"morph": "Negative=Neg", POS: ADV},
- "ADV__Number=Plur": {"morph": "Number=Plur", POS: ADV},
- "ADV__Polarity=Neg": {"morph": "Polarity=Neg", POS: ADV},
- "AUX__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part", POS: AUX},
- "AUX__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part", POS: AUX},
- "AUX__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part", POS: AUX},
- "AUX__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", POS: AUX},
- "AUX__Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Imp|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=3|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=2|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Imp|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=3|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", POS: AUX},
- "AUX__VerbForm=Ger": {"morph": "VerbForm=Ger", POS: AUX},
- "AUX__VerbForm=Inf": {"morph": "VerbForm=Inf", POS: AUX},
- "CCONJ___": {"morph": "_", POS: CONJ},
- "CONJ___": {"morph": "_", POS: CONJ},
- "DET__Definite=Def|Gender=Fem|Number=Plur|PronType=Art": {"morph": "Definite=Def|Gender=Fem|Number=Plur|PronType=Art", POS: DET},
- "DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"morph": "Definite=Def|Gender=Fem|Number=Sing|PronType=Art", POS: DET},
- "DET__Definite=Def|Gender=Masc|Number=Plur|PronType=Art": {"morph": "Definite=Def|Gender=Masc|Number=Plur|PronType=Art", POS: DET},
- "DET__Definite=Def|Gender=Masc|Number=Sing|PronType=Art": {"morph": "Definite=Def|Gender=Masc|Number=Sing|PronType=Art", POS: DET},
- "DET__Definite=Def|Gender=Masc|PronType=Art": {"morph": "Definite=Def|Gender=Masc|PronType=Art", POS: DET},
- "DET__Definite=Def|Number=Sing|PronType=Art": {"morph": "Definite=Def|Number=Sing|PronType=Art", POS: DET},
- "DET__Definite=Ind|Gender=Fem|Number=Plur|PronType=Art": {"morph": "Definite=Ind|Gender=Fem|Number=Plur|PronType=Art", POS: DET},
- "DET__Definite=Ind|Gender=Fem|Number=Sing|NumType=Card|PronType=Art": {"morph": "Definite=Ind|Gender=Fem|Number=Sing|NumType=Card|PronType=Art", POS: DET},
- "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Art": {"morph": "Definite=Ind|Gender=Fem|Number=Sing|PronType=Art", POS: DET},
- "DET__Definite=Ind|Gender=Masc|Number=Plur|PronType=Art": {"morph": "Definite=Ind|Gender=Masc|Number=Plur|PronType=Art", POS: DET},
- "DET__Definite=Ind|Gender=Masc|Number=Sing|NumType=Card|PronType=Art": {"morph": "Definite=Ind|Gender=Masc|Number=Sing|NumType=Card|PronType=Art", POS: DET},
- "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Art": {"morph": "Definite=Ind|Gender=Masc|Number=Sing|PronType=Art", POS: DET},
- "DET__Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Fem|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Fem|Number=Plur|PronType=Art": {"morph": "Gender=Fem|Number=Plur|PronType=Art", POS: DET},
- "DET__Gender=Fem|Number=Plur|PronType=Dem": {"morph": "Gender=Fem|Number=Plur|PronType=Dem", POS: DET},
- "DET__Gender=Fem|Number=Plur|PronType=Ind": {"morph": "Gender=Fem|Number=Plur|PronType=Ind", POS: DET},
- "DET__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Fem|Number=Sing|PronType=Art": {"morph": "Gender=Fem|Number=Sing|PronType=Art", POS: DET},
- "DET__Gender=Fem|Number=Sing|PronType=Dem": {"morph": "Gender=Fem|Number=Sing|PronType=Dem", POS: DET},
- "DET__Gender=Fem|Number=Sing|PronType=Ind": {"morph": "Gender=Fem|Number=Sing|PronType=Ind", POS: DET},
- "DET__Gender=Fem|Number=Sing|PronType=Int": {"morph": "Gender=Fem|Number=Sing|PronType=Int", POS: DET},
- "DET__Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Masc|Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=3|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Masc|Number=Plur|PronType=Art": {"morph": "Gender=Masc|Number=Plur|PronType=Art", POS: DET},
- "DET__Gender=Masc|Number=Plur|PronType=Dem": {"morph": "Gender=Masc|Number=Plur|PronType=Dem", POS: DET},
- "DET__Gender=Masc|Number=Plur|PronType=Ind": {"morph": "Gender=Masc|Number=Plur|PronType=Ind", POS: DET},
- "DET__Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Gender=Masc|Number=Sing|PronType=Art": {"morph": "Gender=Masc|Number=Sing|PronType=Art", POS: DET},
- "DET__Gender=Masc|Number=Sing|PronType=Dem": {"morph": "Gender=Masc|Number=Sing|PronType=Dem", POS: DET},
- "DET__Gender=Masc|Number=Sing|PronType=Ind": {"morph": "Gender=Masc|Number=Sing|PronType=Ind", POS: DET},
- "DET__Gender=Masc|Number=Sing|PronType=Int": {"morph": "Gender=Masc|Number=Sing|PronType=Int", POS: DET},
- "DET__Gender=Masc|Number=Sing|PronType=Tot": {"morph": "Gender=Masc|Number=Sing|PronType=Tot", POS: DET},
- "DET__Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Person=3|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Number=Plur|PronType=Dem": {"morph": "Number=Plur|PronType=Dem", POS: DET},
- "DET__Number=Plur|PronType=Ind": {"morph": "Number=Plur|PronType=Ind", POS: DET},
- "DET__Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Person=3|Poss=Yes|PronType=Prs", POS: DET},
- "DET__Number=Sing|PronType=Dem": {"morph": "Number=Sing|PronType=Dem", POS: DET},
- "DET__Number=Sing|PronType=Ind": {"morph": "Number=Sing|PronType=Ind", POS: DET},
- "DET__PronType=Int": {"morph": "PronType=Int", POS: DET},
- "DET__PronType=Rel": {"morph": "PronType=Rel", POS: DET},
- "DET": {POS: DET},
- "INTJ___": {"morph": "_", POS: INTJ},
- "NOUN___": {"morph": "_", POS: NOUN},
- "NOUN__AdvType=Tim": {"morph": "AdvType=Tim", POS: NOUN},
- "NOUN__AdvType=Tim|Gender=Masc|Number=Sing": {"morph": "AdvType=Tim|Gender=Masc|Number=Sing", POS: NOUN},
- "NOUN__Gender=Fem": {"morph": "Gender=Fem", POS: NOUN},
- "NOUN__Gender=Fem|Number=Plur": {"morph": "Gender=Fem|Number=Plur", POS: NOUN},
- "NOUN__Gender=Fem|Number=Sing": {"morph": "Gender=Fem|Number=Sing", POS: NOUN},
- "NOUN__Gender=Masc": {"morph": "Gender=Masc", POS: NOUN},
- "NOUN__Gender=Masc|Number=Plur": {"morph": "Gender=Masc|Number=Plur", POS: NOUN},
- "NOUN__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", POS: NOUN},
- "NOUN__Gender=Masc|Number=Sing|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|VerbForm=Part", POS: NOUN},
- "NOUN__Number=Plur": {"morph": "Number=Plur", POS: NOUN},
- "NOUN__Number=Sing": {"morph": "Number=Sing", POS: NOUN},
- "NOUN__NumForm=Digit": {"morph": "NumForm=Digit", POS: NOUN},
- "NUM__Gender=Fem|Number=Plur|NumType=Card": {"morph": "Gender=Fem|Number=Plur|NumType=Card", POS: NUM},
- "NUM__Gender=Fem|Number=Sing|NumType=Card": {"morph": "Gender=Fem|Number=Sing|NumType=Card", POS: NUM},
- "NUM__Gender=Masc|Number=Plur|NumType=Card": {"morph": "Gender=Masc|Number=Plur|NumType=Card", POS: NUM},
- "NUM__Gender=Masc|Number=Sing|NumType=Card": {"morph": "Gender=Masc|Number=Sing|NumType=Card", POS: NUM},
- "NUM__Number=Plur|NumType=Card": {"morph": "Number=Plur|NumType=Card", POS: NUM},
- "NUM__Number=Sing|NumType=Card": {"morph": "Number=Sing|NumType=Card", POS: NUM},
- "NUM__NumForm=Digit": {"morph": "NumForm=Digit", POS: NUM},
- "NUM__NumForm=Digit|NumType=Card": {"morph": "NumForm=Digit|NumType=Card", POS: NUM},
- "NUM__NumForm=Digit|NumType=Frac": {"morph": "NumForm=Digit|NumType=Frac", POS: NUM},
- "NUM__NumType=Card": {"morph": "NumType=Card", POS: NUM},
- "PART___": {"morph": "_", POS: PART},
- "PART__Negative=Neg": {"morph": "Negative=Neg", POS: PART},
- "PRON___": {"morph": "_", POS: PRON},
- "PRON__Case=Acc|Gender=Fem|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Fem|Number=Plur|Person=3|PronType=Prs", POS: PRON},
- "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs", POS: PRON},
- "PRON__Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Masc|Number=Plur|Person=3|PronType=Prs", POS: PRON},
- "PRON__Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs", POS: PRON},
- "PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Acc|Number=Plur|Person=3|PronType=Prs", POS: PRON},
- "PRON__Case=Acc|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Acc|Number=Sing|Person=3|PronType=Prs", POS: PRON},
- "PRON__Case=Acc|Person=3|PronType=Prs": {"morph": "Case=Acc|Person=3|PronType=Prs", POS: PRON},
- "PRON__Case=Dat|Number=Plur|Person=3|PronType=Prs": {"morph": "Case=Dat|Number=Plur|Person=3|PronType=Prs", POS: PRON},
- "PRON__Case=Dat|Number=Sing|Person=3|PronType=Prs": {"morph": "Case=Dat|Number=Sing|Person=3|PronType=Prs", POS: PRON},
- "PRON__Case=Nom|Number=Sing|Person=1|PronType=Prs": {"morph": "Case=Nom|Number=Sing|Person=1|PronType=Prs", POS: PRON},
- "PRON__Case=Nom|Number=Sing|Person=2|PronType=Prs": {"morph": "Case=Nom|Number=Sing|Person=2|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Person=3|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Plur|Person=3|PronType=Prs": {"morph": "Gender=Fem|Number=Plur|Person=3|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Plur|PronType=Dem": {"morph": "Gender=Fem|Number=Plur|PronType=Dem", POS: PRON},
- "PRON__Gender=Fem|Number=Plur|PronType=Ind": {"morph": "Gender=Fem|Number=Plur|PronType=Ind", POS: PRON},
- "PRON__Gender=Fem|Number=Plur|PronType=Int": {"morph": "Gender=Fem|Number=Plur|PronType=Int", POS: PRON},
- "PRON__Gender=Fem|Number=Plur|PronType=Rel": {"morph": "Gender=Fem|Number=Plur|PronType=Rel", POS: PRON},
- "PRON__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Sing|Person=1|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=1|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Sing|Person=3|PronType=Prs": {"morph": "Gender=Fem|Number=Sing|Person=3|PronType=Prs", POS: PRON},
- "PRON__Gender=Fem|Number=Sing|PronType=Dem": {"morph": "Gender=Fem|Number=Sing|PronType=Dem", POS: PRON},
- "PRON__Gender=Fem|Number=Sing|PronType=Ind": {"morph": "Gender=Fem|Number=Sing|PronType=Ind", POS: PRON},
- "PRON__Gender=Fem|Number=Sing|PronType=Rel": {"morph": "Gender=Fem|Number=Sing|PronType=Rel", POS: PRON},
- "PRON__Gender=Masc|Number=Plur|Person=1|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=1|PronType=Prs", POS: PRON},
- "PRON__Gender=Masc|Number=Plur|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=2|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Masc|Number=Plur|Person=3|PronType=Prs": {"morph": "Gender=Masc|Number=Plur|Person=3|PronType=Prs", POS: PRON},
- "PRON__Gender=Masc|Number=Plur|PronType=Dem": {"morph": "Gender=Masc|Number=Plur|PronType=Dem", POS: PRON},
- "PRON__Gender=Masc|Number=Plur|PronType=Ind": {"morph": "Gender=Masc|Number=Plur|PronType=Ind", POS: PRON},
- "PRON__Gender=Masc|Number=Plur|PronType=Int": {"morph": "Gender=Masc|Number=Plur|PronType=Int", POS: PRON},
- "PRON__Gender=Masc|Number=Plur|PronType=Rel": {"morph": "Gender=Masc|Number=Plur|PronType=Rel", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Prs": {"morph": "Gender=Masc|Number=Sing|Person=3|PronType=Prs", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|PronType=Dem": {"morph": "Gender=Masc|Number=Sing|PronType=Dem", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|PronType=Ind": {"morph": "Gender=Masc|Number=Sing|PronType=Ind", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|PronType=Int": {"morph": "Gender=Masc|Number=Sing|PronType=Int", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|PronType=Rel": {"morph": "Gender=Masc|Number=Sing|PronType=Rel", POS: PRON},
- "PRON__Gender=Masc|Number=Sing|PronType=Tot": {"morph": "Gender=Masc|Number=Sing|PronType=Tot", POS: PRON},
- "PRON__Number=Plur|Person=1": {"morph": "Number=Plur|Person=1", POS: PRON},
- "PRON__Number=Plur|Person=1|PronType=Prs": {"morph": "Number=Plur|Person=1|PronType=Prs", POS: PRON},
- "PRON__Number=Plur|Person=2|Polite=Form|PronType=Prs": {"morph": "Number=Plur|Person=2|Polite=Form|PronType=Prs", POS: PRON},
- "PRON__Number=Plur|Person=2|PronType=Prs": {"morph": "Number=Plur|Person=2|PronType=Prs", POS: PRON},
- "PRON__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Plur|Person=3|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Number=Plur|Person=3|PronType=Prs": {"morph": "Number=Plur|Person=3|PronType=Prs", POS: PRON},
- "PRON__Number=Plur|PronType=Dem": {"morph": "Number=Plur|PronType=Dem", POS: PRON},
- "PRON__Number=Plur|PronType=Ind": {"morph": "Number=Plur|PronType=Ind", POS: PRON},
- "PRON__Number=Plur|PronType=Int": {"morph": "Number=Plur|PronType=Int", POS: PRON},
- "PRON__Number=Plur|PronType=Rel": {"morph": "Number=Plur|PronType=Rel", POS: PRON},
- "PRON__Number=Sing|Person=1": {"morph": "Number=Sing|Person=1", POS: PRON},
- "PRON__Number=Sing|Person=1|PrepCase=Pre|PronType=Prs": {"morph": "Number=Sing|Person=1|PrepCase=Pre|PronType=Prs", POS: PRON},
- "PRON__Number=Sing|Person=1|PronType=Prs": {"morph": "Number=Sing|Person=1|PronType=Prs", POS: PRON},
- "PRON__Number=Sing|Person=2": {"morph": "Number=Sing|Person=2", POS: PRON},
- "PRON__Number=Sing|Person=2|Polite=Form|PronType=Prs": {"morph": "Number=Sing|Person=2|Polite=Form|PronType=Prs", POS: PRON},
- "PRON__Number=Sing|Person=2|PrepCase=Pre|PronType=Prs": {"morph": "Number=Sing|Person=2|PrepCase=Pre|PronType=Prs", POS: PRON},
- "PRON__Number=Sing|Person=2|PronType=Prs": {"morph": "Number=Sing|Person=2|PronType=Prs", POS: PRON},
- "PRON__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"morph": "Number=Sing|Person=3|Poss=Yes|PronType=Prs", POS: PRON},
- "PRON__Number=Sing|Person=3|PronType=Prs": {"morph": "Number=Sing|Person=3|PronType=Prs", POS: PRON},
- "PRON__Number=Sing|PronType=Dem": {"morph": "Number=Sing|PronType=Dem", POS: PRON},
- "PRON__Number=Sing|PronType=Ind": {"morph": "Number=Sing|PronType=Ind", POS: PRON},
- "PRON__Number=Sing|PronType=Int": {"morph": "Number=Sing|PronType=Int", POS: PRON},
- "PRON__Number=Sing|PronType=Rel": {"morph": "Number=Sing|PronType=Rel", POS: PRON},
- "PRON__Person=1|PronType=Prs": {"morph": "Person=1|PronType=Prs", POS: PRON},
- "PRON__Person=3": {"morph": "Person=3", POS: PRON},
- "PRON__Person=3|PrepCase=Pre|PronType=Prs": {"morph": "Person=3|PrepCase=Pre|PronType=Prs", POS: PRON},
- "PRON__Person=3|PronType=Prs": {"morph": "Person=3|PronType=Prs", POS: PRON},
- "PRON__PronType=Ind": {"morph": "PronType=Ind", POS: PRON},
- "PRON__PronType=Int": {"morph": "PronType=Int", POS: PRON},
- "PRON__PronType=Rel": {"morph": "PronType=Rel", POS: PRON},
- "PROPN___": {"morph": "_", POS: PROPN},
- "PUNCT___": {"morph": "_", POS: PUNCT},
- "PUNCT__PunctSide=Fin|PunctType=Brck": {"morph": "PunctSide=Fin|PunctType=Brck", POS: PUNCT},
- "PUNCT__PunctSide=Fin|PunctType=Excl": {"morph": "PunctSide=Fin|PunctType=Excl", POS: PUNCT},
- "PUNCT__PunctSide=Fin|PunctType=Qest": {"morph": "PunctSide=Fin|PunctType=Qest", POS: PUNCT},
- "PUNCT__PunctSide=Ini|PunctType=Brck": {"morph": "PunctSide=Ini|PunctType=Brck", POS: PUNCT},
- "PUNCT__PunctSide=Ini|PunctType=Excl": {"morph": "PunctSide=Ini|PunctType=Excl", POS: PUNCT},
- "PUNCT__PunctSide=Ini|PunctType=Qest": {"morph": "PunctSide=Ini|PunctType=Qest", POS: PUNCT},
- "PUNCT__PunctType=Colo": {"morph": "PunctType=Colo", POS: PUNCT},
- "PUNCT__PunctType=Comm": {"morph": "PunctType=Comm", POS: PUNCT},
- "PUNCT__PunctType=Dash": {"morph": "PunctType=Dash", POS: PUNCT},
- "PUNCT__PunctType=Peri": {"morph": "PunctType=Peri", POS: PUNCT},
- "PUNCT__PunctType=Quot": {"morph": "PunctType=Quot", POS: PUNCT},
- "PUNCT__PunctType=Semi": {"morph": "PunctType=Semi", POS: PUNCT},
- "SCONJ___": {"morph": "_", POS: SCONJ},
- "SYM___": {"morph": "_", POS: SYM},
- "SYM__NumForm=Digit": {"morph": "NumForm=Digit", POS: SYM},
- "SYM__NumForm=Digit|NumType=Frac": {"morph": "NumForm=Digit|NumType=Frac", POS: SYM},
- "VERB__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part", POS: VERB},
- "VERB__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part", POS: VERB},
- "VERB__Gender=Masc|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Gender=Masc|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part", POS: VERB},
- "VERB__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {"morph": "Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part", POS: VERB},
- "VERB__Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=1|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Plur|Person=3|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=1|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=2|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Imp|Number=Plur|Person=1|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=1|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Imp|Number=Plur|Person=2|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=2|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Imp|Number=Plur|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Plur|Person=3|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=2|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Imp|Number=Sing|Person=3|VerbForm=Fin": {"morph": "Mood=Imp|Number=Sing|Person=3|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Ind|Person=3|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {"morph": "Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin", POS: VERB},
- "VERB__VerbForm=Ger": {"morph": "VerbForm=Ger", POS: VERB},
- "VERB__VerbForm=Inf": {"morph": "VerbForm=Inf", POS: VERB},
- "X___": {"morph": "_", POS: X},
- "___PunctType=Quot": {POS: PUNCT},
- "___VerbForm=Inf": {POS: VERB},
- "___Number=Sing|Person=2|PronType=Prs": {POS: PRON},
- "_SP": {"morph": "_", POS: SPACE},
-}
-# fmt: on
diff --git a/spacy/lang/es/tokenizer_exceptions.py b/spacy/lang/es/tokenizer_exceptions.py
index 891323705..fbfe75545 100644
--- a/spacy/lang/es/tokenizer_exceptions.py
+++ b/spacy/lang/es/tokenizer_exceptions.py
@@ -1,42 +1,42 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA, NORM, PRON_LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
-_exc = {}
+_exc = {
+ "pal": [{ORTH: "pa"}, {ORTH: "l", NORM: "el"}],
+}
for exc_data in [
- {ORTH: "n°", LEMMA: "número"},
- {ORTH: "°C", LEMMA: "grados Celcius"},
- {ORTH: "aprox.", LEMMA: "aproximadamente"},
- {ORTH: "dna.", LEMMA: "docena"},
- {ORTH: "dpto.", LEMMA: "departamento"},
- {ORTH: "ej.", LEMMA: "ejemplo"},
- {ORTH: "esq.", LEMMA: "esquina"},
- {ORTH: "pág.", LEMMA: "página"},
- {ORTH: "p.ej.", LEMMA: "por ejemplo"},
- {ORTH: "Ud.", LEMMA: PRON_LEMMA, NORM: "usted"},
- {ORTH: "Vd.", LEMMA: PRON_LEMMA, NORM: "usted"},
- {ORTH: "Uds.", LEMMA: PRON_LEMMA, NORM: "ustedes"},
- {ORTH: "Vds.", LEMMA: PRON_LEMMA, NORM: "ustedes"},
+ {ORTH: "n°"},
+ {ORTH: "°C"},
+ {ORTH: "aprox."},
+ {ORTH: "dna."},
+ {ORTH: "dpto."},
+ {ORTH: "ej."},
+ {ORTH: "esq."},
+ {ORTH: "pág."},
+ {ORTH: "p.ej."},
+ {ORTH: "Ud.", NORM: "usted"},
+ {ORTH: "Vd.", NORM: "usted"},
+ {ORTH: "Uds.", NORM: "ustedes"},
+ {ORTH: "Vds.", NORM: "ustedes"},
{ORTH: "vol.", NORM: "volúmen"},
-
]:
_exc[exc_data[ORTH]] = [exc_data]
# Times
-_exc["12m."] = [{ORTH: "12"}, {ORTH: "m.", LEMMA: "p.m."}]
+_exc["12m."] = [{ORTH: "12"}, {ORTH: "m."}]
for h in range(1, 12 + 1):
for period in ["a.m.", "am"]:
- _exc["%d%s" % (h, period)] = [{ORTH: "%d" % h}, {ORTH: period, LEMMA: "a.m."}]
+ _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period}]
for period in ["p.m.", "pm"]:
- _exc["%d%s" % (h, period)] = [{ORTH: "%d" % h}, {ORTH: period, LEMMA: "p.m."}]
+ _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period}]
for orth in [
@@ -65,11 +65,11 @@ for orth in [
"Prof.",
"Profa.",
"q.e.p.d.",
- "Q.E.P.D."
+ "Q.E.P.D.",
"S.A.",
"S.L.",
- "S.R.L."
+ "S.R.L.",
"s.s.s.",
"Sr.",
"Sra.",
"Srta.",
@@ -77,4 +77,4 @@ for orth in [
_exc[orth] = [{ORTH: orth}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/et/__init__.py b/spacy/lang/et/__init__.py
index d84c081ef..9f71882d2 100644
--- a/spacy/lang/et/__init__.py
+++ b/spacy/lang/et/__init__.py
@@ -1,14 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from ...language import Language
-from ...attrs import LANG
class EstonianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "et"
stop_words = STOP_WORDS
diff --git a/spacy/lang/et/stop_words.py b/spacy/lang/et/stop_words.py
index 15070db5f..e1da1f14d 100644
--- a/spacy/lang/et/stop_words.py
+++ b/spacy/lang/et/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Source: https://github.com/stopwords-iso/stopwords-et
STOP_WORDS = set(
diff --git a/spacy/lang/eu/__init__.py b/spacy/lang/eu/__init__.py
index b72529fab..89550be96 100644
--- a/spacy/lang/eu/__init__.py
+++ b/spacy/lang/eu/__init__.py
@@ -1,23 +1,13 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_SUFFIXES
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...language import Language
-from ...attrs import LANG
class BasqueDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "eu"
-
- tokenizer_exceptions = BASE_EXCEPTIONS
- stop_words = STOP_WORDS
suffixes = TOKENIZER_SUFFIXES
+ stop_words = STOP_WORDS
+ lex_attr_getters = LEX_ATTRS
class Basque(Language):
diff --git a/spacy/lang/eu/examples.py b/spacy/lang/eu/examples.py
index 463494abd..3b9ef71b6 100644
--- a/spacy/lang/eu/examples.py
+++ b/spacy/lang/eu/examples.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/eu/lex_attrs.py b/spacy/lang/eu/lex_attrs.py
index 19b75c111..a3ab018ee 100644
--- a/spacy/lang/eu/lex_attrs.py
+++ b/spacy/lang/eu/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
# Source http://mylanguages.org/basque_numbers.php
diff --git a/spacy/lang/eu/punctuation.py b/spacy/lang/eu/punctuation.py
index b8b1a1c83..5d35d0a25 100644
--- a/spacy/lang/eu/punctuation.py
+++ b/spacy/lang/eu/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..punctuation import TOKENIZER_SUFFIXES
diff --git a/spacy/lang/eu/stop_words.py b/spacy/lang/eu/stop_words.py
index dda11a7fd..d213b5b81 100644
--- a/spacy/lang/eu/stop_words.py
+++ b/spacy/lang/eu/stop_words.py
@@ -1,6 +1,3 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
# Source: https://github.com/stopwords-iso/stopwords-eu
# https://www.ranks.nl/stopwords/basque
# https://www.mustgo.com/worldlanguages/basque/
diff --git a/spacy/lang/fa/__init__.py b/spacy/lang/fa/__init__.py
index c93bca671..7fdb9d065 100644
--- a/spacy/lang/fa/__init__.py
+++ b/spacy/lang/fa/__init__.py
@@ -1,31 +1,18 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
-from ..norm_exceptions import BASE_NORMS
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
-from .tag_map import TAG_MAP
from .punctuation import TOKENIZER_SUFFIXES
from .syntax_iterators import SYNTAX_ITERATORS
+from ...language import Language
class PersianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- lex_attr_getters[LANG] = lambda text: "fa"
- tokenizer_exceptions = update_exc(TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
- tag_map = TAG_MAP
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
suffixes = TOKENIZER_SUFFIXES
- writing_system = {"direction": "rtl", "has_case": False, "has_letters": True}
+ lex_attr_getters = LEX_ATTRS
syntax_iterators = SYNTAX_ITERATORS
+ stop_words = STOP_WORDS
+ writing_system = {"direction": "rtl", "has_case": False, "has_letters": True}
class Persian(Language):
diff --git a/spacy/lang/fa/examples.py b/spacy/lang/fa/examples.py
index 3f65a366d..9c6fb0345 100644
--- a/spacy/lang/fa/examples.py
+++ b/spacy/lang/fa/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/fa/generate_verbs_exc.py b/spacy/lang/fa/generate_verbs_exc.py
index 5d0ff944d..62094c6de 100644
--- a/spacy/lang/fa/generate_verbs_exc.py
+++ b/spacy/lang/fa/generate_verbs_exc.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
verb_roots = """
#هست
آخت#آهنج
diff --git a/spacy/lang/fa/lex_attrs.py b/spacy/lang/fa/lex_attrs.py
index dbea66b68..99b8e2787 100644
--- a/spacy/lang/fa/lex_attrs.py
+++ b/spacy/lang/fa/lex_attrs.py
@@ -1,5 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/fa/punctuation.py b/spacy/lang/fa/punctuation.py
index 33aa46ae2..4b258c13d 100644
--- a/spacy/lang/fa/punctuation.py
+++ b/spacy/lang/fa/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
from ..char_classes import UNITS, ALPHA_UPPER
diff --git a/spacy/lang/fa/stop_words.py b/spacy/lang/fa/stop_words.py
index 682fb7a71..f462f2e7a 100644
--- a/spacy/lang/fa/stop_words.py
+++ b/spacy/lang/fa/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Stop words from HAZM package
STOP_WORDS = set(
"""
diff --git a/spacy/lang/fa/syntax_iterators.py b/spacy/lang/fa/syntax_iterators.py
index 0f2b28b58..b63db3539 100644
--- a/spacy/lang/fa/syntax_iterators.py
+++ b/spacy/lang/fa/syntax_iterators.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
diff --git a/spacy/lang/fa/tag_map.py b/spacy/lang/fa/tag_map.py
deleted file mode 100644
index b9043adf0..000000000
--- a/spacy/lang/fa/tag_map.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, ADJ, CONJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import PRON, NOUN, PART, INTJ, AUX
-
-
-TAG_MAP = {
- "ADJ": {POS: ADJ},
- "ADJ_CMPR": {POS: ADJ},
- "ADJ_INO": {POS: ADJ},
- "ADJ_SUP": {POS: ADJ},
- "ADV": {POS: ADV},
- "ADV_COMP": {POS: ADV},
- "ADV_I": {POS: ADV},
- "ADV_LOC": {POS: ADV},
- "ADV_NEG": {POS: ADV},
- "ADV_TIME": {POS: ADV},
- "CLITIC": {POS: PART},
- "CON": {POS: CONJ},
- "CONJ": {POS: CONJ},
- "DELM": {POS: PUNCT},
- "DET": {POS: DET},
- "FW": {POS: X},
- "INT": {POS: INTJ},
- "N_PL": {POS: NOUN},
- "N_SING": {POS: NOUN},
- "N_VOC": {POS: NOUN},
- "NUM": {POS: NUM},
- "P": {POS: ADP},
- "PREV": {POS: ADP},
- "PRO": {POS: PRON},
- "V_AUX": {POS: AUX},
- "V_IMP": {POS: VERB},
- "V_PA": {POS: VERB},
- "V_PP": {POS: VERB},
- "V_PRS": {POS: VERB},
- "V_SUB": {POS: VERB},
-}
diff --git a/spacy/lang/fa/tokenizer_exceptions.py b/spacy/lang/fa/tokenizer_exceptions.py
index b3f8dcbf5..30df798ab 100644
--- a/spacy/lang/fa/tokenizer_exceptions.py
+++ b/spacy/lang/fa/tokenizer_exceptions.py
@@ -1,2756 +1,747 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA, TAG, NORM
+from ...symbols import ORTH, NORM
-_exc = {
- ".ق ": [{LEMMA: "قمری", ORTH: ".ق "}],
- ".م": [{LEMMA: "میلادی", ORTH: ".م"}],
- ".هـ": [{LEMMA: "هجری", ORTH: ".هـ"}],
- "ب.م": [{LEMMA: "بعد از میلاد", ORTH: "ب.م"}],
- "ق.م": [{LEMMA: "قبل از میلاد", ORTH: "ق.م"}],
+TOKENIZER_EXCEPTIONS = {
+ ".ق ": [{ORTH: ".ق "}],
+ ".م": [{ORTH: ".م"}],
+ ".هـ": [{ORTH: ".هـ"}],
+ "ب.م": [{ORTH: "ب.م"}],
+ "ق.م": [{ORTH: "ق.م"}],
+ "آبرویت": [{ORTH: "آبروی", NORM: "آبروی"}, {ORTH: "ت", NORM: "ت"}],
+ "آبنباتش": [{ORTH: "آبنبات", NORM: "آبنبات"}, {ORTH: "ش", NORM: "ش"}],
+ "آثارش": [{ORTH: "آثار", NORM: "آثار"}, {ORTH: "ش", NORM: "ش"}],
+ "آخرش": [{ORTH: "آخر", NORM: "آخر"}, {ORTH: "ش", NORM: "ش"}],
+ "آدمهاست": [{ORTH: "آدمها", NORM: "آدمها"}, {ORTH: "ست", NORM: "ست"}],
+ "آرزومندیم": [{ORTH: "آرزومند", NORM: "آرزومند"}, {ORTH: "یم", NORM: "یم"}],
+ "آزادند": [{ORTH: "آزاد", NORM: "آزاد"}, {ORTH: "ند", NORM: "ند"}],
+ "آسیبپذیرند": [{ORTH: "آسیبپذیر", NORM: "آسیبپذیر"}, {ORTH: "ند", NORM: "ند"}],
+ "آفریدهاند": [{ORTH: "آفریده", NORM: "آفریده"}, {ORTH: "اند", NORM: "اند"}],
+ "آمدنش": [{ORTH: "آمدن", NORM: "آمدن"}, {ORTH: "ش", NORM: "ش"}],
+ "آمریکاست": [{ORTH: "آمریکا", NORM: "آمریکا"}, {ORTH: "ست", NORM: "ست"}],
+ "آنجاست": [{ORTH: "آنجا", NORM: "آنجا"}, {ORTH: "ست", NORM: "ست"}],
+ "آنست": [{ORTH: "آن", NORM: "آن"}, {ORTH: "ست", NORM: "ست"}],
+ "آنند": [{ORTH: "آن", NORM: "آن"}, {ORTH: "ند", NORM: "ند"}],
+ "آنهاست": [{ORTH: "آنها", NORM: "آنها"}, {ORTH: "ست", NORM: "ست"}],
+ "آپاداناست": [{ORTH: "آپادانا", NORM: "آپادانا"}, {ORTH: "ست", NORM: "ست"}],
+ "اجتماعیمان": [{ORTH: "اجتماعی", NORM: "اجتماعی"}, {ORTH: "مان", NORM: "مان"}],
+ "اجدادت": [{ORTH: "اجداد", NORM: "اجداد"}, {ORTH: "ت", NORM: "ت"}],
+ "اجدادش": [{ORTH: "اجداد", NORM: "اجداد"}, {ORTH: "ش", NORM: "ش"}],
+ "اجدادیشان": [{ORTH: "اجدادی", NORM: "اجدادی"}, {ORTH: "شان", NORM: "شان"}],
+ "اجراست": [{ORTH: "اجرا", NORM: "اجرا"}, {ORTH: "ست", NORM: "ست"}],
+ "اختیارش": [{ORTH: "اختیار", NORM: "اختیار"}, {ORTH: "ش", NORM: "ش"}],
+ "اخلاقشان": [{ORTH: "اخلاق", NORM: "اخلاق"}, {ORTH: "شان", NORM: "شان"}],
+ "ادعایمان": [{ORTH: "ادعای", NORM: "ادعای"}, {ORTH: "مان", NORM: "مان"}],
+ "اذیتش": [{ORTH: "اذیت", NORM: "اذیت"}, {ORTH: "ش", NORM: "ش"}],
+ "ارادهاش": [{ORTH: "اراده", NORM: "اراده"}, {ORTH: "اش", NORM: "اش"}],
+ "ارتباطش": [{ORTH: "ارتباط", NORM: "ارتباط"}, {ORTH: "ش", NORM: "ش"}],
+ "ارتباطمان": [{ORTH: "ارتباط", NORM: "ارتباط"}, {ORTH: "مان", NORM: "مان"}],
+ "ارزشهاست": [{ORTH: "ارزشها", NORM: "ارزشها"}, {ORTH: "ست", NORM: "ست"}],
+ "ارزیاش": [{ORTH: "ارزی", NORM: "ارزی"}, {ORTH: "اش", NORM: "اش"}],
+ "ارهاش": [{ORTH: "اره", NORM: "اره"}, {ORTH: "اش", NORM: "اش"}],
+ "ازش": [{ORTH: "از", NORM: "از"}, {ORTH: "ش", NORM: "ش"}],
+ "ازین": [{ORTH: "از", NORM: "از"}, {ORTH: "ین", NORM: "ین"}],
+ "ازینهاست": [
+ {ORTH: "از", NORM: "از"},
+ {ORTH: "ینها", NORM: "ینها"},
+ {ORTH: "ست", NORM: "ست"},
+ ],
+ "استخوانند": [{ORTH: "استخوان", NORM: "استخوان"}, {ORTH: "ند", NORM: "ند"}],
+ "اسلامند": [{ORTH: "اسلام", NORM: "اسلام"}, {ORTH: "ند", NORM: "ند"}],
+ "اسلامیاند": [{ORTH: "اسلامی", NORM: "اسلامی"}, {ORTH: "اند", NORM: "اند"}],
+ "اسلحههایشان": [
+ {ORTH: "اسلحههای", NORM: "اسلحههای"},
+ {ORTH: "شان", NORM: "شان"},
+ ],
+ "اسمت": [{ORTH: "اسم", NORM: "اسم"}, {ORTH: "ت", NORM: "ت"}],
+ "اسمش": [{ORTH: "اسم", NORM: "اسم"}, {ORTH: "ش", NORM: "ش"}],
+ "اشتباهند": [{ORTH: "اشتباه", NORM: "اشتباه"}, {ORTH: "ند", NORM: "ند"}],
+ "اصلش": [{ORTH: "اصل", NORM: "اصل"}, {ORTH: "ش", NORM: "ش"}],
+ "اطاقش": [{ORTH: "اطاق", NORM: "اطاق"}, {ORTH: "ش", NORM: "ش"}],
+ "اعتقادند": [{ORTH: "اعتقاد", NORM: "اعتقاد"}, {ORTH: "ند", NORM: "ند"}],
+ "اعلایش": [{ORTH: "اعلای", NORM: "اعلای"}, {ORTH: "ش", NORM: "ش"}],
+ "افتراست": [{ORTH: "افترا", NORM: "افترا"}, {ORTH: "ست", NORM: "ست"}],
+ "افطارت": [{ORTH: "افطار", NORM: "افطار"}, {ORTH: "ت", NORM: "ت"}],
+ "اقوامش": [{ORTH: "اقوام", NORM: "اقوام"}, {ORTH: "ش", NORM: "ش"}],
+ "امروزیش": [{ORTH: "امروزی", NORM: "امروزی"}, {ORTH: "ش", NORM: "ش"}],
+ "اموالش": [{ORTH: "اموال", NORM: "اموال"}, {ORTH: "ش", NORM: "ش"}],
+ "امیدوارند": [{ORTH: "امیدوار", NORM: "امیدوار"}, {ORTH: "ند", NORM: "ند"}],
+ "امیدواریم": [{ORTH: "امیدوار", NORM: "امیدوار"}, {ORTH: "یم", NORM: "یم"}],
+ "انتخابهایم": [{ORTH: "انتخابها", NORM: "انتخابها"}, {ORTH: "یم", NORM: "یم"}],
+ "انتظارم": [{ORTH: "انتظار", NORM: "انتظار"}, {ORTH: "م", NORM: "م"}],
+ "انجمنم": [{ORTH: "انجمن", NORM: "انجمن"}, {ORTH: "م", NORM: "م"}],
+ "اندرش": [{ORTH: "اندر", NORM: "اندر"}, {ORTH: "ش", NORM: "ش"}],
+ "انشایش": [{ORTH: "انشای", NORM: "انشای"}, {ORTH: "ش", NORM: "ش"}],
+ "انگشتشان": [{ORTH: "انگشت", NORM: "انگشت"}, {ORTH: "شان", NORM: "شان"}],
+ "انگشتهایش": [{ORTH: "انگشتهای", NORM: "انگشتهای"}, {ORTH: "ش", NORM: "ش"}],
+ "اهمیتشان": [{ORTH: "اهمیت", NORM: "اهمیت"}, {ORTH: "شان", NORM: "شان"}],
+ "اهمیتند": [{ORTH: "اهمیت", NORM: "اهمیت"}, {ORTH: "ند", NORM: "ند"}],
+ "اوایلش": [{ORTH: "اوایل", NORM: "اوایل"}, {ORTH: "ش", NORM: "ش"}],
+ "اوست": [{ORTH: "او", NORM: "او"}, {ORTH: "ست", NORM: "ست"}],
+ "اولش": [{ORTH: "اول", NORM: "اول"}, {ORTH: "ش", NORM: "ش"}],
+ "اولشان": [{ORTH: "اول", NORM: "اول"}, {ORTH: "شان", NORM: "شان"}],
+ "اولم": [{ORTH: "اول", NORM: "اول"}, {ORTH: "م", NORM: "م"}],
+ "اکثرشان": [{ORTH: "اکثر", NORM: "اکثر"}, {ORTH: "شان", NORM: "شان"}],
+ "ایتالیاست": [{ORTH: "ایتالیا", NORM: "ایتالیا"}, {ORTH: "ست", NORM: "ست"}],
+ "ایرانیاش": [{ORTH: "ایرانی", NORM: "ایرانی"}, {ORTH: "اش", NORM: "اش"}],
+ "اینجاست": [{ORTH: "اینجا", NORM: "اینجا"}, {ORTH: "ست", NORM: "ست"}],
+ "اینهاست": [{ORTH: "اینها", NORM: "اینها"}, {ORTH: "ست", NORM: "ست"}],
+ "بابات": [{ORTH: "بابا", NORM: "بابا"}, {ORTH: "ت", NORM: "ت"}],
+ "بارش": [{ORTH: "بار", NORM: "بار"}, {ORTH: "ش", NORM: "ش"}],
+ "بازیگرانش": [{ORTH: "بازیگران", NORM: "بازیگران"}, {ORTH: "ش", NORM: "ش"}],
+ "بازیگرمان": [{ORTH: "بازیگر", NORM: "بازیگر"}, {ORTH: "مان", NORM: "مان"}],
+ "بازیگرهایم": [{ORTH: "بازیگرها", NORM: "بازیگرها"}, {ORTH: "یم", NORM: "یم"}],
+ "بازیاش": [{ORTH: "بازی", NORM: "بازی"}, {ORTH: "اش", NORM: "اش"}],
+ "بالاست": [{ORTH: "بالا", NORM: "بالا"}, {ORTH: "ست", NORM: "ست"}],
+ "باورند": [{ORTH: "باور", NORM: "باور"}, {ORTH: "ند", NORM: "ند"}],
+ "بجاست": [{ORTH: "بجا", NORM: "بجا"}, {ORTH: "ست", NORM: "ست"}],
+ "بدان": [{ORTH: "ب", NORM: "ب"}, {ORTH: "دان", NORM: "دان"}],
+ "بدش": [{ORTH: "بد", NORM: "بد"}, {ORTH: "ش", NORM: "ش"}],
+ "بدشان": [{ORTH: "بد", NORM: "بد"}, {ORTH: "شان", NORM: "شان"}],
+ "بدنم": [{ORTH: "بدن", NORM: "بدن"}, {ORTH: "م", NORM: "م"}],
+ "بدهیات": [{ORTH: "بدهی", NORM: "بدهی"}, {ORTH: "ات", NORM: "ات"}],
+ "بدین": [{ORTH: "ب", NORM: "ب"}, {ORTH: "دین", NORM: "دین"}],
+ "برابرش": [{ORTH: "برابر", NORM: "برابر"}, {ORTH: "ش", NORM: "ش"}],
+ "برادرت": [{ORTH: "برادر", NORM: "برادر"}, {ORTH: "ت", NORM: "ت"}],
+ "برادرش": [{ORTH: "برادر", NORM: "برادر"}, {ORTH: "ش", NORM: "ش"}],
+ "برایت": [{ORTH: "برای", NORM: "برای"}, {ORTH: "ت", NORM: "ت"}],
+ "برایتان": [{ORTH: "برای", NORM: "برای"}, {ORTH: "تان", NORM: "تان"}],
+ "برایش": [{ORTH: "برای", NORM: "برای"}, {ORTH: "ش", NORM: "ش"}],
+ "برایشان": [{ORTH: "برای", NORM: "برای"}, {ORTH: "شان", NORM: "شان"}],
+ "برایم": [{ORTH: "برای", NORM: "برای"}, {ORTH: "م", NORM: "م"}],
+ "برایمان": [{ORTH: "برای", NORM: "برای"}, {ORTH: "مان", NORM: "مان"}],
+ "برخوردارند": [{ORTH: "برخوردار", NORM: "برخوردار"}, {ORTH: "ند", NORM: "ند"}],
+ "برنامهسازهاست": [
+ {ORTH: "برنامهسازها", NORM: "برنامهسازها"},
+ {ORTH: "ست", NORM: "ست"},
+ ],
+ "برهمش": [{ORTH: "برهم", NORM: "برهم"}, {ORTH: "ش", NORM: "ش"}],
+ "برهنهاش": [{ORTH: "برهنه", NORM: "برهنه"}, {ORTH: "اش", NORM: "اش"}],
+ "برگهایش": [{ORTH: "برگها", NORM: "برگها"}, {ORTH: "یش", NORM: "یش"}],
+ "برین": [{ORTH: "بر", NORM: "بر"}, {ORTH: "ین", NORM: "ین"}],
+ "بزرگش": [{ORTH: "بزرگ", NORM: "بزرگ"}, {ORTH: "ش", NORM: "ش"}],
+ "بزرگتری": [{ORTH: "بزرگتر", NORM: "بزرگتر"}, {ORTH: "ی", NORM: "ی"}],
+ "بساطش": [{ORTH: "بساط", NORM: "بساط"}, {ORTH: "ش", NORM: "ش"}],
+ "بعدش": [{ORTH: "بعد", NORM: "بعد"}, {ORTH: "ش", NORM: "ش"}],
+ "بعضیهایشان": [{ORTH: "بعضیهای", NORM: "بعضیهای"}, {ORTH: "شان", NORM: "شان"}],
+ "بعضیشان": [{ORTH: "بعضی", NORM: "بعضی"}, {ORTH: "شان", NORM: "شان"}],
+ "بقیهاش": [{ORTH: "بقیه", NORM: "بقیه"}, {ORTH: "اش", NORM: "اش"}],
+ "بلندش": [{ORTH: "بلند", NORM: "بلند"}, {ORTH: "ش", NORM: "ش"}],
+ "بناگوشش": [{ORTH: "بناگوش", NORM: "بناگوش"}, {ORTH: "ش", NORM: "ش"}],
+ "بنظرم": [
+ {ORTH: "ب", NORM: "ب"},
+ {ORTH: "نظر", NORM: "نظر"},
+ {ORTH: "م", NORM: "م"},
+ ],
+ "بهت": [{ORTH: "به", NORM: "به"}, {ORTH: "ت", NORM: "ت"}],
+ "بهترش": [{ORTH: "بهتر", NORM: "بهتر"}, {ORTH: "ش", NORM: "ش"}],
+ "بهترم": [{ORTH: "بهتر", NORM: "بهتر"}, {ORTH: "م", NORM: "م"}],
+ "بهتری": [{ORTH: "بهتر", NORM: "بهتر"}, {ORTH: "ی", NORM: "ی"}],
+ "بهش": [{ORTH: "به", NORM: "به"}, {ORTH: "ش", NORM: "ش"}],
+ "بهشان": [{ORTH: "به", NORM: "به"}, {ORTH: "شان", NORM: "شان"}],
+ "بودمش": [{ORTH: "بودم", NORM: "بودم"}, {ORTH: "ش", NORM: "ش"}],
+ "بودنش": [{ORTH: "بودن", NORM: "بودن"}, {ORTH: "ش", NORM: "ش"}],
+ "بودنشان": [{ORTH: "بودن", NORM: "بودن"}, {ORTH: "شان", NORM: "شان"}],
+ "بوستانش": [{ORTH: "بوستان", NORM: "بوستان"}, {ORTH: "ش", NORM: "ش"}],
+ "بویش": [{ORTH: "بو", NORM: "بو"}, {ORTH: "یش", NORM: "یش"}],
+ "بچهاش": [{ORTH: "بچه", NORM: "بچه"}, {ORTH: "اش", NORM: "اش"}],
+ "بچهم": [{ORTH: "بچه", NORM: "بچه"}, {ORTH: "م", NORM: "م"}],
+ "بچههایش": [{ORTH: "بچههای", NORM: "بچههای"}, {ORTH: "ش", NORM: "ش"}],
+ "بیانیهشان": [{ORTH: "بیانیه", NORM: "بیانیه"}, {ORTH: "شان", NORM: "شان"}],
+ "بیدارم": [{ORTH: "بیدار", NORM: "بیدار"}, {ORTH: "م", NORM: "م"}],
+ "بیناتری": [{ORTH: "بیناتر", NORM: "بیناتر"}, {ORTH: "ی", NORM: "ی"}],
+ "بیاطلاعند": [{ORTH: "بیاطلاع", NORM: "بیاطلاع"}, {ORTH: "ند", NORM: "ند"}],
+ "بیاطلاعید": [{ORTH: "بیاطلاع", NORM: "بیاطلاع"}, {ORTH: "ید", NORM: "ید"}],
+ "بیبهرهاند": [{ORTH: "بیبهره", NORM: "بیبهره"}, {ORTH: "اند", NORM: "اند"}],
+ "بیتفاوتند": [{ORTH: "بیتفاوت", NORM: "بیتفاوت"}, {ORTH: "ند", NORM: "ند"}],
+ "بیحسابش": [{ORTH: "بیحساب", NORM: "بیحساب"}, {ORTH: "ش", NORM: "ش"}],
+ "بینیش": [{ORTH: "بینی", NORM: "بینی"}, {ORTH: "ش", NORM: "ش"}],
+ "تجربههایم": [{ORTH: "تجربهها", NORM: "تجربهها"}, {ORTH: "یم", NORM: "یم"}],
+ "تحریمهاست": [{ORTH: "تحریمها", NORM: "تحریمها"}, {ORTH: "ست", NORM: "ست"}],
+ "تحولند": [{ORTH: "تحول", NORM: "تحول"}, {ORTH: "ند", NORM: "ند"}],
+ "تخیلیاش": [{ORTH: "تخیلی", NORM: "تخیلی"}, {ORTH: "اش", NORM: "اش"}],
+ "ترا": [{ORTH: "ت", NORM: "ت"}, {ORTH: "را", NORM: "را"}],
+ "ترسشان": [{ORTH: "ترس", NORM: "ترس"}, {ORTH: "شان", NORM: "شان"}],
+ "ترکش": [{ORTH: "ترک", NORM: "ترک"}, {ORTH: "ش", NORM: "ش"}],
+ "تشنهت": [{ORTH: "تشنه", NORM: "تشنه"}, {ORTH: "ت", NORM: "ت"}],
+ "تشکیلاتیاش": [{ORTH: "تشکیلاتی", NORM: "تشکیلاتی"}, {ORTH: "اش", NORM: "اش"}],
+ "تعلقش": [{ORTH: "تعلق", NORM: "تعلق"}, {ORTH: "ش", NORM: "ش"}],
+ "تلاششان": [{ORTH: "تلاش", NORM: "تلاش"}, {ORTH: "شان", NORM: "شان"}],
+ "تلاشمان": [{ORTH: "تلاش", NORM: "تلاش"}, {ORTH: "مان", NORM: "مان"}],
+ "تماشاگرش": [{ORTH: "تماشاگر", NORM: "تماشاگر"}, {ORTH: "ش", NORM: "ش"}],
+ "تمامشان": [{ORTH: "تمام", NORM: "تمام"}, {ORTH: "شان", NORM: "شان"}],
+ "تنش": [{ORTH: "تن", NORM: "تن"}, {ORTH: "ش", NORM: "ش"}],
+ "تنمان": [{ORTH: "تن", NORM: "تن"}, {ORTH: "مان", NORM: "مان"}],
+ "تنهاییاش": [{ORTH: "تنهایی", NORM: "تنهایی"}, {ORTH: "اش", NORM: "اش"}],
+ "تواناییاش": [{ORTH: "توانایی", NORM: "توانایی"}, {ORTH: "اش", NORM: "اش"}],
+ "توجهش": [{ORTH: "توجه", NORM: "توجه"}, {ORTH: "ش", NORM: "ش"}],
+ "توست": [{ORTH: "تو", NORM: "تو"}, {ORTH: "ست", NORM: "ست"}],
+ "توصیهاش": [{ORTH: "توصیه", NORM: "توصیه"}, {ORTH: "اش", NORM: "اش"}],
+ "تیغهاش": [{ORTH: "تیغه", NORM: "تیغه"}, {ORTH: "اش", NORM: "اش"}],
+ "جاست": [{ORTH: "جا", NORM: "جا"}, {ORTH: "ست", NORM: "ست"}],
+ "جامعهاند": [{ORTH: "جامعه", NORM: "جامعه"}, {ORTH: "اند", NORM: "اند"}],
+ "جانم": [{ORTH: "جان", NORM: "جان"}, {ORTH: "م", NORM: "م"}],
+ "جایش": [{ORTH: "جای", NORM: "جای"}, {ORTH: "ش", NORM: "ش"}],
+ "جایشان": [{ORTH: "جای", NORM: "جای"}, {ORTH: "شان", NORM: "شان"}],
+ "جدیدش": [{ORTH: "جدید", NORM: "جدید"}, {ORTH: "ش", NORM: "ش"}],
+ "جرمزاست": [{ORTH: "جرمزا", NORM: "جرمزا"}, {ORTH: "ست", NORM: "ست"}],
+ "جلوست": [{ORTH: "جلو", NORM: "جلو"}, {ORTH: "ست", NORM: "ست"}],
+ "جلویش": [{ORTH: "جلوی", NORM: "جلوی"}, {ORTH: "ش", NORM: "ش"}],
+ "جمهوریست": [{ORTH: "جمهوری", NORM: "جمهوری"}, {ORTH: "ست", NORM: "ست"}],
+ "جنسش": [{ORTH: "جنس", NORM: "جنس"}, {ORTH: "ش", NORM: "ش"}],
+ "جنساند": [{ORTH: "جنس", NORM: "جنس"}, {ORTH: "اند", NORM: "اند"}],
+ "جوانانش": [{ORTH: "جوانان", NORM: "جوانان"}, {ORTH: "ش", NORM: "ش"}],
+ "جویش": [{ORTH: "جوی", NORM: "جوی"}, {ORTH: "ش", NORM: "ش"}],
+ "جگرش": [{ORTH: "جگر", NORM: "جگر"}, {ORTH: "ش", NORM: "ش"}],
+ "حاضرم": [{ORTH: "حاضر", NORM: "حاضر"}, {ORTH: "م", NORM: "م"}],
+ "حالتهایشان": [{ORTH: "حالتهای", NORM: "حالتهای"}, {ORTH: "شان", NORM: "شان"}],
+ "حالیست": [{ORTH: "حالی", NORM: "حالی"}, {ORTH: "ست", NORM: "ست"}],
+ "حالیمان": [{ORTH: "حالی", NORM: "حالی"}, {ORTH: "مان", NORM: "مان"}],
+ "حاکیست": [{ORTH: "حاکی", NORM: "حاکی"}, {ORTH: "ست", NORM: "ست"}],
+ "حرامزادگیاش": [
+ {ORTH: "حرامزادگی", NORM: "حرامزادگی"},
+ {ORTH: "اش", NORM: "اش"},
+ ],
+ "حرفتان": [{ORTH: "حرف", NORM: "حرف"}, {ORTH: "تان", NORM: "تان"}],
+ "حرفش": [{ORTH: "حرف", NORM: "حرف"}, {ORTH: "ش", NORM: "ش"}],
+ "حرفشان": [{ORTH: "حرف", NORM: "حرف"}, {ORTH: "شان", NORM: "شان"}],
+ "حرفم": [{ORTH: "حرف", NORM: "حرف"}, {ORTH: "م", NORM: "م"}],
+ "حرفهایشان": [{ORTH: "حرفهای", NORM: "حرفهای"}, {ORTH: "شان", NORM: "شان"}],
+ "حرکتمان": [{ORTH: "حرکت", NORM: "حرکت"}, {ORTH: "مان", NORM: "مان"}],
+ "حریفانشان": [{ORTH: "حریفان", NORM: "حریفان"}, {ORTH: "شان", NORM: "شان"}],
+ "حضورشان": [{ORTH: "حضور", NORM: "حضور"}, {ORTH: "شان", NORM: "شان"}],
+ "حمایتش": [{ORTH: "حمایت", NORM: "حمایت"}, {ORTH: "ش", NORM: "ش"}],
+ "حواسش": [{ORTH: "حواس", NORM: "حواس"}, {ORTH: "ش", NORM: "ش"}],
+ "حواسشان": [{ORTH: "حواس", NORM: "حواس"}, {ORTH: "شان", NORM: "شان"}],
+ "حوصلهمان": [{ORTH: "حوصله", NORM: "حوصله"}, {ORTH: "مان", NORM: "مان"}],
+ "حکومتش": [{ORTH: "حکومت", NORM: "حکومت"}, {ORTH: "ش", NORM: "ش"}],
+ "حکومتشان": [{ORTH: "حکومت", NORM: "حکومت"}, {ORTH: "شان", NORM: "شان"}],
+ "حیفم": [{ORTH: "حیف", NORM: "حیف"}, {ORTH: "م", NORM: "م"}],
+ "خاندانش": [{ORTH: "خاندان", NORM: "خاندان"}, {ORTH: "ش", NORM: "ش"}],
+ "خانهاش": [{ORTH: "خانه", NORM: "خانه"}, {ORTH: "اش", NORM: "اش"}],
+ "خانهشان": [{ORTH: "خانه", NORM: "خانه"}, {ORTH: "شان", NORM: "شان"}],
+ "خانهمان": [{ORTH: "خانه", NORM: "خانه"}, {ORTH: "مان", NORM: "مان"}],
+ "خانههایشان": [{ORTH: "خانههای", NORM: "خانههای"}, {ORTH: "شان", NORM: "شان"}],
+ "خانوادهات": [{ORTH: "خانواده", NORM: "خانواده"}, {ORTH: "ات", NORM: "ات"}],
+ "خانوادهاش": [{ORTH: "خانواده", NORM: "خانواده"}, {ORTH: "اش", NORM: "اش"}],
+ "خانوادهام": [{ORTH: "خانواده", NORM: "خانواده"}, {ORTH: "ام", NORM: "ام"}],
+ "خانوادهشان": [{ORTH: "خانواده", NORM: "خانواده"}, {ORTH: "شان", NORM: "شان"}],
+ "خداست": [{ORTH: "خدا", NORM: "خدا"}, {ORTH: "ست", NORM: "ست"}],
+ "خدایش": [{ORTH: "خدا", NORM: "خدا"}, {ORTH: "یش", NORM: "یش"}],
+ "خدایشان": [{ORTH: "خدای", NORM: "خدای"}, {ORTH: "شان", NORM: "شان"}],
+ "خردسالش": [{ORTH: "خردسال", NORM: "خردسال"}, {ORTH: "ش", NORM: "ش"}],
+ "خروپفشان": [{ORTH: "خروپف", NORM: "خروپف"}, {ORTH: "شان", NORM: "شان"}],
+ "خستهای": [{ORTH: "خسته", NORM: "خسته"}, {ORTH: "ای", NORM: "ای"}],
+ "خطت": [{ORTH: "خط", NORM: "خط"}, {ORTH: "ت", NORM: "ت"}],
+ "خوابمان": [{ORTH: "خواب", NORM: "خواب"}, {ORTH: "مان", NORM: "مان"}],
+ "خواندنش": [{ORTH: "خواندن", NORM: "خواندن"}, {ORTH: "ش", NORM: "ش"}],
+ "خواهرش": [{ORTH: "خواهر", NORM: "خواهر"}, {ORTH: "ش", NORM: "ش"}],
+ "خوبش": [{ORTH: "خوب", NORM: "خوب"}, {ORTH: "ش", NORM: "ش"}],
+ "خودت": [{ORTH: "خود", NORM: "خود"}, {ORTH: "ت", NORM: "ت"}],
+ "خودتان": [{ORTH: "خود", NORM: "خود"}, {ORTH: "تان", NORM: "تان"}],
+ "خودش": [{ORTH: "خود", NORM: "خود"}, {ORTH: "ش", NORM: "ش"}],
+ "خودشان": [{ORTH: "خود", NORM: "خود"}, {ORTH: "شان", NORM: "شان"}],
+ "خودمان": [{ORTH: "خود", NORM: "خود"}, {ORTH: "مان", NORM: "مان"}],
+ "خوردمان": [{ORTH: "خورد", NORM: "خورد"}, {ORTH: "مان", NORM: "مان"}],
+ "خوردنشان": [{ORTH: "خوردن", NORM: "خوردن"}, {ORTH: "شان", NORM: "شان"}],
+ "خوشش": [{ORTH: "خوش", NORM: "خوش"}, {ORTH: "ش", NORM: "ش"}],
+ "خوشوقتم": [{ORTH: "خوشوقت", NORM: "خوشوقت"}, {ORTH: "م", NORM: "م"}],
+ "خونشان": [{ORTH: "خون", NORM: "خون"}, {ORTH: "شان", NORM: "شان"}],
+ "خویش": [{ORTH: "خوی", NORM: "خوی"}, {ORTH: "ش", NORM: "ش"}],
+ "خویشتنم": [{ORTH: "خویشتن", NORM: "خویشتن"}, {ORTH: "م", NORM: "م"}],
+ "خیالش": [{ORTH: "خیال", NORM: "خیال"}, {ORTH: "ش", NORM: "ش"}],
+ "خیسش": [{ORTH: "خیس", NORM: "خیس"}, {ORTH: "ش", NORM: "ش"}],
+ "داراست": [{ORTH: "دارا", NORM: "دارا"}, {ORTH: "ست", NORM: "ست"}],
+ "داستانهایش": [{ORTH: "داستانهای", NORM: "داستانهای"}, {ORTH: "ش", NORM: "ش"}],
+ "دخترمان": [{ORTH: "دختر", NORM: "دختر"}, {ORTH: "مان", NORM: "مان"}],
+ "دخیلند": [{ORTH: "دخیل", NORM: "دخیل"}, {ORTH: "ند", NORM: "ند"}],
+ "دربارهات": [{ORTH: "درباره", NORM: "درباره"}, {ORTH: "ات", NORM: "ات"}],
+ "دربارهاش": [{ORTH: "درباره", NORM: "درباره"}, {ORTH: "اش", NORM: "اش"}],
+ "دردش": [{ORTH: "درد", NORM: "درد"}, {ORTH: "ش", NORM: "ش"}],
+ "دردشان": [{ORTH: "درد", NORM: "درد"}, {ORTH: "شان", NORM: "شان"}],
+ "درسته": [{ORTH: "درست", NORM: "درست"}, {ORTH: "ه", NORM: "ه"}],
+ "درش": [{ORTH: "در", NORM: "در"}, {ORTH: "ش", NORM: "ش"}],
+ "درونشان": [{ORTH: "درون", NORM: "درون"}, {ORTH: "شان", NORM: "شان"}],
+ "درین": [{ORTH: "در", NORM: "در"}, {ORTH: "ین", NORM: "ین"}],
+ "دریچههایش": [{ORTH: "دریچههای", NORM: "دریچههای"}, {ORTH: "ش", NORM: "ش"}],
+ "دزدانش": [{ORTH: "دزدان", NORM: "دزدان"}, {ORTH: "ش", NORM: "ش"}],
+ "دستت": [{ORTH: "دست", NORM: "دست"}, {ORTH: "ت", NORM: "ت"}],
+ "دستش": [{ORTH: "دست", NORM: "دست"}, {ORTH: "ش", NORM: "ش"}],
+ "دستمان": [{ORTH: "دست", NORM: "دست"}, {ORTH: "مان", NORM: "مان"}],
+ "دستهایشان": [{ORTH: "دستهای", NORM: "دستهای"}, {ORTH: "شان", NORM: "شان"}],
+ "دستیافتنیست": [
+ {ORTH: "دستیافتنی", NORM: "دستیافتنی"},
+ {ORTH: "ست", NORM: "ست"},
+ ],
+ "دشمنند": [{ORTH: "دشمن", NORM: "دشمن"}, {ORTH: "ند", NORM: "ند"}],
+ "دشمنیشان": [{ORTH: "دشمنی", NORM: "دشمنی"}, {ORTH: "شان", NORM: "شان"}],
+ "دشمنیم": [{ORTH: "دشمن", NORM: "دشمن"}, {ORTH: "یم", NORM: "یم"}],
+ "دفترش": [{ORTH: "دفتر", NORM: "دفتر"}, {ORTH: "ش", NORM: "ش"}],
+ "دفنشان": [{ORTH: "دفن", NORM: "دفن"}, {ORTH: "شان", NORM: "شان"}],
+ "دلت": [{ORTH: "دل", NORM: "دل"}, {ORTH: "ت", NORM: "ت"}],
+ "دلش": [{ORTH: "دل", NORM: "دل"}, {ORTH: "ش", NORM: "ش"}],
+ "دلشان": [{ORTH: "دل", NORM: "دل"}, {ORTH: "شان", NORM: "شان"}],
+ "دلم": [{ORTH: "دل", NORM: "دل"}, {ORTH: "م", NORM: "م"}],
+ "دلیلش": [{ORTH: "دلیل", NORM: "دلیل"}, {ORTH: "ش", NORM: "ش"}],
+ "دنبالش": [{ORTH: "دنبال", NORM: "دنبال"}, {ORTH: "ش", NORM: "ش"}],
+ "دنبالهاش": [{ORTH: "دنباله", NORM: "دنباله"}, {ORTH: "اش", NORM: "اش"}],
+ "دهاتیهایش": [{ORTH: "دهاتیهای", NORM: "دهاتیهای"}, {ORTH: "ش", NORM: "ش"}],
+ "دهانت": [{ORTH: "دهان", NORM: "دهان"}, {ORTH: "ت", NORM: "ت"}],
+ "دهنش": [{ORTH: "دهن", NORM: "دهن"}, {ORTH: "ش", NORM: "ش"}],
+ "دورش": [{ORTH: "دور", NORM: "دور"}, {ORTH: "ش", NORM: "ش"}],
+ "دوروبریهاشان": [
+ {ORTH: "دوروبریها", NORM: "دوروبریها"},
+ {ORTH: "شان", NORM: "شان"},
+ ],
+ "دوستانش": [{ORTH: "دوستان", NORM: "دوستان"}, {ORTH: "ش", NORM: "ش"}],
+ "دوستانشان": [{ORTH: "دوستان", NORM: "دوستان"}, {ORTH: "شان", NORM: "شان"}],
+ "دوستت": [{ORTH: "دوست", NORM: "دوست"}, {ORTH: "ت", NORM: "ت"}],
+ "دوستش": [{ORTH: "دوست", NORM: "دوست"}, {ORTH: "ش", NORM: "ش"}],
+ "دومش": [{ORTH: "دوم", NORM: "دوم"}, {ORTH: "ش", NORM: "ش"}],
+ "دویدنش": [{ORTH: "دویدن", NORM: "دویدن"}, {ORTH: "ش", NORM: "ش"}],
+ "دکورهایمان": [{ORTH: "دکورهای", NORM: "دکورهای"}, {ORTH: "مان", NORM: "مان"}],
+ "دیدگاهش": [{ORTH: "دیدگاه", NORM: "دیدگاه"}, {ORTH: "ش", NORM: "ش"}],
+ "دیرت": [{ORTH: "دیر", NORM: "دیر"}, {ORTH: "ت", NORM: "ت"}],
+ "دیرم": [{ORTH: "دیر", NORM: "دیر"}, {ORTH: "م", NORM: "م"}],
+ "دینت": [{ORTH: "دین", NORM: "دین"}, {ORTH: "ت", NORM: "ت"}],
+ "دینش": [{ORTH: "دین", NORM: "دین"}, {ORTH: "ش", NORM: "ش"}],
+ "دینشان": [{ORTH: "دین", NORM: "دین"}, {ORTH: "شان", NORM: "شان"}],
+ "دیوارههایش": [{ORTH: "دیوارههای", NORM: "دیوارههای"}, {ORTH: "ش", NORM: "ش"}],
+ "دیوانهای": [{ORTH: "دیوانه", NORM: "دیوانه"}, {ORTH: "ای", NORM: "ای"}],
+ "دیوی": [{ORTH: "دیو", NORM: "دیو"}, {ORTH: "ی", NORM: "ی"}],
+ "دیگرم": [{ORTH: "دیگر", NORM: "دیگر"}, {ORTH: "م", NORM: "م"}],
+ "دیگرمان": [{ORTH: "دیگر", NORM: "دیگر"}, {ORTH: "مان", NORM: "مان"}],
+ "ذهنش": [{ORTH: "ذهن", NORM: "ذهن"}, {ORTH: "ش", NORM: "ش"}],
+ "ذهنشان": [{ORTH: "ذهن", NORM: "ذهن"}, {ORTH: "شان", NORM: "شان"}],
+ "ذهنم": [{ORTH: "ذهن", NORM: "ذهن"}, {ORTH: "م", NORM: "م"}],
+ "رئوسش": [{ORTH: "رئوس", NORM: "رئوس"}, {ORTH: "ش", NORM: "ش"}],
+ "راهشان": [{ORTH: "راه", NORM: "راه"}, {ORTH: "شان", NORM: "شان"}],
+ "راهگشاست": [{ORTH: "راهگشا", NORM: "راهگشا"}, {ORTH: "ست", NORM: "ست"}],
+ "رایانههایشان": [
+ {ORTH: "رایانههای", NORM: "رایانههای"},
+ {ORTH: "شان", NORM: "شان"},
+ ],
+ "رعایتشان": [{ORTH: "رعایت", NORM: "رعایت"}, {ORTH: "شان", NORM: "شان"}],
+ "رفتارش": [{ORTH: "رفتار", NORM: "رفتار"}, {ORTH: "ش", NORM: "ش"}],
+ "رفتارشان": [{ORTH: "رفتار", NORM: "رفتار"}, {ORTH: "شان", NORM: "شان"}],
+ "رفتارمان": [{ORTH: "رفتار", NORM: "رفتار"}, {ORTH: "مان", NORM: "مان"}],
+ "رفتارهاست": [{ORTH: "رفتارها", NORM: "رفتارها"}, {ORTH: "ست", NORM: "ست"}],
+ "رفتارهایشان": [{ORTH: "رفتارهای", NORM: "رفتارهای"}, {ORTH: "شان", NORM: "شان"}],
+ "رفقایم": [{ORTH: "رفقا", NORM: "رفقا"}, {ORTH: "یم", NORM: "یم"}],
+ "رقیقترش": [{ORTH: "رقیقتر", NORM: "رقیقتر"}, {ORTH: "ش", NORM: "ش"}],
+ "رنجند": [{ORTH: "رنج", NORM: "رنج"}, {ORTH: "ند", NORM: "ند"}],
+ "رهگشاست": [{ORTH: "رهگشا", NORM: "رهگشا"}, {ORTH: "ست", NORM: "ست"}],
+ "رواست": [{ORTH: "روا", NORM: "روا"}, {ORTH: "ست", NORM: "ست"}],
+ "روبروست": [{ORTH: "روبرو", NORM: "روبرو"}, {ORTH: "ست", NORM: "ست"}],
+ "روحیاش": [{ORTH: "روحی", NORM: "روحی"}, {ORTH: "اش", NORM: "اش"}],
+ "روزنامهاش": [{ORTH: "روزنامه", NORM: "روزنامه"}, {ORTH: "اش", NORM: "اش"}],
+ "روزهست": [{ORTH: "روزه", NORM: "روزه"}, {ORTH: "ست", NORM: "ست"}],
+ "روسریاش": [{ORTH: "روسری", NORM: "روسری"}, {ORTH: "اش", NORM: "اش"}],
+ "روشتان": [{ORTH: "روش", NORM: "روش"}, {ORTH: "تان", NORM: "تان"}],
+ "رویش": [{ORTH: "روی", NORM: "روی"}, {ORTH: "ش", NORM: "ش"}],
+ "زبانش": [{ORTH: "زبان", NORM: "زبان"}, {ORTH: "ش", NORM: "ش"}],
+ "زحماتشان": [{ORTH: "زحمات", NORM: "زحمات"}, {ORTH: "شان", NORM: "شان"}],
+ "زدنهایشان": [{ORTH: "زدنهای", NORM: "زدنهای"}, {ORTH: "شان", NORM: "شان"}],
+ "زرنگشان": [{ORTH: "زرنگ", NORM: "زرنگ"}, {ORTH: "شان", NORM: "شان"}],
+ "زشتش": [{ORTH: "زشت", NORM: "زشت"}, {ORTH: "ش", NORM: "ش"}],
+ "زشتکارانند": [{ORTH: "زشتکاران", NORM: "زشتکاران"}, {ORTH: "ند", NORM: "ند"}],
+ "زلفش": [{ORTH: "زلف", NORM: "زلف"}, {ORTH: "ش", NORM: "ش"}],
+ "زمن": [{ORTH: "ز", NORM: "ز"}, {ORTH: "من", NORM: "من"}],
+ "زنبوریاش": [{ORTH: "زنبوری", NORM: "زنبوری"}, {ORTH: "اش", NORM: "اش"}],
+ "زندانم": [{ORTH: "زندان", NORM: "زندان"}, {ORTH: "م", NORM: "م"}],
+ "زندهام": [{ORTH: "زنده", NORM: "زنده"}, {ORTH: "ام", NORM: "ام"}],
+ "زندگانیاش": [{ORTH: "زندگانی", NORM: "زندگانی"}, {ORTH: "اش", NORM: "اش"}],
+ "زندگیاش": [{ORTH: "زندگی", NORM: "زندگی"}, {ORTH: "اش", NORM: "اش"}],
+ "زندگیام": [{ORTH: "زندگی", NORM: "زندگی"}, {ORTH: "ام", NORM: "ام"}],
+ "زندگیشان": [{ORTH: "زندگی", NORM: "زندگی"}, {ORTH: "شان", NORM: "شان"}],
+ "زنش": [{ORTH: "زن", NORM: "زن"}, {ORTH: "ش", NORM: "ش"}],
+ "زنند": [{ORTH: "زن", NORM: "زن"}, {ORTH: "ند", NORM: "ند"}],
+ "زو": [{ORTH: "ز", NORM: "ز"}, {ORTH: "و", NORM: "و"}],
+ "زیاده": [{ORTH: "زیاد", NORM: "زیاد"}, {ORTH: "ه", NORM: "ه"}],
+ "زیباست": [{ORTH: "زیبا", NORM: "زیبا"}, {ORTH: "ست", NORM: "ست"}],
+ "زیبایش": [{ORTH: "زیبای", NORM: "زیبای"}, {ORTH: "ش", NORM: "ش"}],
+ "زیبایی": [{ORTH: "زیبای", NORM: "زیبای"}, {ORTH: "ی", NORM: "ی"}],
+ "زیربناست": [{ORTH: "زیربنا", NORM: "زیربنا"}, {ORTH: "ست", NORM: "ست"}],
+ "زیرکاند": [{ORTH: "زیرک", NORM: "زیرک"}, {ORTH: "اند", NORM: "اند"}],
+ "سؤالتان": [{ORTH: "سؤال", NORM: "سؤال"}, {ORTH: "تان", NORM: "تان"}],
+ "سؤالم": [{ORTH: "سؤال", NORM: "سؤال"}, {ORTH: "م", NORM: "م"}],
+ "سابقهاش": [{ORTH: "سابقه", NORM: "سابقه"}, {ORTH: "اش", NORM: "اش"}],
+ "ساختنم": [{ORTH: "ساختن", NORM: "ساختن"}, {ORTH: "م", NORM: "م"}],
+ "سادهاش": [{ORTH: "ساده", NORM: "ساده"}, {ORTH: "اش", NORM: "اش"}],
+ "سادهاند": [{ORTH: "ساده", NORM: "ساده"}, {ORTH: "اند", NORM: "اند"}],
+ "سازمانش": [{ORTH: "سازمان", NORM: "سازمان"}, {ORTH: "ش", NORM: "ش"}],
+ "ساعتم": [{ORTH: "ساعت", NORM: "ساعت"}, {ORTH: "م", NORM: "م"}],
+ "سالته": [
+ {ORTH: "سال", NORM: "سال"},
+ {ORTH: "ت", NORM: "ت"},
+ {ORTH: "ه", NORM: "ه"},
+ ],
+ "سالش": [{ORTH: "سال", NORM: "سال"}, {ORTH: "ش", NORM: "ش"}],
+ "سالهاست": [{ORTH: "سالها", NORM: "سالها"}, {ORTH: "ست", NORM: "ست"}],
+ "سالهاش": [{ORTH: "ساله", NORM: "ساله"}, {ORTH: "اش", NORM: "اش"}],
+ "ساکتند": [{ORTH: "ساکت", NORM: "ساکت"}, {ORTH: "ند", NORM: "ند"}],
+ "ساکنند": [{ORTH: "ساکن", NORM: "ساکن"}, {ORTH: "ند", NORM: "ند"}],
+ "سبزشان": [{ORTH: "سبز", NORM: "سبز"}, {ORTH: "شان", NORM: "شان"}],
+ "سبیلمان": [{ORTH: "سبیل", NORM: "سبیل"}, {ORTH: "مان", NORM: "مان"}],
+ "ستمهایش": [{ORTH: "ستمهای", NORM: "ستمهای"}, {ORTH: "ش", NORM: "ش"}],
+ "سخنانش": [{ORTH: "سخنان", NORM: "سخنان"}, {ORTH: "ش", NORM: "ش"}],
+ "سخنانشان": [{ORTH: "سخنان", NORM: "سخنان"}, {ORTH: "شان", NORM: "شان"}],
+ "سخنتان": [{ORTH: "سخن", NORM: "سخن"}, {ORTH: "تان", NORM: "تان"}],
+ "سخنش": [{ORTH: "سخن", NORM: "سخن"}, {ORTH: "ش", NORM: "ش"}],
+ "سخنم": [{ORTH: "سخن", NORM: "سخن"}, {ORTH: "م", NORM: "م"}],
+ "سردش": [{ORTH: "سرد", NORM: "سرد"}, {ORTH: "ش", NORM: "ش"}],
+ "سرزمینشان": [{ORTH: "سرزمین", NORM: "سرزمین"}, {ORTH: "شان", NORM: "شان"}],
+ "سرش": [{ORTH: "سر", NORM: "سر"}, {ORTH: "ش", NORM: "ش"}],
+ "سرمایهدارهاست": [
+ {ORTH: "سرمایهدارها", NORM: "سرمایهدارها"},
+ {ORTH: "ست", NORM: "ست"},
+ ],
+ "سرنوشتش": [{ORTH: "سرنوشت", NORM: "سرنوشت"}, {ORTH: "ش", NORM: "ش"}],
+ "سرنوشتشان": [{ORTH: "سرنوشت", NORM: "سرنوشت"}, {ORTH: "شان", NORM: "شان"}],
+ "سروتهش": [{ORTH: "سروته", NORM: "سروته"}, {ORTH: "ش", NORM: "ش"}],
+ "سرچشمهاش": [{ORTH: "سرچشمه", NORM: "سرچشمه"}, {ORTH: "اش", NORM: "اش"}],
+ "سقمش": [{ORTH: "سقم", NORM: "سقم"}, {ORTH: "ش", NORM: "ش"}],
+ "سنش": [{ORTH: "سن", NORM: "سن"}, {ORTH: "ش", NORM: "ش"}],
+ "سپاهش": [{ORTH: "سپاه", NORM: "سپاه"}, {ORTH: "ش", NORM: "ش"}],
+ "سیاسیشان": [{ORTH: "سیاسی", NORM: "سیاسی"}, {ORTH: "شان", NORM: "شان"}],
+ "سیاهچالههاست": [
+ {ORTH: "سیاهچالهها", NORM: "سیاهچالهها"},
+ {ORTH: "ست", NORM: "ست"},
+ ],
+ "شاخههایشان": [{ORTH: "شاخههای", NORM: "شاخههای"}, {ORTH: "شان", NORM: "شان"}],
+ "شالودهاش": [{ORTH: "شالوده", NORM: "شالوده"}, {ORTH: "اش", NORM: "اش"}],
+ "شانههایش": [{ORTH: "شانههای", NORM: "شانههای"}, {ORTH: "ش", NORM: "ش"}],
+ "شاهدیم": [{ORTH: "شاهد", NORM: "شاهد"}, {ORTH: "یم", NORM: "یم"}],
+ "شاهکارهایش": [{ORTH: "شاهکارهای", NORM: "شاهکارهای"}, {ORTH: "ش", NORM: "ش"}],
+ "شخصیتش": [{ORTH: "شخصیت", NORM: "شخصیت"}, {ORTH: "ش", NORM: "ش"}],
+ "شدنشان": [{ORTH: "شدن", NORM: "شدن"}, {ORTH: "شان", NORM: "شان"}],
+ "شرکتیست": [{ORTH: "شرکتی", NORM: "شرکتی"}, {ORTH: "ست", NORM: "ست"}],
+ "شعارهاشان": [{ORTH: "شعارها", NORM: "شعارها"}, {ORTH: "شان", NORM: "شان"}],
+ "شعورش": [{ORTH: "شعور", NORM: "شعور"}, {ORTH: "ش", NORM: "ش"}],
+ "شغلش": [{ORTH: "شغل", NORM: "شغل"}, {ORTH: "ش", NORM: "ش"}],
+ "شماست": [{ORTH: "شما", NORM: "شما"}, {ORTH: "ست", NORM: "ست"}],
+ "شمشیرش": [{ORTH: "شمشیر", NORM: "شمشیر"}, {ORTH: "ش", NORM: "ش"}],
+ "شنیدنش": [{ORTH: "شنیدن", NORM: "شنیدن"}, {ORTH: "ش", NORM: "ش"}],
+ "شوراست": [{ORTH: "شورا", NORM: "شورا"}, {ORTH: "ست", NORM: "ست"}],
+ "شومت": [{ORTH: "شوم", NORM: "شوم"}, {ORTH: "ت", NORM: "ت"}],
+ "شیرینترش": [{ORTH: "شیرینتر", NORM: "شیرینتر"}, {ORTH: "ش", NORM: "ش"}],
+ "شیطاناند": [{ORTH: "شیطان", NORM: "شیطان"}, {ORTH: "اند", NORM: "اند"}],
+ "شیوههاست": [{ORTH: "شیوهها", NORM: "شیوهها"}, {ORTH: "ست", NORM: "ست"}],
+ "صاحبش": [{ORTH: "صاحب", NORM: "صاحب"}, {ORTH: "ش", NORM: "ش"}],
+ "صحنهاش": [{ORTH: "صحنه", NORM: "صحنه"}, {ORTH: "اش", NORM: "اش"}],
+ "صدایش": [{ORTH: "صدای", NORM: "صدای"}, {ORTH: "ش", NORM: "ش"}],
+ "صددند": [{ORTH: "صدد", NORM: "صدد"}, {ORTH: "ند", NORM: "ند"}],
+ "صندوقهاست": [{ORTH: "صندوقها", NORM: "صندوقها"}, {ORTH: "ست", NORM: "ست"}],
+ "صندوقهایش": [{ORTH: "صندوقهای", NORM: "صندوقهای"}, {ORTH: "ش", NORM: "ش"}],
+ "صورتش": [{ORTH: "صورت", NORM: "صورت"}, {ORTH: "ش", NORM: "ش"}],
+ "ضروریاند": [{ORTH: "ضروری", NORM: "ضروری"}, {ORTH: "اند", NORM: "اند"}],
+ "ضمیرش": [{ORTH: "ضمیر", NORM: "ضمیر"}, {ORTH: "ش", NORM: "ش"}],
+ "طرفش": [{ORTH: "طرف", NORM: "طرف"}, {ORTH: "ش", NORM: "ش"}],
+ "طلسمش": [{ORTH: "طلسم", NORM: "طلسم"}, {ORTH: "ش", NORM: "ش"}],
+ "طوره": [{ORTH: "طور", NORM: "طور"}, {ORTH: "ه", NORM: "ه"}],
+ "عاشوراست": [{ORTH: "عاشورا", NORM: "عاشورا"}, {ORTH: "ست", NORM: "ست"}],
+ "عبارتند": [{ORTH: "عبارت", NORM: "عبارت"}, {ORTH: "ند", NORM: "ند"}],
+ "عزیزانتان": [{ORTH: "عزیزان", NORM: "عزیزان"}, {ORTH: "تان", NORM: "تان"}],
+ "عزیزانش": [{ORTH: "عزیزان", NORM: "عزیزان"}, {ORTH: "ش", NORM: "ش"}],
+ "عزیزش": [{ORTH: "عزیز", NORM: "عزیز"}, {ORTH: "ش", NORM: "ش"}],
+ "عشرتطلبیاش": [
+ {ORTH: "عشرتطلبی", NORM: "عشرتطلبی"},
+ {ORTH: "اش", NORM: "اش"},
+ ],
+ "عقبیم": [{ORTH: "عقب", NORM: "عقب"}, {ORTH: "یم", NORM: "یم"}],
+ "علاقهاش": [{ORTH: "علاقه", NORM: "علاقه"}, {ORTH: "اش", NORM: "اش"}],
+ "علمیمان": [{ORTH: "علمی", NORM: "علمی"}, {ORTH: "مان", NORM: "مان"}],
+ "عمرش": [{ORTH: "عمر", NORM: "عمر"}, {ORTH: "ش", NORM: "ش"}],
+ "عمرشان": [{ORTH: "عمر", NORM: "عمر"}, {ORTH: "شان", NORM: "شان"}],
+ "عملش": [{ORTH: "عمل", NORM: "عمل"}, {ORTH: "ش", NORM: "ش"}],
+ "عملیاند": [{ORTH: "عملی", NORM: "عملی"}, {ORTH: "اند", NORM: "اند"}],
+ "عمویت": [{ORTH: "عموی", NORM: "عموی"}, {ORTH: "ت", NORM: "ت"}],
+ "عمویش": [{ORTH: "عموی", NORM: "عموی"}, {ORTH: "ش", NORM: "ش"}],
+ "عمیقش": [{ORTH: "عمیق", NORM: "عمیق"}, {ORTH: "ش", NORM: "ش"}],
+ "عواملش": [{ORTH: "عوامل", NORM: "عوامل"}, {ORTH: "ش", NORM: "ش"}],
+ "عوضشان": [{ORTH: "عوض", NORM: "عوض"}, {ORTH: "شان", NORM: "شان"}],
+ "غذاییشان": [{ORTH: "غذایی", NORM: "غذایی"}, {ORTH: "شان", NORM: "شان"}],
+ "غریبهاند": [{ORTH: "غریبه", NORM: "غریبه"}, {ORTH: "اند", NORM: "اند"}],
+ "غلامانش": [{ORTH: "غلامان", NORM: "غلامان"}, {ORTH: "ش", NORM: "ش"}],
+ "غلطهاست": [{ORTH: "غلطها", NORM: "غلطها"}, {ORTH: "ست", NORM: "ست"}],
+ "فراموشتان": [{ORTH: "فراموش", NORM: "فراموش"}, {ORTH: "تان", NORM: "تان"}],
+ "فردیاند": [{ORTH: "فردی", NORM: "فردی"}, {ORTH: "اند", NORM: "اند"}],
+ "فرزندانش": [{ORTH: "فرزندان", NORM: "فرزندان"}, {ORTH: "ش", NORM: "ش"}],
+ "فرزندش": [{ORTH: "فرزند", NORM: "فرزند"}, {ORTH: "ش", NORM: "ش"}],
+ "فرمهایش": [{ORTH: "فرمهای", NORM: "فرمهای"}, {ORTH: "ش", NORM: "ش"}],
+ "فرهنگیمان": [{ORTH: "فرهنگی", NORM: "فرهنگی"}, {ORTH: "مان", NORM: "مان"}],
+ "فریادشان": [{ORTH: "فریاد", NORM: "فریاد"}, {ORTH: "شان", NORM: "شان"}],
+ "فضاییشان": [{ORTH: "فضایی", NORM: "فضایی"}, {ORTH: "شان", NORM: "شان"}],
+ "فقیرشان": [{ORTH: "فقیر", NORM: "فقیر"}, {ORTH: "شان", NORM: "شان"}],
+ "فوریشان": [{ORTH: "فوری", NORM: "فوری"}, {ORTH: "شان", NORM: "شان"}],
+ "قائلند": [{ORTH: "قائل", NORM: "قائل"}, {ORTH: "ند", NORM: "ند"}],
+ "قائلیم": [{ORTH: "قائل", NORM: "قائل"}, {ORTH: "یم", NORM: "یم"}],
+ "قادرند": [{ORTH: "قادر", NORM: "قادر"}, {ORTH: "ند", NORM: "ند"}],
+ "قانونمندش": [{ORTH: "قانونمند", NORM: "قانونمند"}, {ORTH: "ش", NORM: "ش"}],
+ "قبلند": [{ORTH: "قبل", NORM: "قبل"}, {ORTH: "ند", NORM: "ند"}],
+ "قبلیاش": [{ORTH: "قبلی", NORM: "قبلی"}, {ORTH: "اش", NORM: "اش"}],
+ "قبلیمان": [{ORTH: "قبلی", NORM: "قبلی"}, {ORTH: "مان", NORM: "مان"}],
+ "قدریست": [{ORTH: "قدری", NORM: "قدری"}, {ORTH: "ست", NORM: "ست"}],
+ "قدمش": [{ORTH: "قدم", NORM: "قدم"}, {ORTH: "ش", NORM: "ش"}],
+ "قسمتش": [{ORTH: "قسمت", NORM: "قسمت"}, {ORTH: "ش", NORM: "ش"}],
+ "قضایاست": [{ORTH: "قضایا", NORM: "قضایا"}, {ORTH: "ست", NORM: "ست"}],
+ "قضیهشان": [{ORTH: "قضیه", NORM: "قضیه"}, {ORTH: "شان", NORM: "شان"}],
+ "قهرمانهایشان": [
+ {ORTH: "قهرمانهای", NORM: "قهرمانهای"},
+ {ORTH: "شان", NORM: "شان"},
+ ],
+ "قهرمانیش": [{ORTH: "قهرمانی", NORM: "قهرمانی"}, {ORTH: "ش", NORM: "ش"}],
+ "قومت": [{ORTH: "قوم", NORM: "قوم"}, {ORTH: "ت", NORM: "ت"}],
+ "لازمهاش": [{ORTH: "لازمه", NORM: "لازمه"}, {ORTH: "اش", NORM: "اش"}],
+ "مأموریتش": [{ORTH: "مأموریت", NORM: "مأموریت"}, {ORTH: "ش", NORM: "ش"}],
+ "مأموریتم": [{ORTH: "مأموریت", NORM: "مأموریت"}, {ORTH: "م", NORM: "م"}],
+ "مأموریتاند": [{ORTH: "مأموریت", NORM: "مأموریت"}, {ORTH: "اند", NORM: "اند"}],
+ "مادرانشان": [{ORTH: "مادران", NORM: "مادران"}, {ORTH: "شان", NORM: "شان"}],
+ "مادرت": [{ORTH: "مادر", NORM: "مادر"}, {ORTH: "ت", NORM: "ت"}],
+ "مادرش": [{ORTH: "مادر", NORM: "مادر"}, {ORTH: "ش", NORM: "ش"}],
+ "مادرم": [{ORTH: "مادر", NORM: "مادر"}, {ORTH: "م", NORM: "م"}],
+ "ماست": [{ORTH: "ما", NORM: "ما"}, {ORTH: "ست", NORM: "ست"}],
+ "مالیاش": [{ORTH: "مالی", NORM: "مالی"}, {ORTH: "اش", NORM: "اش"}],
+ "ماهیتش": [{ORTH: "ماهیت", NORM: "ماهیت"}, {ORTH: "ش", NORM: "ش"}],
+ "مایی": [{ORTH: "ما", NORM: "ما"}, {ORTH: "یی", NORM: "یی"}],
+ "مجازاتش": [{ORTH: "مجازات", NORM: "مجازات"}, {ORTH: "ش", NORM: "ش"}],
+ "مجبورند": [{ORTH: "مجبور", NORM: "مجبور"}, {ORTH: "ند", NORM: "ند"}],
+ "محتاجند": [{ORTH: "محتاج", NORM: "محتاج"}, {ORTH: "ند", NORM: "ند"}],
+ "محرمم": [{ORTH: "محرم", NORM: "محرم"}, {ORTH: "م", NORM: "م"}],
+ "محلش": [{ORTH: "محل", NORM: "محل"}, {ORTH: "ش", NORM: "ش"}],
+ "مخالفند": [{ORTH: "مخالف", NORM: "مخالف"}, {ORTH: "ند", NORM: "ند"}],
+ "مخدرش": [{ORTH: "مخدر", NORM: "مخدر"}, {ORTH: "ش", NORM: "ش"}],
+ "مدتهاست": [{ORTH: "مدتها", NORM: "مدتها"}, {ORTH: "ست", NORM: "ست"}],
+ "مدرسهات": [{ORTH: "مدرسه", NORM: "مدرسه"}, {ORTH: "ات", NORM: "ات"}],
+ "مدرکم": [{ORTH: "مدرک", NORM: "مدرک"}, {ORTH: "م", NORM: "م"}],
+ "مدیرانش": [{ORTH: "مدیران", NORM: "مدیران"}, {ORTH: "ش", NORM: "ش"}],
+ "مدیونم": [{ORTH: "مدیون", NORM: "مدیون"}, {ORTH: "م", NORM: "م"}],
+ "مذهبیاند": [{ORTH: "مذهبی", NORM: "مذهبی"}, {ORTH: "اند", NORM: "اند"}],
+ "مرا": [{ORTH: "م", NORM: "م"}, {ORTH: "را", NORM: "را"}],
+ "مرادت": [{ORTH: "مراد", NORM: "مراد"}, {ORTH: "ت", NORM: "ت"}],
+ "مردمشان": [{ORTH: "مردم", NORM: "مردم"}, {ORTH: "شان", NORM: "شان"}],
+ "مردمند": [{ORTH: "مردم", NORM: "مردم"}, {ORTH: "ند", NORM: "ند"}],
+ "مردماند": [{ORTH: "مردم", NORM: "مردم"}, {ORTH: "اند", NORM: "اند"}],
+ "مرزشان": [{ORTH: "مرز", NORM: "مرز"}, {ORTH: "شان", NORM: "شان"}],
+ "مرزهاشان": [{ORTH: "مرزها", NORM: "مرزها"}, {ORTH: "شان", NORM: "شان"}],
+ "مزدورش": [{ORTH: "مزدور", NORM: "مزدور"}, {ORTH: "ش", NORM: "ش"}],
+ "مسئولیتش": [{ORTH: "مسئولیت", NORM: "مسئولیت"}, {ORTH: "ش", NORM: "ش"}],
+ "مسائلش": [{ORTH: "مسائل", NORM: "مسائل"}, {ORTH: "ش", NORM: "ش"}],
+ "مستحضرید": [{ORTH: "مستحضر", NORM: "مستحضر"}, {ORTH: "ید", NORM: "ید"}],
+ "مسلمانم": [{ORTH: "مسلمان", NORM: "مسلمان"}, {ORTH: "م", NORM: "م"}],
+ "مسلمانند": [{ORTH: "مسلمان", NORM: "مسلمان"}, {ORTH: "ند", NORM: "ند"}],
+ "مشتریانش": [{ORTH: "مشتریان", NORM: "مشتریان"}, {ORTH: "ش", NORM: "ش"}],
+ "مشتهایمان": [{ORTH: "مشتهای", NORM: "مشتهای"}, {ORTH: "مان", NORM: "مان"}],
+ "مشخصند": [{ORTH: "مشخص", NORM: "مشخص"}, {ORTH: "ند", NORM: "ند"}],
+ "مشغولند": [{ORTH: "مشغول", NORM: "مشغول"}, {ORTH: "ند", NORM: "ند"}],
+ "مشغولیم": [{ORTH: "مشغول", NORM: "مشغول"}, {ORTH: "یم", NORM: "یم"}],
+ "مشهورش": [{ORTH: "مشهور", NORM: "مشهور"}, {ORTH: "ش", NORM: "ش"}],
+ "مشکلاتشان": [{ORTH: "مشکلات", NORM: "مشکلات"}, {ORTH: "شان", NORM: "شان"}],
+ "مشکلم": [{ORTH: "مشکل", NORM: "مشکل"}, {ORTH: "م", NORM: "م"}],
+ "مطمئنم": [{ORTH: "مطمئن", NORM: "مطمئن"}, {ORTH: "م", NORM: "م"}],
+ "معاملهمان": [{ORTH: "معامله", NORM: "معامله"}, {ORTH: "مان", NORM: "مان"}],
+ "معتقدم": [{ORTH: "معتقد", NORM: "معتقد"}, {ORTH: "م", NORM: "م"}],
+ "معتقدند": [{ORTH: "معتقد", NORM: "معتقد"}, {ORTH: "ند", NORM: "ند"}],
+ "معتقدیم": [{ORTH: "معتقد", NORM: "معتقد"}, {ORTH: "یم", NORM: "یم"}],
+ "معرفیاش": [{ORTH: "معرفی", NORM: "معرفی"}, {ORTH: "اش", NORM: "اش"}],
+ "معروفش": [{ORTH: "معروف", NORM: "معروف"}, {ORTH: "ش", NORM: "ش"}],
+ "معضلاتمان": [{ORTH: "معضلات", NORM: "معضلات"}, {ORTH: "مان", NORM: "مان"}],
+ "معلمش": [{ORTH: "معلم", NORM: "معلم"}, {ORTH: "ش", NORM: "ش"}],
+ "معنایش": [{ORTH: "معنای", NORM: "معنای"}, {ORTH: "ش", NORM: "ش"}],
+ "مغزشان": [{ORTH: "مغز", NORM: "مغز"}, {ORTH: "شان", NORM: "شان"}],
+ "مفیدند": [{ORTH: "مفید", NORM: "مفید"}, {ORTH: "ند", NORM: "ند"}],
+ "مقابلش": [{ORTH: "مقابل", NORM: "مقابل"}, {ORTH: "ش", NORM: "ش"}],
+ "مقالهاش": [{ORTH: "مقاله", NORM: "مقاله"}, {ORTH: "اش", NORM: "اش"}],
+ "مقدمش": [{ORTH: "مقدم", NORM: "مقدم"}, {ORTH: "ش", NORM: "ش"}],
+ "مقرش": [{ORTH: "مقر", NORM: "مقر"}, {ORTH: "ش", NORM: "ش"}],
+ "مقصدشان": [{ORTH: "مقصد", NORM: "مقصد"}, {ORTH: "شان", NORM: "شان"}],
+ "مقصرند": [{ORTH: "مقصر", NORM: "مقصر"}, {ORTH: "ند", NORM: "ند"}],
+ "مقصودتان": [{ORTH: "مقصود", NORM: "مقصود"}, {ORTH: "تان", NORM: "تان"}],
+ "ملاقاتهایش": [{ORTH: "ملاقاتهای", NORM: "ملاقاتهای"}, {ORTH: "ش", NORM: "ش"}],
+ "ممکنشان": [{ORTH: "ممکن", NORM: "ممکن"}, {ORTH: "شان", NORM: "شان"}],
+ "ممیزیهاست": [{ORTH: "ممیزیها", NORM: "ممیزیها"}, {ORTH: "ست", NORM: "ست"}],
+ "منظورم": [{ORTH: "منظور", NORM: "منظور"}, {ORTH: "م", NORM: "م"}],
+ "منی": [{ORTH: "من", NORM: "من"}, {ORTH: "ی", NORM: "ی"}],
+ "منید": [{ORTH: "من", NORM: "من"}, {ORTH: "ید", NORM: "ید"}],
+ "مهربانش": [{ORTH: "مهربان", NORM: "مهربان"}, {ORTH: "ش", NORM: "ش"}],
+ "مهماند": [{ORTH: "مهم", NORM: "مهم"}, {ORTH: "اند", NORM: "اند"}],
+ "مواجهند": [{ORTH: "مواجه", NORM: "مواجه"}, {ORTH: "ند", NORM: "ند"}],
+ "مواجهاند": [{ORTH: "مواجه", NORM: "مواجه"}, {ORTH: "اند", NORM: "اند"}],
+ "مواخذهات": [{ORTH: "مواخذه", NORM: "مواخذه"}, {ORTH: "ات", NORM: "ات"}],
+ "مواضعشان": [{ORTH: "مواضع", NORM: "مواضع"}, {ORTH: "شان", NORM: "شان"}],
+ "مواضعمان": [{ORTH: "مواضع", NORM: "مواضع"}, {ORTH: "مان", NORM: "مان"}],
+ "موافقند": [{ORTH: "موافق", NORM: "موافق"}, {ORTH: "ند", NORM: "ند"}],
+ "موجوداتش": [{ORTH: "موجودات", NORM: "موجودات"}, {ORTH: "ش", NORM: "ش"}],
+ "موجودند": [{ORTH: "موجود", NORM: "موجود"}, {ORTH: "ند", NORM: "ند"}],
+ "موردش": [{ORTH: "مورد", NORM: "مورد"}, {ORTH: "ش", NORM: "ش"}],
+ "موضعشان": [{ORTH: "موضع", NORM: "موضع"}, {ORTH: "شان", NORM: "شان"}],
+ "موظفند": [{ORTH: "موظف", NORM: "موظف"}, {ORTH: "ند", NORM: "ند"}],
+ "موهایش": [{ORTH: "موهای", NORM: "موهای"}, {ORTH: "ش", NORM: "ش"}],
+ "موهایمان": [{ORTH: "موهای", NORM: "موهای"}, {ORTH: "مان", NORM: "مان"}],
+ "مویم": [{ORTH: "مو", NORM: "مو"}, {ORTH: "یم", NORM: "یم"}],
+ "ناخرسندند": [{ORTH: "ناخرسند", NORM: "ناخرسند"}, {ORTH: "ند", NORM: "ند"}],
+ "ناراحتیش": [{ORTH: "ناراحتی", NORM: "ناراحتی"}, {ORTH: "ش", NORM: "ش"}],
+ "ناراضیاند": [{ORTH: "ناراضی", NORM: "ناراضی"}, {ORTH: "اند", NORM: "اند"}],
+ "نارواست": [{ORTH: "ناروا", NORM: "ناروا"}, {ORTH: "ست", NORM: "ست"}],
+ "نازش": [{ORTH: "ناز", NORM: "ناز"}, {ORTH: "ش", NORM: "ش"}],
+ "نامش": [{ORTH: "نام", NORM: "نام"}, {ORTH: "ش", NORM: "ش"}],
+ "نامشان": [{ORTH: "نام", NORM: "نام"}, {ORTH: "شان", NORM: "شان"}],
+ "نامم": [{ORTH: "نام", NORM: "نام"}, {ORTH: "م", NORM: "م"}],
+ "نامهات": [{ORTH: "نامه", NORM: "نامه"}, {ORTH: "ات", NORM: "ات"}],
+ "نامهام": [{ORTH: "نامه", NORM: "نامه"}, {ORTH: "ام", NORM: "ام"}],
+ "ناچارم": [{ORTH: "ناچار", NORM: "ناچار"}, {ORTH: "م", NORM: "م"}],
+ "نخستوزیریاش": [
+ {ORTH: "نخستوزیری", NORM: "نخستوزیری"},
+ {ORTH: "اش", NORM: "اش"},
+ ],
+ "نزدش": [{ORTH: "نزد", NORM: "نزد"}, {ORTH: "ش", NORM: "ش"}],
+ "نشانم": [{ORTH: "نشان", NORM: "نشان"}, {ORTH: "م", NORM: "م"}],
+ "نظراتشان": [{ORTH: "نظرات", NORM: "نظرات"}, {ORTH: "شان", NORM: "شان"}],
+ "نظرتان": [{ORTH: "نظر", NORM: "نظر"}, {ORTH: "تان", NORM: "تان"}],
+ "نظرش": [{ORTH: "نظر", NORM: "نظر"}, {ORTH: "ش", NORM: "ش"}],
+ "نظرشان": [{ORTH: "نظر", NORM: "نظر"}, {ORTH: "شان", NORM: "شان"}],
+ "نظرم": [{ORTH: "نظر", NORM: "نظر"}, {ORTH: "م", NORM: "م"}],
+ "نظرهایشان": [{ORTH: "نظرهای", NORM: "نظرهای"}, {ORTH: "شان", NORM: "شان"}],
+ "نفاقش": [{ORTH: "نفاق", NORM: "نفاق"}, {ORTH: "ش", NORM: "ش"}],
+ "نفرند": [{ORTH: "نفر", NORM: "نفر"}, {ORTH: "ند", NORM: "ند"}],
+ "نفوذیند": [{ORTH: "نفوذی", NORM: "نفوذی"}, {ORTH: "ند", NORM: "ند"}],
+ "نقطهنظراتتان": [
+ {ORTH: "نقطهنظرات", NORM: "نقطهنظرات"},
+ {ORTH: "تان", NORM: "تان"},
+ ],
+ "نمایشیمان": [{ORTH: "نمایشی", NORM: "نمایشی"}, {ORTH: "مان", NORM: "مان"}],
+ "نمایندگیشان": [
+ {ORTH: "نمایندگی", NORM: "نمایندگی"},
+ {ORTH: "شان", NORM: "شان"},
+ ],
+ "نمونهاش": [{ORTH: "نمونه", NORM: "نمونه"}, {ORTH: "اش", NORM: "اش"}],
+ "نمیپذیرندش": [{ORTH: "نمیپذیرند", NORM: "نمیپذیرند"}, {ORTH: "ش", NORM: "ش"}],
+ "نوآوریاش": [{ORTH: "نوآوری", NORM: "نوآوری"}, {ORTH: "اش", NORM: "اش"}],
+ "نوشتههایشان": [
+ {ORTH: "نوشتههای", NORM: "نوشتههای"},
+ {ORTH: "شان", NORM: "شان"},
+ ],
+ "نوشتههایم": [{ORTH: "نوشتهها", NORM: "نوشتهها"}, {ORTH: "یم", NORM: "یم"}],
+ "نکردنشان": [{ORTH: "نکردن", NORM: "نکردن"}, {ORTH: "شان", NORM: "شان"}],
+ "نگاهداریشان": [
+ {ORTH: "نگاهداری", NORM: "نگاهداری"},
+ {ORTH: "شان", NORM: "شان"},
+ ],
+ "نگاهش": [{ORTH: "نگاه", NORM: "نگاه"}, {ORTH: "ش", NORM: "ش"}],
+ "نگرانم": [{ORTH: "نگران", NORM: "نگران"}, {ORTH: "م", NORM: "م"}],
+ "نگرشهایشان": [{ORTH: "نگرشهای", NORM: "نگرشهای"}, {ORTH: "شان", NORM: "شان"}],
+ "نیازمندند": [{ORTH: "نیازمند", NORM: "نیازمند"}, {ORTH: "ند", NORM: "ند"}],
+ "هدفش": [{ORTH: "هدف", NORM: "هدف"}, {ORTH: "ش", NORM: "ش"}],
+ "همانست": [{ORTH: "همان", NORM: "همان"}, {ORTH: "ست", NORM: "ست"}],
+ "همراهش": [{ORTH: "همراه", NORM: "همراه"}, {ORTH: "ش", NORM: "ش"}],
+ "همسرتان": [{ORTH: "همسر", NORM: "همسر"}, {ORTH: "تان", NORM: "تان"}],
+ "همسرش": [{ORTH: "همسر", NORM: "همسر"}, {ORTH: "ش", NORM: "ش"}],
+ "همسرم": [{ORTH: "همسر", NORM: "همسر"}, {ORTH: "م", NORM: "م"}],
+ "همفکرانش": [{ORTH: "همفکران", NORM: "همفکران"}, {ORTH: "ش", NORM: "ش"}],
+ "همهاش": [{ORTH: "همه", NORM: "همه"}, {ORTH: "اش", NORM: "اش"}],
+ "همهشان": [{ORTH: "همه", NORM: "همه"}, {ORTH: "شان", NORM: "شان"}],
+ "همکارانش": [{ORTH: "همکاران", NORM: "همکاران"}, {ORTH: "ش", NORM: "ش"}],
+ "همنظریم": [{ORTH: "همنظر", NORM: "همنظر"}, {ORTH: "یم", NORM: "یم"}],
+ "هنرش": [{ORTH: "هنر", NORM: "هنر"}, {ORTH: "ش", NORM: "ش"}],
+ "هواست": [{ORTH: "هوا", NORM: "هوا"}, {ORTH: "ست", NORM: "ست"}],
+ "هویتش": [{ORTH: "هویت", NORM: "هویت"}, {ORTH: "ش", NORM: "ش"}],
+ "وابستهاند": [{ORTH: "وابسته", NORM: "وابسته"}, {ORTH: "اند", NORM: "اند"}],
+ "واقفند": [{ORTH: "واقف", NORM: "واقف"}, {ORTH: "ند", NORM: "ند"}],
+ "والدینشان": [{ORTH: "والدین", NORM: "والدین"}, {ORTH: "شان", NORM: "شان"}],
+ "وجدانتان": [{ORTH: "وجدان", NORM: "وجدان"}, {ORTH: "تان", NORM: "تان"}],
+ "وجودشان": [{ORTH: "وجود", NORM: "وجود"}, {ORTH: "شان", NORM: "شان"}],
+ "وطنم": [{ORTH: "وطن", NORM: "وطن"}, {ORTH: "م", NORM: "م"}],
+ "وعدهاش": [{ORTH: "وعده", NORM: "وعده"}, {ORTH: "اش", NORM: "اش"}],
+ "وقتمان": [{ORTH: "وقت", NORM: "وقت"}, {ORTH: "مان", NORM: "مان"}],
+ "ولادتش": [{ORTH: "ولادت", NORM: "ولادت"}, {ORTH: "ش", NORM: "ش"}],
+ "پایانش": [{ORTH: "پایان", NORM: "پایان"}, {ORTH: "ش", NORM: "ش"}],
+ "پایش": [{ORTH: "پای", NORM: "پای"}, {ORTH: "ش", NORM: "ش"}],
+ "پایینترند": [{ORTH: "پایینتر", NORM: "پایینتر"}, {ORTH: "ند", NORM: "ند"}],
+ "پدرت": [{ORTH: "پدر", NORM: "پدر"}, {ORTH: "ت", NORM: "ت"}],
+ "پدرش": [{ORTH: "پدر", NORM: "پدر"}, {ORTH: "ش", NORM: "ش"}],
+ "پدرشان": [{ORTH: "پدر", NORM: "پدر"}, {ORTH: "شان", NORM: "شان"}],
+ "پدرم": [{ORTH: "پدر", NORM: "پدر"}, {ORTH: "م", NORM: "م"}],
+ "پربارش": [{ORTH: "پربار", NORM: "پربار"}, {ORTH: "ش", NORM: "ش"}],
+ "پروردگارت": [{ORTH: "پروردگار", NORM: "پروردگار"}, {ORTH: "ت", NORM: "ت"}],
+ "پسرتان": [{ORTH: "پسر", NORM: "پسر"}, {ORTH: "تان", NORM: "تان"}],
+ "پسرش": [{ORTH: "پسر", NORM: "پسر"}, {ORTH: "ش", NORM: "ش"}],
+ "پسرعمویش": [{ORTH: "پسرعموی", NORM: "پسرعموی"}, {ORTH: "ش", NORM: "ش"}],
+ "پسرعمویت": [{ORTH: "پسرعموی", NORM: "پسرعموی"}, {ORTH: "ت", NORM: "ت"}],
+ "پشتش": [{ORTH: "پشت", NORM: "پشت"}, {ORTH: "ش", NORM: "ش"}],
+ "پشیمونی": [{ORTH: "پشیمون", NORM: "پشیمون"}, {ORTH: "ی", NORM: "ی"}],
+ "پولش": [{ORTH: "پول", NORM: "پول"}, {ORTH: "ش", NORM: "ش"}],
+ "پژوهشهایش": [{ORTH: "پژوهشهای", NORM: "پژوهشهای"}, {ORTH: "ش", NORM: "ش"}],
+ "پیامبرش": [{ORTH: "پیامبر", NORM: "پیامبر"}, {ORTH: "ش", NORM: "ش"}],
+ "پیامبری": [{ORTH: "پیامبر", NORM: "پیامبر"}, {ORTH: "ی", NORM: "ی"}],
+ "پیامش": [{ORTH: "پیام", NORM: "پیام"}, {ORTH: "ش", NORM: "ش"}],
+ "پیداست": [{ORTH: "پیدا", NORM: "پیدا"}, {ORTH: "ست", NORM: "ست"}],
+ "پیراهنش": [{ORTH: "پیراهن", NORM: "پیراهن"}, {ORTH: "ش", NORM: "ش"}],
+ "پیروانش": [{ORTH: "پیروان", NORM: "پیروان"}, {ORTH: "ش", NORM: "ش"}],
+ "پیشانیاش": [{ORTH: "پیشانی", NORM: "پیشانی"}, {ORTH: "اش", NORM: "اش"}],
+ "پیمانت": [{ORTH: "پیمان", NORM: "پیمان"}, {ORTH: "ت", NORM: "ت"}],
+ "پیوندشان": [{ORTH: "پیوند", NORM: "پیوند"}, {ORTH: "شان", NORM: "شان"}],
+ "چاپش": [{ORTH: "چاپ", NORM: "چاپ"}, {ORTH: "ش", NORM: "ش"}],
+ "چت": [{ORTH: "چ", NORM: "چ"}, {ORTH: "ت", NORM: "ت"}],
+ "چته": [{ORTH: "چ", NORM: "چ"}, {ORTH: "ت", NORM: "ت"}, {ORTH: "ه", NORM: "ه"}],
+ "چرخهایش": [{ORTH: "چرخهای", NORM: "چرخهای"}, {ORTH: "ش", NORM: "ش"}],
+ "چشمم": [{ORTH: "چشم", NORM: "چشم"}, {ORTH: "م", NORM: "م"}],
+ "چشمهایش": [{ORTH: "چشمهای", NORM: "چشمهای"}, {ORTH: "ش", NORM: "ش"}],
+ "چشمهایشان": [{ORTH: "چشمهای", NORM: "چشمهای"}, {ORTH: "شان", NORM: "شان"}],
+ "چمنم": [{ORTH: "چمن", NORM: "چمن"}, {ORTH: "م", NORM: "م"}],
+ "چهرهاش": [{ORTH: "چهره", NORM: "چهره"}, {ORTH: "اش", NORM: "اش"}],
+ "چکارهاند": [{ORTH: "چکاره", NORM: "چکاره"}, {ORTH: "اند", NORM: "اند"}],
+ "چیزهاست": [{ORTH: "چیزها", NORM: "چیزها"}, {ORTH: "ست", NORM: "ست"}],
+ "چیزهایش": [{ORTH: "چیزهای", NORM: "چیزهای"}, {ORTH: "ش", NORM: "ش"}],
+ "چیزیست": [{ORTH: "چیزی", NORM: "چیزی"}, {ORTH: "ست", NORM: "ست"}],
+ "چیست": [{ORTH: "چی", NORM: "چی"}, {ORTH: "ست", NORM: "ست"}],
+ "کارش": [{ORTH: "کار", NORM: "کار"}, {ORTH: "ش", NORM: "ش"}],
+ "کارشان": [{ORTH: "کار", NORM: "کار"}, {ORTH: "شان", NORM: "شان"}],
+ "کارم": [{ORTH: "کار", NORM: "کار"}, {ORTH: "م", NORM: "م"}],
+ "کارند": [{ORTH: "کار", NORM: "کار"}, {ORTH: "ند", NORM: "ند"}],
+ "کارهایم": [{ORTH: "کارها", NORM: "کارها"}, {ORTH: "یم", NORM: "یم"}],
+ "کافیست": [{ORTH: "کافی", NORM: "کافی"}, {ORTH: "ست", NORM: "ست"}],
+ "کتابخانهاش": [{ORTH: "کتابخانه", NORM: "کتابخانه"}, {ORTH: "اش", NORM: "اش"}],
+ "کتابش": [{ORTH: "کتاب", NORM: "کتاب"}, {ORTH: "ش", NORM: "ش"}],
+ "کتابهاشان": [{ORTH: "کتابها", NORM: "کتابها"}, {ORTH: "شان", NORM: "شان"}],
+ "کجاست": [{ORTH: "کجا", NORM: "کجا"}, {ORTH: "ست", NORM: "ست"}],
+ "کدورتهایشان": [{ORTH: "کدورتهای", NORM: "کدورتهای"}, {ORTH: "شان", NORM: "شان"}],
+ "کردنش": [{ORTH: "کردن", NORM: "کردن"}, {ORTH: "ش", NORM: "ش"}],
+ "کرمخوردهاش": [
+ {ORTH: "کرمخورده", NORM: "کرمخورده"},
+ {ORTH: "اش", NORM: "اش"},
+ ],
+ "کشش": [{ORTH: "کش", NORM: "کش"}, {ORTH: "ش", NORM: "ش"}],
+ "کشورش": [{ORTH: "کشور", NORM: "کشور"}, {ORTH: "ش", NORM: "ش"}],
+ "کشورشان": [{ORTH: "کشور", NORM: "کشور"}, {ORTH: "شان", NORM: "شان"}],
+ "کشورمان": [{ORTH: "کشور", NORM: "کشور"}, {ORTH: "مان", NORM: "مان"}],
+ "کشورهاست": [{ORTH: "کشورها", NORM: "کشورها"}, {ORTH: "ست", NORM: "ست"}],
+ "کلیشههاست": [{ORTH: "کلیشهها", NORM: "کلیشهها"}, {ORTH: "ست", NORM: "ست"}],
+ "کمبودهاست": [{ORTH: "کمبودها", NORM: "کمبودها"}, {ORTH: "ست", NORM: "ست"}],
+ "کمتره": [{ORTH: "کمتر", NORM: "کمتر"}, {ORTH: "ه", NORM: "ه"}],
+ "کمکم": [{ORTH: "کمک", NORM: "کمک"}, {ORTH: "م", NORM: "م"}],
+ "کنارش": [{ORTH: "کنار", NORM: "کنار"}, {ORTH: "ش", NORM: "ش"}],
+ "کودکانشان": [{ORTH: "کودکان", NORM: "کودکان"}, {ORTH: "شان", NORM: "شان"}],
+ "کوچکش": [{ORTH: "کوچک", NORM: "کوچک"}, {ORTH: "ش", NORM: "ش"}],
+ "کیست": [{ORTH: "کی", NORM: "کی"}, {ORTH: "ست", NORM: "ست"}],
+ "کیفش": [{ORTH: "کیف", NORM: "کیف"}, {ORTH: "ش", NORM: "ش"}],
+ "گذشتهاند": [{ORTH: "گذشته", NORM: "گذشته"}, {ORTH: "اند", NORM: "اند"}],
+ "گرانقدرش": [{ORTH: "گرانقدر", NORM: "گرانقدر"}, {ORTH: "ش", NORM: "ش"}],
+ "گرانقدرشان": [{ORTH: "گرانقدر", NORM: "گرانقدر"}, {ORTH: "شان", NORM: "شان"}],
+ "گردنتان": [{ORTH: "گردن", NORM: "گردن"}, {ORTH: "تان", NORM: "تان"}],
+ "گردنش": [{ORTH: "گردن", NORM: "گردن"}, {ORTH: "ش", NORM: "ش"}],
+ "گرفتارند": [{ORTH: "گرفتار", NORM: "گرفتار"}, {ORTH: "ند", NORM: "ند"}],
+ "گرفتنت": [{ORTH: "گرفتن", NORM: "گرفتن"}, {ORTH: "ت", NORM: "ت"}],
+ "گروهند": [{ORTH: "گروه", NORM: "گروه"}, {ORTH: "ند", NORM: "ند"}],
+ "گروگانهایش": [{ORTH: "گروگانهای", NORM: "گروگانهای"}, {ORTH: "ش", NORM: "ش"}],
+ "گریمش": [{ORTH: "گریم", NORM: "گریم"}, {ORTH: "ش", NORM: "ش"}],
+ "گفتارمان": [{ORTH: "گفتار", NORM: "گفتار"}, {ORTH: "مان", NORM: "مان"}],
+ "گلهایش": [{ORTH: "گلهای", NORM: "گلهای"}, {ORTH: "ش", NORM: "ش"}],
+ "گلویش": [{ORTH: "گلوی", NORM: "گلوی"}, {ORTH: "ش", NORM: "ش"}],
+ "گناهت": [{ORTH: "گناه", NORM: "گناه"}, {ORTH: "ت", NORM: "ت"}],
+ "گوشش": [{ORTH: "گوش", NORM: "گوش"}, {ORTH: "ش", NORM: "ش"}],
+ "گوشم": [{ORTH: "گوش", NORM: "گوش"}, {ORTH: "م", NORM: "م"}],
+ "گولش": [{ORTH: "گول", NORM: "گول"}, {ORTH: "ش", NORM: "ش"}],
+ "یادتان": [{ORTH: "یاد", NORM: "یاد"}, {ORTH: "تان", NORM: "تان"}],
+ "یادم": [{ORTH: "یاد", NORM: "یاد"}, {ORTH: "م", NORM: "م"}],
+ "یادمان": [{ORTH: "یاد", NORM: "یاد"}, {ORTH: "مان", NORM: "مان"}],
+ "یارانش": [{ORTH: "یاران", NORM: "یاران"}, {ORTH: "ش", NORM: "ش"}],
}
-
-_exc.update(
- {
- "آبرویت": [
- {ORTH: "آبروی", LEMMA: "آبروی", NORM: "آبروی", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "آبنباتش": [
- {ORTH: "آبنبات", LEMMA: "آبنبات", NORM: "آبنبات", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "آثارش": [
- {ORTH: "آثار", LEMMA: "آثار", NORM: "آثار", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "آخرش": [
- {ORTH: "آخر", LEMMA: "آخر", NORM: "آخر", TAG: "ADV"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "آدمهاست": [
- {ORTH: "آدمها", LEMMA: "آدمها", NORM: "آدمها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "آرزومندیم": [
- {ORTH: "آرزومند", LEMMA: "آرزومند", NORM: "آرزومند", TAG: "ADJ"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"},
- ],
- "آزادند": [
- {ORTH: "آزاد", LEMMA: "آزاد", NORM: "آزاد", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "آسیبپذیرند": [
- {ORTH: "آسیبپذیر", LEMMA: "آسیبپذیر", NORM: "آسیبپذیر", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "آفریدهاند": [
- {ORTH: "آفریده", LEMMA: "آفریده", NORM: "آفریده", TAG: "NOUN"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "آمدنش": [
- {ORTH: "آمدن", LEMMA: "آمدن", NORM: "آمدن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "آمریکاست": [
- {ORTH: "آمریکا", LEMMA: "آمریکا", NORM: "آمریکا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "آنجاست": [
- {ORTH: "آنجا", LEMMA: "آنجا", NORM: "آنجا", TAG: "ADV"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "آنست": [
- {ORTH: "آن", LEMMA: "آن", NORM: "آن", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "آنند": [
- {ORTH: "آن", LEMMA: "آن", NORM: "آن", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "آنهاست": [
- {ORTH: "آنها", LEMMA: "آنها", NORM: "آنها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "آپاداناست": [
- {ORTH: "آپادانا", LEMMA: "آپادانا", NORM: "آپادانا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "اجتماعیمان": [
- {ORTH: "اجتماعی", LEMMA: "اجتماعی", NORM: "اجتماعی", TAG: "ADJ"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "اجدادت": [
- {ORTH: "اجداد", LEMMA: "اجداد", NORM: "اجداد", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "اجدادش": [
- {ORTH: "اجداد", LEMMA: "اجداد", NORM: "اجداد", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "اجدادیشان": [
- {ORTH: "اجدادی", LEMMA: "اجدادی", NORM: "اجدادی", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "اجراست": [
- {ORTH: "اجرا", LEMMA: "اجرا", NORM: "اجرا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "اختیارش": [
- {ORTH: "اختیار", LEMMA: "اختیار", NORM: "اختیار", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "اخلاقشان": [
- {ORTH: "اخلاق", LEMMA: "اخلاق", NORM: "اخلاق", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "ادعایمان": [
- {ORTH: "ادعای", LEMMA: "ادعای", NORM: "ادعای", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "اذیتش": [
- {ORTH: "اذیت", LEMMA: "اذیت", NORM: "اذیت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "ارادهاش": [
- {ORTH: "اراده", LEMMA: "اراده", NORM: "اراده", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "ارتباطش": [
- {ORTH: "ارتباط", LEMMA: "ارتباط", NORM: "ارتباط", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "ارتباطمان": [
- {ORTH: "ارتباط", LEMMA: "ارتباط", NORM: "ارتباط", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "ارزشهاست": [
- {ORTH: "ارزشها", LEMMA: "ارزشها", NORM: "ارزشها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "ارزیاش": [
- {ORTH: "ارزی", LEMMA: "ارزی", NORM: "ارزی", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "ارهاش": [
- {ORTH: "اره", LEMMA: "اره", NORM: "اره", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "ازش": [
- {ORTH: "از", LEMMA: "از", NORM: "از", TAG: "ADP"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "ازین": [
- {ORTH: "از", LEMMA: "از", NORM: "از", TAG: "ADP"},
- {ORTH: "ین", LEMMA: "ین", NORM: "ین", TAG: "NOUN"},
- ],
- "ازینهاست": [
- {ORTH: "از", LEMMA: "از", NORM: "از", TAG: "ADP"},
- {ORTH: "ینها", LEMMA: "ینها", NORM: "ینها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "استخوانند": [
- {ORTH: "استخوان", LEMMA: "استخوان", NORM: "استخوان", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "اسلامند": [
- {ORTH: "اسلام", LEMMA: "اسلام", NORM: "اسلام", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "اسلامیاند": [
- {ORTH: "اسلامی", LEMMA: "اسلامی", NORM: "اسلامی", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "اسلحههایشان": [
- {ORTH: "اسلحههای", LEMMA: "اسلحههای", NORM: "اسلحههای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "اسمت": [
- {ORTH: "اسم", LEMMA: "اسم", NORM: "اسم", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "اسمش": [
- {ORTH: "اسم", LEMMA: "اسم", NORM: "اسم", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "اشتباهند": [
- {ORTH: "اشتباه", LEMMA: "اشتباه", NORM: "اشتباه", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "اصلش": [
- {ORTH: "اصل", LEMMA: "اصل", NORM: "اصل", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "اطاقش": [
- {ORTH: "اطاق", LEMMA: "اطاق", NORM: "اطاق", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "اعتقادند": [
- {ORTH: "اعتقاد", LEMMA: "اعتقاد", NORM: "اعتقاد", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "اعلایش": [
- {ORTH: "اعلای", LEMMA: "اعلای", NORM: "اعلای", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "افتراست": [
- {ORTH: "افترا", LEMMA: "افترا", NORM: "افترا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "افطارت": [
- {ORTH: "افطار", LEMMA: "افطار", NORM: "افطار", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "اقوامش": [
- {ORTH: "اقوام", LEMMA: "اقوام", NORM: "اقوام", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "امروزیش": [
- {ORTH: "امروزی", LEMMA: "امروزی", NORM: "امروزی", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "اموالش": [
- {ORTH: "اموال", LEMMA: "اموال", NORM: "اموال", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "امیدوارند": [
- {ORTH: "امیدوار", LEMMA: "امیدوار", NORM: "امیدوار", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "امیدواریم": [
- {ORTH: "امیدوار", LEMMA: "امیدوار", NORM: "امیدوار", TAG: "ADJ"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"},
- ],
- "انتخابهایم": [
- {ORTH: "انتخابها", LEMMA: "انتخابها", NORM: "انتخابها", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"},
- ],
- "انتظارم": [
- {ORTH: "انتظار", LEMMA: "انتظار", NORM: "انتظار", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "انجمنم": [
- {ORTH: "انجمن", LEMMA: "انجمن", NORM: "انجمن", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "اندرش": [
- {ORTH: "اندر", LEMMA: "اندر", NORM: "اندر", TAG: "ADP"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "انشایش": [
- {ORTH: "انشای", LEMMA: "انشای", NORM: "انشای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "انگشتشان": [
- {ORTH: "انگشت", LEMMA: "انگشت", NORM: "انگشت", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "انگشتهایش": [
- {ORTH: "انگشتهای", LEMMA: "انگشتهای", NORM: "انگشتهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "اهمیتشان": [
- {ORTH: "اهمیت", LEMMA: "اهمیت", NORM: "اهمیت", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "اهمیتند": [
- {ORTH: "اهمیت", LEMMA: "اهمیت", NORM: "اهمیت", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "اوایلش": [
- {ORTH: "اوایل", LEMMA: "اوایل", NORM: "اوایل", TAG: "ADV"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "اوست": [
- {ORTH: "او", LEMMA: "او", NORM: "او", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "اولش": [
- {ORTH: "اول", LEMMA: "اول", NORM: "اول", TAG: "ADV"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "اولشان": [
- {ORTH: "اول", LEMMA: "اول", NORM: "اول", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "اولم": [
- {ORTH: "اول", LEMMA: "اول", NORM: "اول", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "اکثرشان": [
- {ORTH: "اکثر", LEMMA: "اکثر", NORM: "اکثر", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "ایتالیاست": [
- {ORTH: "ایتالیا", LEMMA: "ایتالیا", NORM: "ایتالیا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "ایرانیاش": [
- {ORTH: "ایرانی", LEMMA: "ایرانی", NORM: "ایرانی", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "اینجاست": [
- {ORTH: "اینجا", LEMMA: "اینجا", NORM: "اینجا", TAG: "ADV"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "اینهاست": [
- {ORTH: "اینها", LEMMA: "اینها", NORM: "اینها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "بابات": [
- {ORTH: "بابا", LEMMA: "بابا", NORM: "بابا", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "بارش": [
- {ORTH: "بار", LEMMA: "بار", NORM: "بار", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بازیگرانش": [
- {ORTH: "بازیگران", LEMMA: "بازیگران", NORM: "بازیگران", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بازیگرمان": [
- {ORTH: "بازیگر", LEMMA: "بازیگر", NORM: "بازیگر", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "بازیگرهایم": [
- {ORTH: "بازیگرها", LEMMA: "بازیگرها", NORM: "بازیگرها", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"},
- ],
- "بازیاش": [
- {ORTH: "بازی", LEMMA: "بازی", NORM: "بازی", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "بالاست": [
- {ORTH: "بالا", LEMMA: "بالا", NORM: "بالا", TAG: "ADV"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "باورند": [
- {ORTH: "باور", LEMMA: "باور", NORM: "باور", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "بجاست": [
- {ORTH: "بجا", LEMMA: "بجا", NORM: "بجا", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "بدان": [
- {ORTH: "ب", LEMMA: "ب", NORM: "ب", TAG: "ADP"},
- {ORTH: "دان", LEMMA: "دان", NORM: "دان", TAG: "NOUN"},
- ],
- "بدش": [
- {ORTH: "بد", LEMMA: "بد", NORM: "بد", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بدشان": [
- {ORTH: "بد", LEMMA: "بد", NORM: "بد", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "بدنم": [
- {ORTH: "بدن", LEMMA: "بدن", NORM: "بدن", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "بدهیات": [
- {ORTH: "بدهی", LEMMA: "بدهی", NORM: "بدهی", TAG: "NOUN"},
- {ORTH: "ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"},
- ],
- "بدین": [
- {ORTH: "ب", LEMMA: "ب", NORM: "ب", TAG: "ADP"},
- {ORTH: "دین", LEMMA: "دین", NORM: "دین", TAG: "NOUN"},
- ],
- "برابرش": [
- {ORTH: "برابر", LEMMA: "برابر", NORM: "برابر", TAG: "ADP"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "برادرت": [
- {ORTH: "برادر", LEMMA: "برادر", NORM: "برادر", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "برادرش": [
- {ORTH: "برادر", LEMMA: "برادر", NORM: "برادر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "برایت": [
- {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "برایتان": [
- {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "برایش": [
- {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "برایشان": [
- {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "برایم": [
- {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "برایمان": [
- {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "برخوردارند": [
- {ORTH: "برخوردار", LEMMA: "برخوردار", NORM: "برخوردار", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "برنامهسازهاست": [
- {
- ORTH: "برنامهسازها",
- LEMMA: "برنامهسازها",
- NORM: "برنامهسازها",
- TAG: "NOUN",
- },
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "برهمش": [
- {ORTH: "برهم", LEMMA: "برهم", NORM: "برهم", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "برهنهاش": [
- {ORTH: "برهنه", LEMMA: "برهنه", NORM: "برهنه", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "برگهایش": [
- {ORTH: "برگها", LEMMA: "برگها", NORM: "برگها", TAG: "NOUN"},
- {ORTH: "یش", LEMMA: "یش", NORM: "یش", TAG: "NOUN"},
- ],
- "برین": [
- {ORTH: "بر", LEMMA: "بر", NORM: "بر", TAG: "ADP"},
- {ORTH: "ین", LEMMA: "ین", NORM: "ین", TAG: "NOUN"},
- ],
- "بزرگش": [
- {ORTH: "بزرگ", LEMMA: "بزرگ", NORM: "بزرگ", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بزرگتری": [
- {ORTH: "بزرگتر", LEMMA: "بزرگتر", NORM: "بزرگتر", TAG: "ADJ"},
- {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"},
- ],
- "بساطش": [
- {ORTH: "بساط", LEMMA: "بساط", NORM: "بساط", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بعدش": [
- {ORTH: "بعد", LEMMA: "بعد", NORM: "بعد", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بعضیهایشان": [
- {ORTH: "بعضیهای", LEMMA: "بعضیهای", NORM: "بعضیهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "بعضیشان": [
- {ORTH: "بعضی", LEMMA: "بعضی", NORM: "بعضی", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "بقیهاش": [
- {ORTH: "بقیه", LEMMA: "بقیه", NORM: "بقیه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "بلندش": [
- {ORTH: "بلند", LEMMA: "بلند", NORM: "بلند", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بناگوشش": [
- {ORTH: "بناگوش", LEMMA: "بناگوش", NORM: "بناگوش", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بنظرم": [
- {ORTH: "ب", LEMMA: "ب", NORM: "ب", TAG: "ADP"},
- {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "بهت": [
- {ORTH: "به", LEMMA: "به", NORM: "به", TAG: "ADP"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "بهترش": [
- {ORTH: "بهتر", LEMMA: "بهتر", NORM: "بهتر", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بهترم": [
- {ORTH: "بهتر", LEMMA: "بهتر", NORM: "بهتر", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"},
- ],
- "بهتری": [
- {ORTH: "بهتر", LEMMA: "بهتر", NORM: "بهتر", TAG: "ADJ"},
- {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"},
- ],
- "بهش": [
- {ORTH: "به", LEMMA: "به", NORM: "به", TAG: "ADP"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بهشان": [
- {ORTH: "به", LEMMA: "به", NORM: "به", TAG: "ADP"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "بودمش": [
- {ORTH: "بودم", LEMMA: "بودم", NORM: "بودم", TAG: "VERB"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بودنش": [
- {ORTH: "بودن", LEMMA: "بودن", NORM: "بودن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بودنشان": [
- {ORTH: "بودن", LEMMA: "بودن", NORM: "بودن", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "بوستانش": [
- {ORTH: "بوستان", LEMMA: "بوستان", NORM: "بوستان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بویش": [
- {ORTH: "بو", LEMMA: "بو", NORM: "بو", TAG: "NOUN"},
- {ORTH: "یش", LEMMA: "یش", NORM: "یش", TAG: "NOUN"},
- ],
- "بچهاش": [
- {ORTH: "بچه", LEMMA: "بچه", NORM: "بچه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "بچهم": [
- {ORTH: "بچه", LEMMA: "بچه", NORM: "بچه", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "بچههایش": [
- {ORTH: "بچههای", LEMMA: "بچههای", NORM: "بچههای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بیانیهشان": [
- {ORTH: "بیانیه", LEMMA: "بیانیه", NORM: "بیانیه", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "بیدارم": [
- {ORTH: "بیدار", LEMMA: "بیدار", NORM: "بیدار", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "بیناتری": [
- {ORTH: "بیناتر", LEMMA: "بیناتر", NORM: "بیناتر", TAG: "ADJ"},
- {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"},
- ],
- "بیاطلاعند": [
- {ORTH: "بیاطلاع", LEMMA: "بیاطلاع", NORM: "بیاطلاع", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "بیاطلاعید": [
- {ORTH: "بیاطلاع", LEMMA: "بیاطلاع", NORM: "بیاطلاع", TAG: "ADJ"},
- {ORTH: "ید", LEMMA: "ید", NORM: "ید", TAG: "VERB"},
- ],
- "بیبهرهاند": [
- {ORTH: "بیبهره", LEMMA: "بیبهره", NORM: "بیبهره", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "بیتفاوتند": [
- {ORTH: "بیتفاوت", LEMMA: "بیتفاوت", NORM: "بیتفاوت", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "بیحسابش": [
- {ORTH: "بیحساب", LEMMA: "بیحساب", NORM: "بیحساب", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "بینیش": [
- {ORTH: "بینی", LEMMA: "بینی", NORM: "بینی", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "تجربههایم": [
- {ORTH: "تجربهها", LEMMA: "تجربهها", NORM: "تجربهها", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"},
- ],
- "تحریمهاست": [
- {ORTH: "تحریمها", LEMMA: "تحریمها", NORM: "تحریمها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "تحولند": [
- {ORTH: "تحول", LEMMA: "تحول", NORM: "تحول", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "تخیلیاش": [
- {ORTH: "تخیلی", LEMMA: "تخیلی", NORM: "تخیلی", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "ترا": [
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- {ORTH: "را", LEMMA: "را", NORM: "را", TAG: "PART"},
- ],
- "ترسشان": [
- {ORTH: "ترس", LEMMA: "ترس", NORM: "ترس", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "ترکش": [
- {ORTH: "ترک", LEMMA: "ترک", NORM: "ترک", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "تشنهت": [
- {ORTH: "تشنه", LEMMA: "تشنه", NORM: "تشنه", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "تشکیلاتیاش": [
- {ORTH: "تشکیلاتی", LEMMA: "تشکیلاتی", NORM: "تشکیلاتی", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "تعلقش": [
- {ORTH: "تعلق", LEMMA: "تعلق", NORM: "تعلق", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "تلاششان": [
- {ORTH: "تلاش", LEMMA: "تلاش", NORM: "تلاش", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "تلاشمان": [
- {ORTH: "تلاش", LEMMA: "تلاش", NORM: "تلاش", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "تماشاگرش": [
- {ORTH: "تماشاگر", LEMMA: "تماشاگر", NORM: "تماشاگر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "تمامشان": [
- {ORTH: "تمام", LEMMA: "تمام", NORM: "تمام", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "تنش": [
- {ORTH: "تن", LEMMA: "تن", NORM: "تن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "تنمان": [
- {ORTH: "تن", LEMMA: "تن", NORM: "تن", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "تنهاییاش": [
- {ORTH: "تنهایی", LEMMA: "تنهایی", NORM: "تنهایی", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "تواناییاش": [
- {ORTH: "توانایی", LEMMA: "توانایی", NORM: "توانایی", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "توجهش": [
- {ORTH: "توجه", LEMMA: "توجه", NORM: "توجه", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "توست": [
- {ORTH: "تو", LEMMA: "تو", NORM: "تو", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "توصیهاش": [
- {ORTH: "توصیه", LEMMA: "توصیه", NORM: "توصیه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "تیغهاش": [
- {ORTH: "تیغه", LEMMA: "تیغه", NORM: "تیغه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "جاست": [
- {ORTH: "جا", LEMMA: "جا", NORM: "جا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "جامعهاند": [
- {ORTH: "جامعه", LEMMA: "جامعه", NORM: "جامعه", TAG: "NOUN"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "جانم": [
- {ORTH: "جان", LEMMA: "جان", NORM: "جان", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "جایش": [
- {ORTH: "جای", LEMMA: "جای", NORM: "جای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "جایشان": [
- {ORTH: "جای", LEMMA: "جای", NORM: "جای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "جدیدش": [
- {ORTH: "جدید", LEMMA: "جدید", NORM: "جدید", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "جرمزاست": [
- {ORTH: "جرمزا", LEMMA: "جرمزا", NORM: "جرمزا", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "جلوست": [
- {ORTH: "جلو", LEMMA: "جلو", NORM: "جلو", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "جلویش": [
- {ORTH: "جلوی", LEMMA: "جلوی", NORM: "جلوی", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "جمهوریست": [
- {ORTH: "جمهوری", LEMMA: "جمهوری", NORM: "جمهوری", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "جنسش": [
- {ORTH: "جنس", LEMMA: "جنس", NORM: "جنس", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "جنساند": [
- {ORTH: "جنس", LEMMA: "جنس", NORM: "جنس", TAG: "NOUN"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "جوانانش": [
- {ORTH: "جوانان", LEMMA: "جوانان", NORM: "جوانان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "جویش": [
- {ORTH: "جوی", LEMMA: "جوی", NORM: "جوی", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "جگرش": [
- {ORTH: "جگر", LEMMA: "جگر", NORM: "جگر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "حاضرم": [
- {ORTH: "حاضر", LEMMA: "حاضر", NORM: "حاضر", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"},
- ],
- "حالتهایشان": [
- {ORTH: "حالتهای", LEMMA: "حالتهای", NORM: "حالتهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "حالیست": [
- {ORTH: "حالی", LEMMA: "حالی", NORM: "حالی", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "حالیمان": [
- {ORTH: "حالی", LEMMA: "حالی", NORM: "حالی", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "حاکیست": [
- {ORTH: "حاکی", LEMMA: "حاکی", NORM: "حاکی", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "حرامزادگیاش": [
- {ORTH: "حرامزادگی", LEMMA: "حرامزادگی", NORM: "حرامزادگی", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "حرفتان": [
- {ORTH: "حرف", LEMMA: "حرف", NORM: "حرف", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "حرفش": [
- {ORTH: "حرف", LEMMA: "حرف", NORM: "حرف", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "حرفشان": [
- {ORTH: "حرف", LEMMA: "حرف", NORM: "حرف", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "حرفم": [
- {ORTH: "حرف", LEMMA: "حرف", NORM: "حرف", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "حرفهایشان": [
- {ORTH: "حرفهای", LEMMA: "حرفهای", NORM: "حرفهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "حرکتمان": [
- {ORTH: "حرکت", LEMMA: "حرکت", NORM: "حرکت", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "حریفانشان": [
- {ORTH: "حریفان", LEMMA: "حریفان", NORM: "حریفان", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "حضورشان": [
- {ORTH: "حضور", LEMMA: "حضور", NORM: "حضور", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "حمایتش": [
- {ORTH: "حمایت", LEMMA: "حمایت", NORM: "حمایت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "حواسش": [
- {ORTH: "حواس", LEMMA: "حواس", NORM: "حواس", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "حواسشان": [
- {ORTH: "حواس", LEMMA: "حواس", NORM: "حواس", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "حوصلهمان": [
- {ORTH: "حوصله", LEMMA: "حوصله", NORM: "حوصله", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "حکومتش": [
- {ORTH: "حکومت", LEMMA: "حکومت", NORM: "حکومت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "حکومتشان": [
- {ORTH: "حکومت", LEMMA: "حکومت", NORM: "حکومت", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "حیفم": [
- {ORTH: "حیف", LEMMA: "حیف", NORM: "حیف", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "خاندانش": [
- {ORTH: "خاندان", LEMMA: "خاندان", NORM: "خاندان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "خانهاش": [
- {ORTH: "خانه", LEMMA: "خانه", NORM: "خانه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "خانهشان": [
- {ORTH: "خانه", LEMMA: "خانه", NORM: "خانه", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "خانهمان": [
- {ORTH: "خانه", LEMMA: "خانه", NORM: "خانه", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "خانههایشان": [
- {ORTH: "خانههای", LEMMA: "خانههای", NORM: "خانههای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "خانوادهات": [
- {ORTH: "خانواده", LEMMA: "خانواده", NORM: "خانواده", TAG: "NOUN"},
- {ORTH: "ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"},
- ],
- "خانوادهاش": [
- {ORTH: "خانواده", LEMMA: "خانواده", NORM: "خانواده", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "خانوادهام": [
- {ORTH: "خانواده", LEMMA: "خانواده", NORM: "خانواده", TAG: "NOUN"},
- {ORTH: "ام", LEMMA: "ام", NORM: "ام", TAG: "NOUN"},
- ],
- "خانوادهشان": [
- {ORTH: "خانواده", LEMMA: "خانواده", NORM: "خانواده", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "خداست": [
- {ORTH: "خدا", LEMMA: "خدا", NORM: "خدا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "خدایش": [
- {ORTH: "خدا", LEMMA: "خدا", NORM: "خدا", TAG: "NOUN"},
- {ORTH: "یش", LEMMA: "یش", NORM: "یش", TAG: "NOUN"},
- ],
- "خدایشان": [
- {ORTH: "خدای", LEMMA: "خدای", NORM: "خدای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "خردسالش": [
- {ORTH: "خردسال", LEMMA: "خردسال", NORM: "خردسال", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "خروپفشان": [
- {ORTH: "خروپف", LEMMA: "خروپف", NORM: "خروپف", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "خستهای": [
- {ORTH: "خسته", LEMMA: "خسته", NORM: "خسته", TAG: "ADJ"},
- {ORTH: "ای", LEMMA: "ای", NORM: "ای", TAG: "VERB"},
- ],
- "خطت": [
- {ORTH: "خط", LEMMA: "خط", NORM: "خط", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "خوابمان": [
- {ORTH: "خواب", LEMMA: "خواب", NORM: "خواب", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "خواندنش": [
- {ORTH: "خواندن", LEMMA: "خواندن", NORM: "خواندن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "خواهرش": [
- {ORTH: "خواهر", LEMMA: "خواهر", NORM: "خواهر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "خوبش": [
- {ORTH: "خوب", LEMMA: "خوب", NORM: "خوب", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "خودت": [
- {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "خودتان": [
- {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "خودش": [
- {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "خودشان": [
- {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "خودمان": [
- {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "خوردمان": [
- {ORTH: "خورد", LEMMA: "خورد", NORM: "خورد", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "خوردنشان": [
- {ORTH: "خوردن", LEMMA: "خوردن", NORM: "خوردن", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "خوشش": [
- {ORTH: "خوش", LEMMA: "خوش", NORM: "خوش", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "خوشوقتم": [
- {ORTH: "خوشوقت", LEMMA: "خوشوقت", NORM: "خوشوقت", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"},
- ],
- "خونشان": [
- {ORTH: "خون", LEMMA: "خون", NORM: "خون", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "خویش": [
- {ORTH: "خوی", LEMMA: "خوی", NORM: "خوی", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "خویشتنم": [
- {ORTH: "خویشتن", LEMMA: "خویشتن", NORM: "خویشتن", TAG: "VERB"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "خیالش": [
- {ORTH: "خیال", LEMMA: "خیال", NORM: "خیال", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "خیسش": [
- {ORTH: "خیس", LEMMA: "خیس", NORM: "خیس", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "داراست": [
- {ORTH: "دارا", LEMMA: "دارا", NORM: "دارا", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "داستانهایش": [
- {ORTH: "داستانهای", LEMMA: "داستانهای", NORM: "داستانهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دخترمان": [
- {ORTH: "دختر", LEMMA: "دختر", NORM: "دختر", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "دخیلند": [
- {ORTH: "دخیل", LEMMA: "دخیل", NORM: "دخیل", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "دربارهات": [
- {ORTH: "درباره", LEMMA: "درباره", NORM: "درباره", TAG: "ADP"},
- {ORTH: "ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"},
- ],
- "دربارهاش": [
- {ORTH: "درباره", LEMMA: "درباره", NORM: "درباره", TAG: "ADP"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "دردش": [
- {ORTH: "درد", LEMMA: "درد", NORM: "درد", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دردشان": [
- {ORTH: "درد", LEMMA: "درد", NORM: "درد", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "درسته": [
- {ORTH: "درست", LEMMA: "درست", NORM: "درست", TAG: "ADJ"},
- {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"},
- ],
- "درش": [
- {ORTH: "در", LEMMA: "در", NORM: "در", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "درونشان": [
- {ORTH: "درون", LEMMA: "درون", NORM: "درون", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "درین": [
- {ORTH: "در", LEMMA: "در", NORM: "در", TAG: "ADP"},
- {ORTH: "ین", LEMMA: "ین", NORM: "ین", TAG: "NOUN"},
- ],
- "دریچههایش": [
- {ORTH: "دریچههای", LEMMA: "دریچههای", NORM: "دریچههای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دزدانش": [
- {ORTH: "دزدان", LEMMA: "دزدان", NORM: "دزدان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دستت": [
- {ORTH: "دست", LEMMA: "دست", NORM: "دست", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "دستش": [
- {ORTH: "دست", LEMMA: "دست", NORM: "دست", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دستمان": [
- {ORTH: "دست", LEMMA: "دست", NORM: "دست", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "دستهایشان": [
- {ORTH: "دستهای", LEMMA: "دستهای", NORM: "دستهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "دستیافتنیست": [
- {
- ORTH: "دستیافتنی",
- LEMMA: "دستیافتنی",
- NORM: "دستیافتنی",
- TAG: "ADJ",
- },
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "دشمنند": [
- {ORTH: "دشمن", LEMMA: "دشمن", NORM: "دشمن", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "دشمنیشان": [
- {ORTH: "دشمنی", LEMMA: "دشمنی", NORM: "دشمنی", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "دشمنیم": [
- {ORTH: "دشمن", LEMMA: "دشمن", NORM: "دشمن", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"},
- ],
- "دفترش": [
- {ORTH: "دفتر", LEMMA: "دفتر", NORM: "دفتر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دفنشان": [
- {ORTH: "دفن", LEMMA: "دفن", NORM: "دفن", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "دلت": [
- {ORTH: "دل", LEMMA: "دل", NORM: "دل", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "دلش": [
- {ORTH: "دل", LEMMA: "دل", NORM: "دل", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دلشان": [
- {ORTH: "دل", LEMMA: "دل", NORM: "دل", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "دلم": [
- {ORTH: "دل", LEMMA: "دل", NORM: "دل", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "دلیلش": [
- {ORTH: "دلیل", LEMMA: "دلیل", NORM: "دلیل", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دنبالش": [
- {ORTH: "دنبال", LEMMA: "دنبال", NORM: "دنبال", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دنبالهاش": [
- {ORTH: "دنباله", LEMMA: "دنباله", NORM: "دنباله", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "دهاتیهایش": [
- {ORTH: "دهاتیهای", LEMMA: "دهاتیهای", NORM: "دهاتیهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دهانت": [
- {ORTH: "دهان", LEMMA: "دهان", NORM: "دهان", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "دهنش": [
- {ORTH: "دهن", LEMMA: "دهن", NORM: "دهن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دورش": [
- {ORTH: "دور", LEMMA: "دور", NORM: "دور", TAG: "ADV"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دوروبریهاشان": [
- {ORTH: "دوروبریها", LEMMA: "دوروبریها", NORM: "دوروبریها", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "دوستانش": [
- {ORTH: "دوستان", LEMMA: "دوستان", NORM: "دوستان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دوستانشان": [
- {ORTH: "دوستان", LEMMA: "دوستان", NORM: "دوستان", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "دوستت": [
- {ORTH: "دوست", LEMMA: "دوست", NORM: "دوست", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "دوستش": [
- {ORTH: "دوست", LEMMA: "دوست", NORM: "دوست", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دومش": [
- {ORTH: "دوم", LEMMA: "دوم", NORM: "دوم", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دویدنش": [
- {ORTH: "دویدن", LEMMA: "دویدن", NORM: "دویدن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دکورهایمان": [
- {ORTH: "دکورهای", LEMMA: "دکورهای", NORM: "دکورهای", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "دیدگاهش": [
- {ORTH: "دیدگاه", LEMMA: "دیدگاه", NORM: "دیدگاه", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دیرت": [
- {ORTH: "دیر", LEMMA: "دیر", NORM: "دیر", TAG: "ADV"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "دیرم": [
- {ORTH: "دیر", LEMMA: "دیر", NORM: "دیر", TAG: "ADV"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "دینت": [
- {ORTH: "دین", LEMMA: "دین", NORM: "دین", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "دینش": [
- {ORTH: "دین", LEMMA: "دین", NORM: "دین", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دینشان": [
- {ORTH: "دین", LEMMA: "دین", NORM: "دین", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "دیوارههایش": [
- {ORTH: "دیوارههای", LEMMA: "دیوارههای", NORM: "دیوارههای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "دیوانهای": [
- {ORTH: "دیوانه", LEMMA: "دیوانه", NORM: "دیوانه", TAG: "ADJ"},
- {ORTH: "ای", LEMMA: "ای", NORM: "ای", TAG: "VERB"},
- ],
- "دیوی": [
- {ORTH: "دیو", LEMMA: "دیو", NORM: "دیو", TAG: "NOUN"},
- {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"},
- ],
- "دیگرم": [
- {ORTH: "دیگر", LEMMA: "دیگر", NORM: "دیگر", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "دیگرمان": [
- {ORTH: "دیگر", LEMMA: "دیگر", NORM: "دیگر", TAG: "ADJ"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "ذهنش": [
- {ORTH: "ذهن", LEMMA: "ذهن", NORM: "ذهن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "ذهنشان": [
- {ORTH: "ذهن", LEMMA: "ذهن", NORM: "ذهن", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "ذهنم": [
- {ORTH: "ذهن", LEMMA: "ذهن", NORM: "ذهن", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "رئوسش": [
- {ORTH: "رئوس", LEMMA: "رئوس", NORM: "رئوس", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "راهشان": [
- {ORTH: "راه", LEMMA: "راه", NORM: "راه", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "راهگشاست": [
- {ORTH: "راهگشا", LEMMA: "راهگشا", NORM: "راهگشا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "رایانههایشان": [
- {ORTH: "رایانههای", LEMMA: "رایانههای", NORM: "رایانههای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "رعایتشان": [
- {ORTH: "رعایت", LEMMA: "رعایت", NORM: "رعایت", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "رفتارش": [
- {ORTH: "رفتار", LEMMA: "رفتار", NORM: "رفتار", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "رفتارشان": [
- {ORTH: "رفتار", LEMMA: "رفتار", NORM: "رفتار", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "رفتارمان": [
- {ORTH: "رفتار", LEMMA: "رفتار", NORM: "رفتار", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "رفتارهاست": [
- {ORTH: "رفتارها", LEMMA: "رفتارها", NORM: "رفتارها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "رفتارهایشان": [
- {ORTH: "رفتارهای", LEMMA: "رفتارهای", NORM: "رفتارهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "رفقایم": [
- {ORTH: "رفقا", LEMMA: "رفقا", NORM: "رفقا", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"},
- ],
- "رقیقترش": [
- {ORTH: "رقیقتر", LEMMA: "رقیقتر", NORM: "رقیقتر", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "رنجند": [
- {ORTH: "رنج", LEMMA: "رنج", NORM: "رنج", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "رهگشاست": [
- {ORTH: "رهگشا", LEMMA: "رهگشا", NORM: "رهگشا", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "رواست": [
- {ORTH: "روا", LEMMA: "روا", NORM: "روا", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "روبروست": [
- {ORTH: "روبرو", LEMMA: "روبرو", NORM: "روبرو", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "روحیاش": [
- {ORTH: "روحی", LEMMA: "روحی", NORM: "روحی", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "روزنامهاش": [
- {ORTH: "روزنامه", LEMMA: "روزنامه", NORM: "روزنامه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "روزهست": [
- {ORTH: "روزه", LEMMA: "روزه", NORM: "روزه", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "روسریاش": [
- {ORTH: "روسری", LEMMA: "روسری", NORM: "روسری", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "روشتان": [
- {ORTH: "روش", LEMMA: "روش", NORM: "روش", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "رویش": [
- {ORTH: "روی", LEMMA: "روی", NORM: "روی", TAG: "ADP"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "زبانش": [
- {ORTH: "زبان", LEMMA: "زبان", NORM: "زبان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "زحماتشان": [
- {ORTH: "زحمات", LEMMA: "زحمات", NORM: "زحمات", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "زدنهایشان": [
- {ORTH: "زدنهای", LEMMA: "زدنهای", NORM: "زدنهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "زرنگشان": [
- {ORTH: "زرنگ", LEMMA: "زرنگ", NORM: "زرنگ", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "زشتش": [
- {ORTH: "زشت", LEMMA: "زشت", NORM: "زشت", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "زشتکارانند": [
- {ORTH: "زشتکاران", LEMMA: "زشتکاران", NORM: "زشتکاران", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "زلفش": [
- {ORTH: "زلف", LEMMA: "زلف", NORM: "زلف", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "زمن": [
- {ORTH: "ز", LEMMA: "ز", NORM: "ز", TAG: "ADP"},
- {ORTH: "من", LEMMA: "من", NORM: "من", TAG: "NOUN"},
- ],
- "زنبوریاش": [
- {ORTH: "زنبوری", LEMMA: "زنبوری", NORM: "زنبوری", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "زندانم": [
- {ORTH: "زندان", LEMMA: "زندان", NORM: "زندان", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "زندهام": [
- {ORTH: "زنده", LEMMA: "زنده", NORM: "زنده", TAG: "ADJ"},
- {ORTH: "ام", LEMMA: "ام", NORM: "ام", TAG: "VERB"},
- ],
- "زندگانیاش": [
- {ORTH: "زندگانی", LEMMA: "زندگانی", NORM: "زندگانی", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "زندگیاش": [
- {ORTH: "زندگی", LEMMA: "زندگی", NORM: "زندگی", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "زندگیام": [
- {ORTH: "زندگی", LEMMA: "زندگی", NORM: "زندگی", TAG: "NOUN"},
- {ORTH: "ام", LEMMA: "ام", NORM: "ام", TAG: "NOUN"},
- ],
- "زندگیشان": [
- {ORTH: "زندگی", LEMMA: "زندگی", NORM: "زندگی", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "زنش": [
- {ORTH: "زن", LEMMA: "زن", NORM: "زن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "زنند": [
- {ORTH: "زن", LEMMA: "زن", NORM: "زن", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "زو": [
- {ORTH: "ز", LEMMA: "ز", NORM: "ز", TAG: "ADP"},
- {ORTH: "و", LEMMA: "و", NORM: "و", TAG: "NOUN"},
- ],
- "زیاده": [
- {ORTH: "زیاد", LEMMA: "زیاد", NORM: "زیاد", TAG: "ADJ"},
- {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"},
- ],
- "زیباست": [
- {ORTH: "زیبا", LEMMA: "زیبا", NORM: "زیبا", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "زیبایش": [
- {ORTH: "زیبای", LEMMA: "زیبای", NORM: "زیبای", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "زیبایی": [
- {ORTH: "زیبای", LEMMA: "زیبای", NORM: "زیبای", TAG: "ADJ"},
- {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"},
- ],
- "زیربناست": [
- {ORTH: "زیربنا", LEMMA: "زیربنا", NORM: "زیربنا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "زیرکاند": [
- {ORTH: "زیرک", LEMMA: "زیرک", NORM: "زیرک", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "سؤالتان": [
- {ORTH: "سؤال", LEMMA: "سؤال", NORM: "سؤال", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "سؤالم": [
- {ORTH: "سؤال", LEMMA: "سؤال", NORM: "سؤال", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "سابقهاش": [
- {ORTH: "سابقه", LEMMA: "سابقه", NORM: "سابقه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "ساختنم": [
- {ORTH: "ساختن", LEMMA: "ساختن", NORM: "ساختن", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "سادهاش": [
- {ORTH: "ساده", LEMMA: "ساده", NORM: "ساده", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "سادهاند": [
- {ORTH: "ساده", LEMMA: "ساده", NORM: "ساده", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "سازمانش": [
- {ORTH: "سازمان", LEMMA: "سازمان", NORM: "سازمان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "ساعتم": [
- {ORTH: "ساعت", LEMMA: "ساعت", NORM: "ساعت", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "سالته": [
- {ORTH: "سال", LEMMA: "سال", NORM: "سال", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"},
- ],
- "سالش": [
- {ORTH: "سال", LEMMA: "سال", NORM: "سال", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سالهاست": [
- {ORTH: "سالها", LEMMA: "سالها", NORM: "سالها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "سالهاش": [
- {ORTH: "ساله", LEMMA: "ساله", NORM: "ساله", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "ساکتند": [
- {ORTH: "ساکت", LEMMA: "ساکت", NORM: "ساکت", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "ساکنند": [
- {ORTH: "ساکن", LEMMA: "ساکن", NORM: "ساکن", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "سبزشان": [
- {ORTH: "سبز", LEMMA: "سبز", NORM: "سبز", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "سبیلمان": [
- {ORTH: "سبیل", LEMMA: "سبیل", NORM: "سبیل", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "ستمهایش": [
- {ORTH: "ستمهای", LEMMA: "ستمهای", NORM: "ستمهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سخنانش": [
- {ORTH: "سخنان", LEMMA: "سخنان", NORM: "سخنان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سخنانشان": [
- {ORTH: "سخنان", LEMMA: "سخنان", NORM: "سخنان", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "سخنتان": [
- {ORTH: "سخن", LEMMA: "سخن", NORM: "سخن", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "سخنش": [
- {ORTH: "سخن", LEMMA: "سخن", NORM: "سخن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سخنم": [
- {ORTH: "سخن", LEMMA: "سخن", NORM: "سخن", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "سردش": [
- {ORTH: "سرد", LEMMA: "سرد", NORM: "سرد", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سرزمینشان": [
- {ORTH: "سرزمین", LEMMA: "سرزمین", NORM: "سرزمین", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "سرش": [
- {ORTH: "سر", LEMMA: "سر", NORM: "سر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سرمایهدارهاست": [
- {
- ORTH: "سرمایهدارها",
- LEMMA: "سرمایهدارها",
- NORM: "سرمایهدارها",
- TAG: "NOUN",
- },
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "سرنوشتش": [
- {ORTH: "سرنوشت", LEMMA: "سرنوشت", NORM: "سرنوشت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سرنوشتشان": [
- {ORTH: "سرنوشت", LEMMA: "سرنوشت", NORM: "سرنوشت", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "سروتهش": [
- {ORTH: "سروته", LEMMA: "سروته", NORM: "سروته", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سرچشمهاش": [
- {ORTH: "سرچشمه", LEMMA: "سرچشمه", NORM: "سرچشمه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "سقمش": [
- {ORTH: "سقم", LEMMA: "سقم", NORM: "سقم", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سنش": [
- {ORTH: "سن", LEMMA: "سن", NORM: "سن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سپاهش": [
- {ORTH: "سپاه", LEMMA: "سپاه", NORM: "سپاه", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "سیاسیشان": [
- {ORTH: "سیاسی", LEMMA: "سیاسی", NORM: "سیاسی", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "سیاهچالههاست": [
- {
- ORTH: "سیاهچالهها",
- LEMMA: "سیاهچالهها",
- NORM: "سیاهچالهها",
- TAG: "NOUN",
- },
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "شاخههایشان": [
- {ORTH: "شاخههای", LEMMA: "شاخههای", NORM: "شاخههای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "شالودهاش": [
- {ORTH: "شالوده", LEMMA: "شالوده", NORM: "شالوده", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "شانههایش": [
- {ORTH: "شانههای", LEMMA: "شانههای", NORM: "شانههای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "شاهدیم": [
- {ORTH: "شاهد", LEMMA: "شاهد", NORM: "شاهد", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"},
- ],
- "شاهکارهایش": [
- {ORTH: "شاهکارهای", LEMMA: "شاهکارهای", NORM: "شاهکارهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "شخصیتش": [
- {ORTH: "شخصیت", LEMMA: "شخصیت", NORM: "شخصیت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "شدنشان": [
- {ORTH: "شدن", LEMMA: "شدن", NORM: "شدن", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "شرکتیست": [
- {ORTH: "شرکتی", LEMMA: "شرکتی", NORM: "شرکتی", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "شعارهاشان": [
- {ORTH: "شعارها", LEMMA: "شعارها", NORM: "شعارها", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "شعورش": [
- {ORTH: "شعور", LEMMA: "شعور", NORM: "شعور", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "شغلش": [
- {ORTH: "شغل", LEMMA: "شغل", NORM: "شغل", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "شماست": [
- {ORTH: "شما", LEMMA: "شما", NORM: "شما", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "شمشیرش": [
- {ORTH: "شمشیر", LEMMA: "شمشیر", NORM: "شمشیر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "شنیدنش": [
- {ORTH: "شنیدن", LEMMA: "شنیدن", NORM: "شنیدن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "شوراست": [
- {ORTH: "شورا", LEMMA: "شورا", NORM: "شورا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "شومت": [
- {ORTH: "شوم", LEMMA: "شوم", NORM: "شوم", TAG: "ADJ"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "شیرینترش": [
- {ORTH: "شیرینتر", LEMMA: "شیرینتر", NORM: "شیرینتر", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "شیطاناند": [
- {ORTH: "شیطان", LEMMA: "شیطان", NORM: "شیطان", TAG: "NOUN"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "شیوههاست": [
- {ORTH: "شیوهها", LEMMA: "شیوهها", NORM: "شیوهها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "صاحبش": [
- {ORTH: "صاحب", LEMMA: "صاحب", NORM: "صاحب", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "صحنهاش": [
- {ORTH: "صحنه", LEMMA: "صحنه", NORM: "صحنه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "صدایش": [
- {ORTH: "صدای", LEMMA: "صدای", NORM: "صدای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "صددند": [
- {ORTH: "صدد", LEMMA: "صدد", NORM: "صدد", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "صندوقهاست": [
- {ORTH: "صندوقها", LEMMA: "صندوقها", NORM: "صندوقها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "صندوقهایش": [
- {ORTH: "صندوقهای", LEMMA: "صندوقهای", NORM: "صندوقهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "صورتش": [
- {ORTH: "صورت", LEMMA: "صورت", NORM: "صورت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "ضروریاند": [
- {ORTH: "ضروری", LEMMA: "ضروری", NORM: "ضروری", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "ضمیرش": [
- {ORTH: "ضمیر", LEMMA: "ضمیر", NORM: "ضمیر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "طرفش": [
- {ORTH: "طرف", LEMMA: "طرف", NORM: "طرف", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "طلسمش": [
- {ORTH: "طلسم", LEMMA: "طلسم", NORM: "طلسم", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "طوره": [
- {ORTH: "طور", LEMMA: "طور", NORM: "طور", TAG: "NOUN"},
- {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"},
- ],
- "عاشوراست": [
- {ORTH: "عاشورا", LEMMA: "عاشورا", NORM: "عاشورا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "عبارتند": [
- {ORTH: "عبارت", LEMMA: "عبارت", NORM: "عبارت", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "عزیزانتان": [
- {ORTH: "عزیزان", LEMMA: "عزیزان", NORM: "عزیزان", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "عزیزانش": [
- {ORTH: "عزیزان", LEMMA: "عزیزان", NORM: "عزیزان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "عزیزش": [
- {ORTH: "عزیز", LEMMA: "عزیز", NORM: "عزیز", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "عشرتطلبیاش": [
- {ORTH: "عشرتطلبی", LEMMA: "عشرتطلبی", NORM: "عشرتطلبی", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "عقبیم": [
- {ORTH: "عقب", LEMMA: "عقب", NORM: "عقب", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"},
- ],
- "علاقهاش": [
- {ORTH: "علاقه", LEMMA: "علاقه", NORM: "علاقه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "علمیمان": [
- {ORTH: "علمی", LEMMA: "علمی", NORM: "علمی", TAG: "ADJ"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "عمرش": [
- {ORTH: "عمر", LEMMA: "عمر", NORM: "عمر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "عمرشان": [
- {ORTH: "عمر", LEMMA: "عمر", NORM: "عمر", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "عملش": [
- {ORTH: "عمل", LEMMA: "عمل", NORM: "عمل", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "عملیاند": [
- {ORTH: "عملی", LEMMA: "عملی", NORM: "عملی", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "عمویت": [
- {ORTH: "عموی", LEMMA: "عموی", NORM: "عموی", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "عمویش": [
- {ORTH: "عموی", LEMMA: "عموی", NORM: "عموی", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "عمیقش": [
- {ORTH: "عمیق", LEMMA: "عمیق", NORM: "عمیق", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "عواملش": [
- {ORTH: "عوامل", LEMMA: "عوامل", NORM: "عوامل", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "عوضشان": [
- {ORTH: "عوض", LEMMA: "عوض", NORM: "عوض", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "غذاییشان": [
- {ORTH: "غذایی", LEMMA: "غذایی", NORM: "غذایی", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "غریبهاند": [
- {ORTH: "غریبه", LEMMA: "غریبه", NORM: "غریبه", TAG: "NOUN"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "غلامانش": [
- {ORTH: "غلامان", LEMMA: "غلامان", NORM: "غلامان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "غلطهاست": [
- {ORTH: "غلطها", LEMMA: "غلطها", NORM: "غلطها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "فراموشتان": [
- {ORTH: "فراموش", LEMMA: "فراموش", NORM: "فراموش", TAG: "ADJ"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "فردیاند": [
- {ORTH: "فردی", LEMMA: "فردی", NORM: "فردی", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "فرزندانش": [
- {ORTH: "فرزندان", LEMMA: "فرزندان", NORM: "فرزندان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "فرزندش": [
- {ORTH: "فرزند", LEMMA: "فرزند", NORM: "فرزند", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "فرمهایش": [
- {ORTH: "فرمهای", LEMMA: "فرمهای", NORM: "فرمهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "فرهنگیمان": [
- {ORTH: "فرهنگی", LEMMA: "فرهنگی", NORM: "فرهنگی", TAG: "ADJ"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "فریادشان": [
- {ORTH: "فریاد", LEMMA: "فریاد", NORM: "فریاد", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "فضاییشان": [
- {ORTH: "فضایی", LEMMA: "فضایی", NORM: "فضایی", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "فقیرشان": [
- {ORTH: "فقیر", LEMMA: "فقیر", NORM: "فقیر", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "فوریشان": [
- {ORTH: "فوری", LEMMA: "فوری", NORM: "فوری", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "قائلند": [
- {ORTH: "قائل", LEMMA: "قائل", NORM: "قائل", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "قائلیم": [
- {ORTH: "قائل", LEMMA: "قائل", NORM: "قائل", TAG: "ADJ"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"},
- ],
- "قادرند": [
- {ORTH: "قادر", LEMMA: "قادر", NORM: "قادر", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "قانونمندش": [
- {ORTH: "قانونمند", LEMMA: "قانونمند", NORM: "قانونمند", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "قبلند": [
- {ORTH: "قبل", LEMMA: "قبل", NORM: "قبل", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "قبلیاش": [
- {ORTH: "قبلی", LEMMA: "قبلی", NORM: "قبلی", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "قبلیمان": [
- {ORTH: "قبلی", LEMMA: "قبلی", NORM: "قبلی", TAG: "ADJ"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "قدریست": [
- {ORTH: "قدری", LEMMA: "قدری", NORM: "قدری", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "قدمش": [
- {ORTH: "قدم", LEMMA: "قدم", NORM: "قدم", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "قسمتش": [
- {ORTH: "قسمت", LEMMA: "قسمت", NORM: "قسمت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "قضایاست": [
- {ORTH: "قضایا", LEMMA: "قضایا", NORM: "قضایا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "قضیهشان": [
- {ORTH: "قضیه", LEMMA: "قضیه", NORM: "قضیه", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "قهرمانهایشان": [
- {ORTH: "قهرمانهای", LEMMA: "قهرمانهای", NORM: "قهرمانهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "قهرمانیش": [
- {ORTH: "قهرمانی", LEMMA: "قهرمانی", NORM: "قهرمانی", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "قومت": [
- {ORTH: "قوم", LEMMA: "قوم", NORM: "قوم", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "لازمهاش": [
- {ORTH: "لازمه", LEMMA: "لازمه", NORM: "لازمه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "مأموریتش": [
- {ORTH: "مأموریت", LEMMA: "مأموریت", NORM: "مأموریت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مأموریتم": [
- {ORTH: "مأموریت", LEMMA: "مأموریت", NORM: "مأموریت", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "مأموریتاند": [
- {ORTH: "مأموریت", LEMMA: "مأموریت", NORM: "مأموریت", TAG: "NOUN"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "مادرانشان": [
- {ORTH: "مادران", LEMMA: "مادران", NORM: "مادران", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "مادرت": [
- {ORTH: "مادر", LEMMA: "مادر", NORM: "مادر", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "مادرش": [
- {ORTH: "مادر", LEMMA: "مادر", NORM: "مادر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مادرم": [
- {ORTH: "مادر", LEMMA: "مادر", NORM: "مادر", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "ماست": [
- {ORTH: "ما", LEMMA: "ما", NORM: "ما", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "مالیاش": [
- {ORTH: "مالی", LEMMA: "مالی", NORM: "مالی", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "ماهیتش": [
- {ORTH: "ماهیت", LEMMA: "ماهیت", NORM: "ماهیت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مایی": [
- {ORTH: "ما", LEMMA: "ما", NORM: "ما", TAG: "NOUN"},
- {ORTH: "یی", LEMMA: "یی", NORM: "یی", TAG: "VERB"},
- ],
- "مجازاتش": [
- {ORTH: "مجازات", LEMMA: "مجازات", NORM: "مجازات", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مجبورند": [
- {ORTH: "مجبور", LEMMA: "مجبور", NORM: "مجبور", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "محتاجند": [
- {ORTH: "محتاج", LEMMA: "محتاج", NORM: "محتاج", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "محرمم": [
- {ORTH: "محرم", LEMMA: "محرم", NORM: "محرم", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "SCONJ"},
- ],
- "محلش": [
- {ORTH: "محل", LEMMA: "محل", NORM: "محل", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مخالفند": [
- {ORTH: "مخالف", LEMMA: "مخالف", NORM: "مخالف", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "مخدرش": [
- {ORTH: "مخدر", LEMMA: "مخدر", NORM: "مخدر", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مدتهاست": [
- {ORTH: "مدتها", LEMMA: "مدتها", NORM: "مدتها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "مدرسهات": [
- {ORTH: "مدرسه", LEMMA: "مدرسه", NORM: "مدرسه", TAG: "NOUN"},
- {ORTH: "ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"},
- ],
- "مدرکم": [
- {ORTH: "مدرک", LEMMA: "مدرک", NORM: "مدرک", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "مدیرانش": [
- {ORTH: "مدیران", LEMMA: "مدیران", NORM: "مدیران", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مدیونم": [
- {ORTH: "مدیون", LEMMA: "مدیون", NORM: "مدیون", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"},
- ],
- "مذهبیاند": [
- {ORTH: "مذهبی", LEMMA: "مذهبی", NORM: "مذهبی", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "مرا": [
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- {ORTH: "را", LEMMA: "را", NORM: "را", TAG: "PART"},
- ],
- "مرادت": [
- {ORTH: "مراد", LEMMA: "مراد", NORM: "مراد", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "مردمشان": [
- {ORTH: "مردم", LEMMA: "مردم", NORM: "مردم", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "مردمند": [
- {ORTH: "مردم", LEMMA: "مردم", NORM: "مردم", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "مردماند": [
- {ORTH: "مردم", LEMMA: "مردم", NORM: "مردم", TAG: "NOUN"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "مرزشان": [
- {ORTH: "مرز", LEMMA: "مرز", NORM: "مرز", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "مرزهاشان": [
- {ORTH: "مرزها", LEMMA: "مرزها", NORM: "مرزها", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "مزدورش": [
- {ORTH: "مزدور", LEMMA: "مزدور", NORM: "مزدور", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مسئولیتش": [
- {ORTH: "مسئولیت", LEMMA: "مسئولیت", NORM: "مسئولیت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مسائلش": [
- {ORTH: "مسائل", LEMMA: "مسائل", NORM: "مسائل", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مستحضرید": [
- {ORTH: "مستحضر", LEMMA: "مستحضر", NORM: "مستحضر", TAG: "ADJ"},
- {ORTH: "ید", LEMMA: "ید", NORM: "ید", TAG: "VERB"},
- ],
- "مسلمانم": [
- {ORTH: "مسلمان", LEMMA: "مسلمان", NORM: "مسلمان", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"},
- ],
- "مسلمانند": [
- {ORTH: "مسلمان", LEMMA: "مسلمان", NORM: "مسلمان", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "مشتریانش": [
- {ORTH: "مشتریان", LEMMA: "مشتریان", NORM: "مشتریان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مشتهایمان": [
- {ORTH: "مشتهای", LEMMA: "مشتهای", NORM: "مشتهای", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "مشخصند": [
- {ORTH: "مشخص", LEMMA: "مشخص", NORM: "مشخص", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "مشغولند": [
- {ORTH: "مشغول", LEMMA: "مشغول", NORM: "مشغول", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "مشغولیم": [
- {ORTH: "مشغول", LEMMA: "مشغول", NORM: "مشغول", TAG: "ADJ"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"},
- ],
- "مشهورش": [
- {ORTH: "مشهور", LEMMA: "مشهور", NORM: "مشهور", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مشکلاتشان": [
- {ORTH: "مشکلات", LEMMA: "مشکلات", NORM: "مشکلات", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "مشکلم": [
- {ORTH: "مشکل", LEMMA: "مشکل", NORM: "مشکل", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "مطمئنم": [
- {ORTH: "مطمئن", LEMMA: "مطمئن", NORM: "مطمئن", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"},
- ],
- "معاملهمان": [
- {ORTH: "معامله", LEMMA: "معامله", NORM: "معامله", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "معتقدم": [
- {ORTH: "معتقد", LEMMA: "معتقد", NORM: "معتقد", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"},
- ],
- "معتقدند": [
- {ORTH: "معتقد", LEMMA: "معتقد", NORM: "معتقد", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "معتقدیم": [
- {ORTH: "معتقد", LEMMA: "معتقد", NORM: "معتقد", TAG: "ADJ"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"},
- ],
- "معرفیاش": [
- {ORTH: "معرفی", LEMMA: "معرفی", NORM: "معرفی", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "معروفش": [
- {ORTH: "معروف", LEMMA: "معروف", NORM: "معروف", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "معضلاتمان": [
- {ORTH: "معضلات", LEMMA: "معضلات", NORM: "معضلات", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "معلمش": [
- {ORTH: "معلم", LEMMA: "معلم", NORM: "معلم", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "معنایش": [
- {ORTH: "معنای", LEMMA: "معنای", NORM: "معنای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مغزشان": [
- {ORTH: "مغز", LEMMA: "مغز", NORM: "مغز", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "مفیدند": [
- {ORTH: "مفید", LEMMA: "مفید", NORM: "مفید", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "مقابلش": [
- {ORTH: "مقابل", LEMMA: "مقابل", NORM: "مقابل", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مقالهاش": [
- {ORTH: "مقاله", LEMMA: "مقاله", NORM: "مقاله", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "مقدمش": [
- {ORTH: "مقدم", LEMMA: "مقدم", NORM: "مقدم", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مقرش": [
- {ORTH: "مقر", LEMMA: "مقر", NORM: "مقر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مقصدشان": [
- {ORTH: "مقصد", LEMMA: "مقصد", NORM: "مقصد", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "مقصرند": [
- {ORTH: "مقصر", LEMMA: "مقصر", NORM: "مقصر", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "مقصودتان": [
- {ORTH: "مقصود", LEMMA: "مقصود", NORM: "مقصود", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "ملاقاتهایش": [
- {ORTH: "ملاقاتهای", LEMMA: "ملاقاتهای", NORM: "ملاقاتهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "ممکنشان": [
- {ORTH: "ممکن", LEMMA: "ممکن", NORM: "ممکن", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "ممیزیهاست": [
- {ORTH: "ممیزیها", LEMMA: "ممیزیها", NORM: "ممیزیها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "منظورم": [
- {ORTH: "منظور", LEMMA: "منظور", NORM: "منظور", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "منی": [
- {ORTH: "من", LEMMA: "من", NORM: "من", TAG: "NOUN"},
- {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"},
- ],
- "منید": [
- {ORTH: "من", LEMMA: "من", NORM: "من", TAG: "NOUN"},
- {ORTH: "ید", LEMMA: "ید", NORM: "ید", TAG: "VERB"},
- ],
- "مهربانش": [
- {ORTH: "مهربان", LEMMA: "مهربان", NORM: "مهربان", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "مهماند": [
- {ORTH: "مهم", LEMMA: "مهم", NORM: "مهم", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "مواجهند": [
- {ORTH: "مواجه", LEMMA: "مواجه", NORM: "مواجه", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "مواجهاند": [
- {ORTH: "مواجه", LEMMA: "مواجه", NORM: "مواجه", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "مواخذهات": [
- {ORTH: "مواخذه", LEMMA: "مواخذه", NORM: "مواخذه", TAG: "NOUN"},
- {ORTH: "ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"},
- ],
- "مواضعشان": [
- {ORTH: "مواضع", LEMMA: "مواضع", NORM: "مواضع", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "مواضعمان": [
- {ORTH: "مواضع", LEMMA: "مواضع", NORM: "مواضع", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "موافقند": [
- {ORTH: "موافق", LEMMA: "موافق", NORM: "موافق", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "موجوداتش": [
- {ORTH: "موجودات", LEMMA: "موجودات", NORM: "موجودات", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "موجودند": [
- {ORTH: "موجود", LEMMA: "موجود", NORM: "موجود", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "موردش": [
- {ORTH: "مورد", LEMMA: "مورد", NORM: "مورد", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "موضعشان": [
- {ORTH: "موضع", LEMMA: "موضع", NORM: "موضع", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "موظفند": [
- {ORTH: "موظف", LEMMA: "موظف", NORM: "موظف", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "موهایش": [
- {ORTH: "موهای", LEMMA: "موهای", NORM: "موهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "موهایمان": [
- {ORTH: "موهای", LEMMA: "موهای", NORM: "موهای", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "مویم": [
- {ORTH: "مو", LEMMA: "مو", NORM: "مو", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"},
- ],
- "ناخرسندند": [
- {ORTH: "ناخرسند", LEMMA: "ناخرسند", NORM: "ناخرسند", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "ناراحتیش": [
- {ORTH: "ناراحتی", LEMMA: "ناراحتی", NORM: "ناراحتی", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "ناراضیاند": [
- {ORTH: "ناراضی", LEMMA: "ناراضی", NORM: "ناراضی", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "نارواست": [
- {ORTH: "ناروا", LEMMA: "ناروا", NORM: "ناروا", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "نازش": [
- {ORTH: "ناز", LEMMA: "ناز", NORM: "ناز", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "نامش": [
- {ORTH: "نام", LEMMA: "نام", NORM: "نام", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "نامشان": [
- {ORTH: "نام", LEMMA: "نام", NORM: "نام", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "نامم": [
- {ORTH: "نام", LEMMA: "نام", NORM: "نام", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "نامهات": [
- {ORTH: "نامه", LEMMA: "نامه", NORM: "نامه", TAG: "NOUN"},
- {ORTH: "ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"},
- ],
- "نامهام": [
- {ORTH: "نامه", LEMMA: "نامه", NORM: "نامه", TAG: "NOUN"},
- {ORTH: "ام", LEMMA: "ام", NORM: "ام", TAG: "NOUN"},
- ],
- "ناچارم": [
- {ORTH: "ناچار", LEMMA: "ناچار", NORM: "ناچار", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"},
- ],
- "نخستوزیریاش": [
- {
- ORTH: "نخستوزیری",
- LEMMA: "نخستوزیری",
- NORM: "نخستوزیری",
- TAG: "NOUN",
- },
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "نزدش": [
- {ORTH: "نزد", LEMMA: "نزد", NORM: "نزد", TAG: "ADP"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "نشانم": [
- {ORTH: "نشان", LEMMA: "نشان", NORM: "نشان", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "نظراتشان": [
- {ORTH: "نظرات", LEMMA: "نظرات", NORM: "نظرات", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "نظرتان": [
- {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "نظرش": [
- {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "نظرشان": [
- {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "نظرم": [
- {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "نظرهایشان": [
- {ORTH: "نظرهای", LEMMA: "نظرهای", NORM: "نظرهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "نفاقش": [
- {ORTH: "نفاق", LEMMA: "نفاق", NORM: "نفاق", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "نفرند": [
- {ORTH: "نفر", LEMMA: "نفر", NORM: "نفر", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "نفوذیند": [
- {ORTH: "نفوذی", LEMMA: "نفوذی", NORM: "نفوذی", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "نقطهنظراتتان": [
- {ORTH: "نقطهنظرات", LEMMA: "نقطهنظرات", NORM: "نقطهنظرات", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "نمایشیمان": [
- {ORTH: "نمایشی", LEMMA: "نمایشی", NORM: "نمایشی", TAG: "ADJ"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "نمایندگیشان": [
- {ORTH: "نمایندگی", LEMMA: "نمایندگی", NORM: "نمایندگی", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "نمونهاش": [
- {ORTH: "نمونه", LEMMA: "نمونه", NORM: "نمونه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "نمیپذیرندش": [
- {ORTH: "نمیپذیرند", LEMMA: "نمیپذیرند", NORM: "نمیپذیرند", TAG: "VERB"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "نوآوریاش": [
- {ORTH: "نوآوری", LEMMA: "نوآوری", NORM: "نوآوری", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "نوشتههایشان": [
- {ORTH: "نوشتههای", LEMMA: "نوشتههای", NORM: "نوشتههای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "نوشتههایم": [
- {ORTH: "نوشتهها", LEMMA: "نوشتهها", NORM: "نوشتهها", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"},
- ],
- "نکردنشان": [
- {ORTH: "نکردن", LEMMA: "نکردن", NORM: "نکردن", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "نگاهداریشان": [
- {ORTH: "نگاهداری", LEMMA: "نگاهداری", NORM: "نگاهداری", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "نگاهش": [
- {ORTH: "نگاه", LEMMA: "نگاه", NORM: "نگاه", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "نگرانم": [
- {ORTH: "نگران", LEMMA: "نگران", NORM: "نگران", TAG: "ADJ"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"},
- ],
- "نگرشهایشان": [
- {ORTH: "نگرشهای", LEMMA: "نگرشهای", NORM: "نگرشهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "نیازمندند": [
- {ORTH: "نیازمند", LEMMA: "نیازمند", NORM: "نیازمند", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "هدفش": [
- {ORTH: "هدف", LEMMA: "هدف", NORM: "هدف", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "همانست": [
- {ORTH: "همان", LEMMA: "همان", NORM: "همان", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "همراهش": [
- {ORTH: "همراه", LEMMA: "همراه", NORM: "همراه", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "همسرتان": [
- {ORTH: "همسر", LEMMA: "همسر", NORM: "همسر", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "همسرش": [
- {ORTH: "همسر", LEMMA: "همسر", NORM: "همسر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "همسرم": [
- {ORTH: "همسر", LEMMA: "همسر", NORM: "همسر", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "همفکرانش": [
- {ORTH: "همفکران", LEMMA: "همفکران", NORM: "همفکران", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "همهاش": [
- {ORTH: "همه", LEMMA: "همه", NORM: "همه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "همهشان": [
- {ORTH: "همه", LEMMA: "همه", NORM: "همه", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "همکارانش": [
- {ORTH: "همکاران", LEMMA: "همکاران", NORM: "همکاران", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "همنظریم": [
- {ORTH: "همنظر", LEMMA: "همنظر", NORM: "همنظر", TAG: "ADJ"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"},
- ],
- "هنرش": [
- {ORTH: "هنر", LEMMA: "هنر", NORM: "هنر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "هواست": [
- {ORTH: "هوا", LEMMA: "هوا", NORM: "هوا", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "هویتش": [
- {ORTH: "هویت", LEMMA: "هویت", NORM: "هویت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "وابستهاند": [
- {ORTH: "وابسته", LEMMA: "وابسته", NORM: "وابسته", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "واقفند": [
- {ORTH: "واقف", LEMMA: "واقف", NORM: "واقف", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "والدینشان": [
- {ORTH: "والدین", LEMMA: "والدین", NORM: "والدین", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "وجدانتان": [
- {ORTH: "وجدان", LEMMA: "وجدان", NORM: "وجدان", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "وجودشان": [
- {ORTH: "وجود", LEMMA: "وجود", NORM: "وجود", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "وطنم": [
- {ORTH: "وطن", LEMMA: "وطن", NORM: "وطن", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "وعدهاش": [
- {ORTH: "وعده", LEMMA: "وعده", NORM: "وعده", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "وقتمان": [
- {ORTH: "وقت", LEMMA: "وقت", NORM: "وقت", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "ولادتش": [
- {ORTH: "ولادت", LEMMA: "ولادت", NORM: "ولادت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پایانش": [
- {ORTH: "پایان", LEMMA: "پایان", NORM: "پایان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پایش": [
- {ORTH: "پای", LEMMA: "پای", NORM: "پای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پایینترند": [
- {ORTH: "پایینتر", LEMMA: "پایینتر", NORM: "پایینتر", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "پدرت": [
- {ORTH: "پدر", LEMMA: "پدر", NORM: "پدر", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "پدرش": [
- {ORTH: "پدر", LEMMA: "پدر", NORM: "پدر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پدرشان": [
- {ORTH: "پدر", LEMMA: "پدر", NORM: "پدر", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "پدرم": [
- {ORTH: "پدر", LEMMA: "پدر", NORM: "پدر", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "پربارش": [
- {ORTH: "پربار", LEMMA: "پربار", NORM: "پربار", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پروردگارت": [
- {ORTH: "پروردگار", LEMMA: "پروردگار", NORM: "پروردگار", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "پسرتان": [
- {ORTH: "پسر", LEMMA: "پسر", NORM: "پسر", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "پسرش": [
- {ORTH: "پسر", LEMMA: "پسر", NORM: "پسر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پسرعمویش": [
- {ORTH: "پسرعموی", LEMMA: "پسرعموی", NORM: "پسرعموی", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پسرعمویت": [
- {ORTH: "پسرعموی", LEMMA: "پسرعموی", NORM: "پسرعموی", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "پشتش": [
- {ORTH: "پشت", LEMMA: "پشت", NORM: "پشت", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پشیمونی": [
- {ORTH: "پشیمون", LEMMA: "پشیمون", NORM: "پشیمون", TAG: "ADJ"},
- {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"},
- ],
- "پولش": [
- {ORTH: "پول", LEMMA: "پول", NORM: "پول", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پژوهشهایش": [
- {ORTH: "پژوهشهای", LEMMA: "پژوهشهای", NORM: "پژوهشهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پیامبرش": [
- {ORTH: "پیامبر", LEMMA: "پیامبر", NORM: "پیامبر", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پیامبری": [
- {ORTH: "پیامبر", LEMMA: "پیامبر", NORM: "پیامبر", TAG: "NOUN"},
- {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"},
- ],
- "پیامش": [
- {ORTH: "پیام", LEMMA: "پیام", NORM: "پیام", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پیداست": [
- {ORTH: "پیدا", LEMMA: "پیدا", NORM: "پیدا", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "پیراهنش": [
- {ORTH: "پیراهن", LEMMA: "پیراهن", NORM: "پیراهن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پیروانش": [
- {ORTH: "پیروان", LEMMA: "پیروان", NORM: "پیروان", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "پیشانیاش": [
- {ORTH: "پیشانی", LEMMA: "پیشانی", NORM: "پیشانی", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "پیمانت": [
- {ORTH: "پیمان", LEMMA: "پیمان", NORM: "پیمان", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "پیوندشان": [
- {ORTH: "پیوند", LEMMA: "پیوند", NORM: "پیوند", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "چاپش": [
- {ORTH: "چاپ", LEMMA: "چاپ", NORM: "چاپ", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "چت": [
- {ORTH: "چ", LEMMA: "چ", NORM: "چ", TAG: "ADV"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "چته": [
- {ORTH: "چ", LEMMA: "چ", NORM: "چ", TAG: "ADV"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"},
- ],
- "چرخهایش": [
- {ORTH: "چرخهای", LEMMA: "چرخهای", NORM: "چرخهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "چشمم": [
- {ORTH: "چشم", LEMMA: "چشم", NORM: "چشم", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "چشمهایش": [
- {ORTH: "چشمهای", LEMMA: "چشمهای", NORM: "چشمهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "چشمهایشان": [
- {ORTH: "چشمهای", LEMMA: "چشمهای", NORM: "چشمهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "چمنم": [
- {ORTH: "چمن", LEMMA: "چمن", NORM: "چمن", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "چهرهاش": [
- {ORTH: "چهره", LEMMA: "چهره", NORM: "چهره", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "چکارهاند": [
- {ORTH: "چکاره", LEMMA: "چکاره", NORM: "چکاره", TAG: "ADV"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "چیزهاست": [
- {ORTH: "چیزها", LEMMA: "چیزها", NORM: "چیزها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "چیزهایش": [
- {ORTH: "چیزهای", LEMMA: "چیزهای", NORM: "چیزهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "چیزیست": [
- {ORTH: "چیزی", LEMMA: "چیزی", NORM: "چیزی", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "چیست": [
- {ORTH: "چی", LEMMA: "چی", NORM: "چی", TAG: "ADV"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "کارش": [
- {ORTH: "کار", LEMMA: "کار", NORM: "کار", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "کارشان": [
- {ORTH: "کار", LEMMA: "کار", NORM: "کار", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "کارم": [
- {ORTH: "کار", LEMMA: "کار", NORM: "کار", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "کارند": [
- {ORTH: "کار", LEMMA: "کار", NORM: "کار", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "کارهایم": [
- {ORTH: "کارها", LEMMA: "کارها", NORM: "کارها", TAG: "NOUN"},
- {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"},
- ],
- "کافیست": [
- {ORTH: "کافی", LEMMA: "کافی", NORM: "کافی", TAG: "ADJ"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "کتابخانهاش": [
- {ORTH: "کتابخانه", LEMMA: "کتابخانه", NORM: "کتابخانه", TAG: "NOUN"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "کتابش": [
- {ORTH: "کتاب", LEMMA: "کتاب", NORM: "کتاب", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "کتابهاشان": [
- {ORTH: "کتابها", LEMMA: "کتابها", NORM: "کتابها", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "کجاست": [
- {ORTH: "کجا", LEMMA: "کجا", NORM: "کجا", TAG: "ADV"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "کدورتهایشان": [
- {ORTH: "کدورتهای", LEMMA: "کدورتهای", NORM: "کدورتهای", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "کردنش": [
- {ORTH: "کردن", LEMMA: "کردن", NORM: "کردن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "کرمخوردهاش": [
- {ORTH: "کرمخورده", LEMMA: "کرمخورده", NORM: "کرمخورده", TAG: "ADJ"},
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"},
- ],
- "کشش": [
- {ORTH: "کش", LEMMA: "کش", NORM: "کش", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "کشورش": [
- {ORTH: "کشور", LEMMA: "کشور", NORM: "کشور", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "کشورشان": [
- {ORTH: "کشور", LEMMA: "کشور", NORM: "کشور", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "کشورمان": [
- {ORTH: "کشور", LEMMA: "کشور", NORM: "کشور", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "کشورهاست": [
- {ORTH: "کشورها", LEMMA: "کشورها", NORM: "کشورها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "کلیشههاست": [
- {ORTH: "کلیشهها", LEMMA: "کلیشهها", NORM: "کلیشهها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "کمبودهاست": [
- {ORTH: "کمبودها", LEMMA: "کمبودها", NORM: "کمبودها", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "کمتره": [
- {ORTH: "کمتر", LEMMA: "کمتر", NORM: "کمتر", TAG: "ADJ"},
- {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"},
- ],
- "کمکم": [
- {ORTH: "کمک", LEMMA: "کمک", NORM: "کمک", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "کنارش": [
- {ORTH: "کنار", LEMMA: "کنار", NORM: "کنار", TAG: "ADP"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "کودکانشان": [
- {ORTH: "کودکان", LEMMA: "کودکان", NORM: "کودکان", TAG: "NOUN"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "کوچکش": [
- {ORTH: "کوچک", LEMMA: "کوچک", NORM: "کوچک", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "کیست": [
- {ORTH: "کی", LEMMA: "کی", NORM: "کی", TAG: "NOUN"},
- {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"},
- ],
- "کیفش": [
- {ORTH: "کیف", LEMMA: "کیف", NORM: "کیف", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "گذشتهاند": [
- {ORTH: "گذشته", LEMMA: "گذشته", NORM: "گذشته", TAG: "ADJ"},
- {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"},
- ],
- "گرانقدرش": [
- {ORTH: "گرانقدر", LEMMA: "گرانقدر", NORM: "گرانقدر", TAG: "ADJ"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "گرانقدرشان": [
- {ORTH: "گرانقدر", LEMMA: "گرانقدر", NORM: "گرانقدر", TAG: "ADJ"},
- {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"},
- ],
- "گردنتان": [
- {ORTH: "گردن", LEMMA: "گردن", NORM: "گردن", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "گردنش": [
- {ORTH: "گردن", LEMMA: "گردن", NORM: "گردن", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "گرفتارند": [
- {ORTH: "گرفتار", LEMMA: "گرفتار", NORM: "گرفتار", TAG: "ADJ"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "گرفتنت": [
- {ORTH: "گرفتن", LEMMA: "گرفتن", NORM: "گرفتن", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "گروهند": [
- {ORTH: "گروه", LEMMA: "گروه", NORM: "گروه", TAG: "NOUN"},
- {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"},
- ],
- "گروگانهایش": [
- {ORTH: "گروگانهای", LEMMA: "گروگانهای", NORM: "گروگانهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "گریمش": [
- {ORTH: "گریم", LEMMA: "گریم", NORM: "گریم", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "گفتارمان": [
- {ORTH: "گفتار", LEMMA: "گفتار", NORM: "گفتار", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "گلهایش": [
- {ORTH: "گلهای", LEMMA: "گلهای", NORM: "گلهای", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "گلویش": [
- {ORTH: "گلوی", LEMMA: "گلوی", NORM: "گلوی", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "گناهت": [
- {ORTH: "گناه", LEMMA: "گناه", NORM: "گناه", TAG: "NOUN"},
- {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"},
- ],
- "گوشش": [
- {ORTH: "گوش", LEMMA: "گوش", NORM: "گوش", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "گوشم": [
- {ORTH: "گوش", LEMMA: "گوش", NORM: "گوش", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "گولش": [
- {ORTH: "گول", LEMMA: "گول", NORM: "گول", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- "یادتان": [
- {ORTH: "یاد", LEMMA: "یاد", NORM: "یاد", TAG: "NOUN"},
- {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"},
- ],
- "یادم": [
- {ORTH: "یاد", LEMMA: "یاد", NORM: "یاد", TAG: "NOUN"},
- {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"},
- ],
- "یادمان": [
- {ORTH: "یاد", LEMMA: "یاد", NORM: "یاد", TAG: "NOUN"},
- {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"},
- ],
- "یارانش": [
- {ORTH: "یاران", LEMMA: "یاران", NORM: "یاران", TAG: "NOUN"},
- {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"},
- ],
- }
-)
-TOKENIZER_EXCEPTIONS = _exc
diff --git a/spacy/lang/fi/__init__.py b/spacy/lang/fi/__init__.py
index 45d2f886f..9233c6547 100644
--- a/spacy/lang/fi/__init__.py
+++ b/spacy/lang/fi/__init__.py
@@ -1,28 +1,15 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
class FinnishDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "fi"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
+ lex_attr_getters = LEX_ATTRS
stop_words = STOP_WORDS
diff --git a/spacy/lang/fi/examples.py b/spacy/lang/fi/examples.py
index 88be248a6..930fac273 100644
--- a/spacy/lang/fi/examples.py
+++ b/spacy/lang/fi/examples.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
"""
Example sentences to test spaCy and its language models.
>>> from spacy.lang.fi.examples import sentences
diff --git a/spacy/lang/fi/lex_attrs.py b/spacy/lang/fi/lex_attrs.py
index e960b55eb..4d500cead 100644
--- a/spacy/lang/fi/lex_attrs.py
+++ b/spacy/lang/fi/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/fi/punctuation.py b/spacy/lang/fi/punctuation.py
index a85c0b228..6e14dde38 100644
--- a/spacy/lang/fi/punctuation.py
+++ b/spacy/lang/fi/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_HYPHENS
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
from ..punctuation import TOKENIZER_SUFFIXES
diff --git a/spacy/lang/fi/stop_words.py b/spacy/lang/fi/stop_words.py
index e8e39ec6f..8e8dcfa56 100644
--- a/spacy/lang/fi/stop_words.py
+++ b/spacy/lang/fi/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Source https://github.com/stopwords-iso/stopwords-fi/blob/master/stopwords-fi.txt
# Reformatted with some minor corrections
STOP_WORDS = set(
diff --git a/spacy/lang/fi/tokenizer_exceptions.py b/spacy/lang/fi/tokenizer_exceptions.py
index 7cdc7cf11..22d710cb0 100644
--- a/spacy/lang/fi/tokenizer_exceptions.py
+++ b/spacy/lang/fi/tokenizer_exceptions.py
@@ -1,7 +1,6 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH
+from ...util import update_exc
_exc = {}
@@ -9,76 +8,76 @@ _exc = {}
# Source https://www.cs.tut.fi/~jkorpela/kielenopas/5.5.html
for exc_data in [
- {ORTH: "aik.", LEMMA: "aikaisempi"},
- {ORTH: "alk.", LEMMA: "alkaen"},
- {ORTH: "alv.", LEMMA: "arvonlisävero"},
- {ORTH: "ark.", LEMMA: "arkisin"},
- {ORTH: "as.", LEMMA: "asunto"},
- {ORTH: "eaa.", LEMMA: "ennen ajanlaskun alkua"},
- {ORTH: "ed.", LEMMA: "edellinen"},
- {ORTH: "esim.", LEMMA: "esimerkki"},
- {ORTH: "huom.", LEMMA: "huomautus"},
- {ORTH: "jne.", LEMMA: "ja niin edelleen"},
- {ORTH: "joht.", LEMMA: "johtaja"},
- {ORTH: "k.", LEMMA: "kuollut"},
- {ORTH: "ks.", LEMMA: "katso"},
- {ORTH: "lk.", LEMMA: "luokka"},
- {ORTH: "lkm.", LEMMA: "lukumäärä"},
- {ORTH: "lyh.", LEMMA: "lyhenne"},
- {ORTH: "läh.", LEMMA: "lähettäjä"},
- {ORTH: "miel.", LEMMA: "mieluummin"},
- {ORTH: "milj.", LEMMA: "miljoona"},
- {ORTH: "Mm.", LEMMA: "muun muassa"},
- {ORTH: "mm.", LEMMA: "muun muassa"},
- {ORTH: "myöh.", LEMMA: "myöhempi"},
- {ORTH: "n.", LEMMA: "noin"},
- {ORTH: "nimim.", LEMMA: "nimimerkki"},
- {ORTH: "n:o", LEMMA: "numero"},
- {ORTH: "N:o", LEMMA: "numero"},
- {ORTH: "nro", LEMMA: "numero"},
- {ORTH: "ns.", LEMMA: "niin sanottu"},
- {ORTH: "nyk.", LEMMA: "nykyinen"},
- {ORTH: "oik.", LEMMA: "oikealla"},
- {ORTH: "os.", LEMMA: "osoite"},
- {ORTH: "p.", LEMMA: "päivä"},
- {ORTH: "par.", LEMMA: "paremmin"},
- {ORTH: "per.", LEMMA: "perustettu"},
- {ORTH: "pj.", LEMMA: "puheenjohtaja"},
- {ORTH: "puh.joht.", LEMMA: "puheenjohtaja"},
- {ORTH: "prof.", LEMMA: "professori"},
- {ORTH: "puh.", LEMMA: "puhelin"},
- {ORTH: "pvm.", LEMMA: "päivämäärä"},
- {ORTH: "rak.", LEMMA: "rakennettu"},
- {ORTH: "ry.", LEMMA: "rekisteröity yhdistys"},
- {ORTH: "s.", LEMMA: "sivu"},
- {ORTH: "siht.", LEMMA: "sihteeri"},
- {ORTH: "synt.", LEMMA: "syntynyt"},
- {ORTH: "t.", LEMMA: "toivoo"},
- {ORTH: "tark.", LEMMA: "tarkastanut"},
- {ORTH: "til.", LEMMA: "tilattu"},
- {ORTH: "tms.", LEMMA: "tai muuta sellaista"},
- {ORTH: "toim.", LEMMA: "toimittanut"},
- {ORTH: "v.", LEMMA: "vuosi"},
- {ORTH: "vas.", LEMMA: "vasen"},
- {ORTH: "vast.", LEMMA: "vastaus"},
- {ORTH: "vrt.", LEMMA: "vertaa"},
- {ORTH: "yht.", LEMMA: "yhteensä"},
- {ORTH: "yl.", LEMMA: "yleinen"},
- {ORTH: "ym.", LEMMA: "ynnä muuta"},
- {ORTH: "yms.", LEMMA: "ynnä muuta sellaista"},
- {ORTH: "yo.", LEMMA: "ylioppilas"},
- {ORTH: "yliopp.", LEMMA: "ylioppilas"},
- {ORTH: "ao.", LEMMA: "asianomainen"},
- {ORTH: "em.", LEMMA: "edellä mainittu"},
- {ORTH: "ko.", LEMMA: "kyseessä oleva"},
- {ORTH: "ml.", LEMMA: "mukaan luettuna"},
- {ORTH: "po.", LEMMA: "puheena oleva"},
- {ORTH: "so.", LEMMA: "se on"},
- {ORTH: "ts.", LEMMA: "toisin sanoen"},
- {ORTH: "vm.", LEMMA: "viimeksi mainittu"},
- {ORTH: "srk.", LEMMA: "seurakunta"},
+ {ORTH: "aik."},
+ {ORTH: "alk."},
+ {ORTH: "alv."},
+ {ORTH: "ark."},
+ {ORTH: "as."},
+ {ORTH: "eaa."},
+ {ORTH: "ed."},
+ {ORTH: "esim."},
+ {ORTH: "huom."},
+ {ORTH: "jne."},
+ {ORTH: "joht."},
+ {ORTH: "k."},
+ {ORTH: "ks."},
+ {ORTH: "lk."},
+ {ORTH: "lkm."},
+ {ORTH: "lyh."},
+ {ORTH: "läh."},
+ {ORTH: "miel."},
+ {ORTH: "milj."},
+ {ORTH: "Mm."},
+ {ORTH: "mm."},
+ {ORTH: "myöh."},
+ {ORTH: "n."},
+ {ORTH: "nimim."},
+ {ORTH: "n:o"},
+ {ORTH: "N:o"},
+ {ORTH: "nro"},
+ {ORTH: "ns."},
+ {ORTH: "nyk."},
+ {ORTH: "oik."},
+ {ORTH: "os."},
+ {ORTH: "p."},
+ {ORTH: "par."},
+ {ORTH: "per."},
+ {ORTH: "pj."},
+ {ORTH: "puh.joht."},
+ {ORTH: "prof."},
+ {ORTH: "puh."},
+ {ORTH: "pvm."},
+ {ORTH: "rak."},
+ {ORTH: "ry."},
+ {ORTH: "s."},
+ {ORTH: "siht."},
+ {ORTH: "synt."},
+ {ORTH: "t."},
+ {ORTH: "tark."},
+ {ORTH: "til."},
+ {ORTH: "tms."},
+ {ORTH: "toim."},
+ {ORTH: "v."},
+ {ORTH: "vas."},
+ {ORTH: "vast."},
+ {ORTH: "vrt."},
+ {ORTH: "yht."},
+ {ORTH: "yl."},
+ {ORTH: "ym."},
+ {ORTH: "yms."},
+ {ORTH: "yo."},
+ {ORTH: "yliopp."},
+ {ORTH: "ao."},
+ {ORTH: "em."},
+ {ORTH: "ko."},
+ {ORTH: "ml."},
+ {ORTH: "po."},
+ {ORTH: "so."},
+ {ORTH: "ts."},
+ {ORTH: "vm."},
+ {ORTH: "srk."},
]:
_exc[exc_data[ORTH]] = [exc_data]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/fr/__init__.py b/spacy/lang/fr/__init__.py
index 7727aff0e..42241cd8a 100644
--- a/spacy/lang/fr/__init__.py
+++ b/spacy/lang/fr/__init__.py
@@ -1,44 +1,27 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Optional
+
+from thinc.api import Model
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
from .punctuation import TOKENIZER_SUFFIXES
-from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-from .lemmatizer import FrenchLemmatizer
from .syntax_iterators import SYNTAX_ITERATORS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
-from ...language import Language
+from .lemmatizer import FrenchLemmatizer
from ...lookups import Lookups
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
+from ...language import Language
class FrenchDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "fr"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- tag_map = TAG_MAP
- stop_words = STOP_WORDS
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
token_match = TOKEN_MATCH
+ lex_attr_getters = LEX_ATTRS
syntax_iterators = SYNTAX_ITERATORS
-
- @classmethod
- def create_lemmatizer(cls, nlp=None, lookups=None):
- if lookups is None:
- lookups = Lookups()
- return FrenchLemmatizer(lookups)
+ stop_words = STOP_WORDS
class French(Language):
@@ -46,4 +29,22 @@ class French(Language):
Defaults = FrenchDefaults
+@French.factory(
+ "lemmatizer",
+ assigns=["token.lemma"],
+ default_config={"model": None, "mode": "rule", "lookups": None},
+ scores=["lemma_acc"],
+ default_score_weights={"lemma_acc": 1.0},
+)
+def make_lemmatizer(
+ nlp: Language,
+ model: Optional[Model],
+ name: str,
+ mode: str,
+ lookups: Optional[Lookups],
+):
+ lookups = FrenchLemmatizer.load_lookups(nlp.lang, mode, lookups)
+ return FrenchLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+
+
__all__ = ["French"]
diff --git a/spacy/lang/fr/_tokenizer_exceptions_list.py b/spacy/lang/fr/_tokenizer_exceptions_list.py
index 0fcf02351..50f439501 100644
--- a/spacy/lang/fr/_tokenizer_exceptions_list.py
+++ b/spacy/lang/fr/_tokenizer_exceptions_list.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
FR_BASE_EXCEPTIONS = [
"(+)-amphétamine",
"(5R,6S)-7,8-didehydro-4,5-époxy-3-méthoxy-N-méthylmorphinan-6-ol",
diff --git a/spacy/lang/fr/examples.py b/spacy/lang/fr/examples.py
index a874c22fc..a74a62204 100644
--- a/spacy/lang/fr/examples.py
+++ b/spacy/lang/fr/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/fr/lemmatizer.py b/spacy/lang/fr/lemmatizer.py
index af8345e1b..0dd782cc4 100644
--- a/spacy/lang/fr/lemmatizer.py
+++ b/spacy/lang/fr/lemmatizer.py
@@ -1,10 +1,7 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import List, Dict
-from ...lemmatizer import Lemmatizer
-from ...symbols import POS, NOUN, VERB, ADJ, ADV, PRON, DET, AUX, PUNCT, ADP
-from ...symbols import SCONJ, CCONJ
-from ...symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
+from ...pipeline import Lemmatizer
+from ...tokens import Token
class FrenchLemmatizer(Lemmatizer):
@@ -17,69 +14,55 @@ class FrenchLemmatizer(Lemmatizer):
the lookup table.
"""
- def __call__(self, string, univ_pos, morphology=None):
- lookup_table = self.lookups.get_table("lemma_lookup", {})
- if "lemma_rules" not in self.lookups:
- return [lookup_table.get(string, string)]
- if univ_pos in (NOUN, "NOUN", "noun"):
- univ_pos = "noun"
- elif univ_pos in (VERB, "VERB", "verb"):
- univ_pos = "verb"
- elif univ_pos in (ADJ, "ADJ", "adj"):
- univ_pos = "adj"
- elif univ_pos in (ADP, "ADP", "adp"):
- univ_pos = "adp"
- elif univ_pos in (ADV, "ADV", "adv"):
- univ_pos = "adv"
- elif univ_pos in (AUX, "AUX", "aux"):
- univ_pos = "aux"
- elif univ_pos in (CCONJ, "CCONJ", "cconj"):
- univ_pos = "cconj"
- elif univ_pos in (DET, "DET", "det"):
- univ_pos = "det"
- elif univ_pos in (PRON, "PRON", "pron"):
- univ_pos = "pron"
- elif univ_pos in (PUNCT, "PUNCT", "punct"):
- univ_pos = "punct"
- elif univ_pos in (SCONJ, "SCONJ", "sconj"):
- univ_pos = "sconj"
+ @classmethod
+ def get_lookups_config(cls, mode: str) -> Dict:
+ if mode == "rule":
+ return {
+ "required_tables": [
+ "lemma_lookup",
+ "lemma_rules",
+ "lemma_exc",
+ "lemma_index",
+ ],
+ "optional_tables": [],
+ }
else:
- return [self.lookup(string)]
+ return super().get_lookups_config(mode)
+
+ def rule_lemmatize(self, token: Token) -> List[str]:
+ cache_key = (token.orth, token.pos)
+ if cache_key in self.cache:
+ return self.cache[cache_key]
+ string = token.text
+ univ_pos = token.pos_.lower()
+ if univ_pos in ("", "eol", "space"):
+ return [string.lower()]
+ elif "lemma_rules" not in self.lookups or univ_pos not in (
+ "noun",
+ "verb",
+ "adj",
+ "adp",
+ "adv",
+ "aux",
+ "cconj",
+ "det",
+ "pron",
+ "punct",
+ "sconj",
+ ):
+ return self.lookup_lemmatize(token)
index_table = self.lookups.get_table("lemma_index", {})
exc_table = self.lookups.get_table("lemma_exc", {})
rules_table = self.lookups.get_table("lemma_rules", {})
- lemmas = self.lemmatize(
- string,
- index_table.get(univ_pos, {}),
- exc_table.get(univ_pos, {}),
- rules_table.get(univ_pos, []),
- )
- return lemmas
-
- def noun(self, string, morphology=None):
- return self(string, "noun", morphology)
-
- def verb(self, string, morphology=None):
- return self(string, "verb", morphology)
-
- def adj(self, string, morphology=None):
- return self(string, "adj", morphology)
-
- def punct(self, string, morphology=None):
- return self(string, "punct", morphology)
-
- def lookup(self, string, orth=None):
- lookup_table = self.lookups.get_table("lemma_lookup", {})
- if orth is not None and orth in lookup_table:
- return lookup_table[orth][0]
- return string
-
- def lemmatize(self, string, index, exceptions, rules):
lookup_table = self.lookups.get_table("lemma_lookup", {})
+ index = index_table.get(univ_pos, {})
+ exceptions = exc_table.get(univ_pos, {})
+ rules = rules_table.get(univ_pos, [])
string = string.lower()
forms = []
if string in index:
forms.append(string)
+ self.cache[cache_key] = forms
return forms
forms.extend(exceptions.get(string, []))
oov_forms = []
@@ -96,7 +79,9 @@ class FrenchLemmatizer(Lemmatizer):
if not forms:
forms.extend(oov_forms)
if not forms and string in lookup_table.keys():
- forms.append(lookup_table[string][0])
+ forms.append(self.lookup_lemmatize(token)[0])
if not forms:
forms.append(string)
- return list(set(forms))
+ forms = list(set(forms))
+ self.cache[cache_key] = forms
+ return forms
diff --git a/spacy/lang/fr/lex_attrs.py b/spacy/lang/fr/lex_attrs.py
index e3ccd9fdd..da98c6e37 100644
--- a/spacy/lang/fr/lex_attrs.py
+++ b/spacy/lang/fr/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/fr/punctuation.py b/spacy/lang/fr/punctuation.py
index 7d50c4a9e..873d01d87 100644
--- a/spacy/lang/fr/punctuation.py
+++ b/spacy/lang/fr/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
from ..char_classes import CONCAT_QUOTES, UNITS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
diff --git a/spacy/lang/fr/stop_words.py b/spacy/lang/fr/stop_words.py
index ae8432043..a331f3c0f 100644
--- a/spacy/lang/fr/stop_words.py
+++ b/spacy/lang/fr/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
a à â abord absolument afin ah ai aie ailleurs ainsi ait allaient allo allons
diff --git a/spacy/lang/fr/syntax_iterators.py b/spacy/lang/fr/syntax_iterators.py
index d6c12e69f..d297203e3 100644
--- a/spacy/lang/fr/syntax_iterators.py
+++ b/spacy/lang/fr/syntax_iterators.py
@@ -1,29 +1,18 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Union, Iterator
from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...tokens import Doc, Span
-def noun_chunks(doclike):
- """
- Detect base noun phrases from a dependency parse. Works on both Doc and Span.
- """
- labels = [
- "nsubj",
- "nsubj:pass",
- "obj",
- "iobj",
- "ROOT",
- "appos",
- "nmod",
- "nmod:poss",
- ]
+def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Span]:
+ """Detect base noun phrases from a dependency parse. Works on Doc and Span."""
+ # fmt: off
+ labels = ["nsubj", "nsubj:pass", "obj", "iobj", "ROOT", "appos", "nmod", "nmod:poss"]
+ # fmt: on
doc = doclike.doc # Ensure works on both Doc and Span.
-
if not doc.is_parsed:
raise ValueError(Errors.E029)
-
np_deps = [doc.vocab.strings[label] for label in labels]
conj = doc.vocab.strings.add("conj")
np_label = doc.vocab.strings.add("NP")
diff --git a/spacy/lang/fr/tag_map.py b/spacy/lang/fr/tag_map.py
deleted file mode 100644
index 93b43c2ec..000000000
--- a/spacy/lang/fr/tag_map.py
+++ /dev/null
@@ -1,219 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, AUX, SCONJ
-
-
-TAG_MAP = {
- "ADJ__Gender=Fem|Number=Plur": {POS: ADJ},
- "ADJ__Gender=Fem|Number=Plur|NumType=Ord": {POS: ADJ},
- "ADJ__Gender=Fem|Number=Sing": {POS: ADJ},
- "ADJ__Gender=Fem|Number=Sing|NumType=Ord": {POS: ADJ},
- "ADJ__Gender=Masc": {POS: ADJ},
- "ADJ__Gender=Masc|Number=Plur": {POS: ADJ},
- "ADJ__Gender=Masc|Number=Plur|NumType=Ord": {POS: ADJ},
- "ADJ__Gender=Masc|Number=Sing": {POS: ADJ},
- "ADJ__Gender=Masc|Number=Sing|NumType=Card": {POS: ADJ},
- "ADJ__Gender=Masc|Number=Sing|NumType=Ord": {POS: ADJ},
- "ADJ__NumType=Card": {POS: ADJ},
- "ADJ__NumType=Ord": {POS: ADJ},
- "ADJ__Number=Plur": {POS: ADJ},
- "ADJ__Number=Sing": {POS: ADJ},
- "ADJ__Number=Sing|NumType=Ord": {POS: ADJ},
- "ADJ___": {POS: ADJ},
- "ADP__Gender=Fem|Number=Plur|Person=3": {POS: ADP},
- "ADP__Gender=Masc|Number=Plur|Person=3": {POS: ADP},
- "ADP__Gender=Masc|Number=Sing|Person=3": {POS: ADP},
- "ADP___": {POS: ADP},
- "ADV__Polarity=Neg": {POS: ADV},
- "ADV__PronType=Int": {POS: ADV},
- "ADV___": {POS: ADV},
- "AUX__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {POS: AUX},
- "AUX__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: AUX},
- "AUX__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {POS: AUX},
- "AUX__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: AUX},
- "AUX__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {POS: AUX},
- "AUX__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: AUX},
- "AUX__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {POS: AUX},
- "AUX__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: AUX},
- "AUX__Mood=Cnd|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Cnd|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Cnd|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Cnd|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Cnd|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Imp|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Sub|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "AUX__Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: AUX},
- "AUX__Tense=Past|VerbForm=Part": {POS: AUX},
- "AUX__Tense=Pres|VerbForm=Part": {POS: AUX},
- "AUX__VerbForm=Inf": {POS: AUX},
- "CCONJ___": {POS: CCONJ},
- "DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {POS: DET},
- "DET__Definite=Def|Gender=Masc|Number=Sing|PronType=Art": {POS: DET},
- "DET__Definite=Def|Number=Plur|PronType=Art": {POS: DET},
- "DET__Definite=Def|Number=Sing|PronType=Art": {POS: DET},
- "DET__Definite=Ind|Gender=Fem|Number=Plur|PronType=Art": {POS: DET},
- "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Art": {POS: DET},
- "DET__Definite=Ind|Gender=Masc|Number=Plur|PronType=Art": {POS: DET},
- "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Art": {POS: DET},
- "DET__Definite=Ind|Number=Plur|PronType=Art": {POS: DET},
- "DET__Definite=Ind|Number=Sing|PronType=Art": {POS: DET},
- "DET__Gender=Fem|Number=Plur": {POS: DET},
- "DET__Gender=Fem|Number=Plur|PronType=Int": {POS: DET},
- "DET__Gender=Fem|Number=Sing": {POS: DET},
- "DET__Gender=Fem|Number=Sing|Poss=Yes": {POS: DET},
- "DET__Gender=Fem|Number=Sing|PronType=Dem": {POS: DET},
- "DET__Gender=Fem|Number=Sing|PronType=Int": {POS: DET},
- "DET__Gender=Masc|Number=Plur": {POS: DET},
- "DET__Gender=Masc|Number=Sing": {POS: DET},
- "DET__Gender=Masc|Number=Sing|PronType=Dem": {POS: DET},
- "DET__Gender=Masc|Number=Sing|PronType=Int": {POS: DET},
- "DET__Number=Plur": {POS: DET},
- "DET__Number=Plur|Poss=Yes": {POS: DET},
- "DET__Number=Plur|PronType=Dem": {POS: DET},
- "DET__Number=Sing": {POS: DET},
- "DET__Number=Sing|Poss=Yes": {POS: DET},
- "DET___": {POS: DET},
- "INTJ___": {POS: INTJ},
- "NOUN__Gender=Fem": {POS: NOUN},
- "NOUN__Gender=Fem|Number=Plur": {POS: NOUN},
- "NOUN__Gender=Fem|Number=Sing": {POS: NOUN},
- "NOUN__Gender=Masc": {POS: NOUN},
- "NOUN__Gender=Masc|Number=Plur": {POS: NOUN},
- "NOUN__Gender=Masc|Number=Plur|NumType=Card": {POS: NOUN},
- "NOUN__Gender=Masc|Number=Sing": {POS: NOUN},
- "NOUN__Gender=Masc|Number=Sing|NumType=Card": {POS: NOUN},
- "NOUN__NumType=Card": {POS: NOUN},
- "NOUN__Number=Plur": {POS: NOUN},
- "NOUN__Number=Sing": {POS: NOUN},
- "NOUN___": {POS: NOUN},
- "NUM__Gender=Masc|Number=Plur|NumType=Card": {POS: NUM},
- "NUM__NumType=Card": {POS: NUM},
- "PART___": {POS: PART},
- "PRON__Gender=Fem|Number=Plur": {POS: PRON},
- "PRON__Gender=Fem|Number=Plur|Person=3": {POS: PRON},
- "PRON__Gender=Fem|Number=Plur|Person=3|PronType=Prs": {POS: PRON},
- "PRON__Gender=Fem|Number=Plur|Person=3|PronType=Rel": {POS: PRON},
- "PRON__Gender=Fem|Number=Plur|PronType=Dem": {POS: PRON},
- "PRON__Gender=Fem|Number=Plur|PronType=Rel": {POS: PRON},
- "PRON__Gender=Fem|Number=Sing|Person=3": {POS: PRON},
- "PRON__Gender=Fem|Number=Sing|Person=3|PronType=Prs": {POS: PRON},
- "PRON__Gender=Fem|Number=Sing|PronType=Dem": {POS: PRON},
- "PRON__Gender=Fem|Number=Sing|PronType=Rel": {POS: PRON},
- "PRON__Gender=Fem|PronType=Rel": {POS: PRON},
- "PRON__Gender=Masc|Number=Plur": {POS: PRON},
- "PRON__Gender=Masc|Number=Plur|Person=3": {POS: PRON},
- "PRON__Gender=Masc|Number=Plur|Person=3|PronType=Prs": {POS: PRON},
- "PRON__Gender=Masc|Number=Plur|Person=3|PronType=Rel": {POS: PRON},
- "PRON__Gender=Masc|Number=Plur|PronType=Dem": {POS: PRON},
- "PRON__Gender=Masc|Number=Plur|PronType=Rel": {POS: PRON},
- "PRON__Gender=Masc|Number=Sing": {POS: PRON},
- "PRON__Gender=Masc|Number=Sing|Person=3": {POS: PRON},
- "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Dem": {POS: PRON},
- "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Prs": {POS: PRON},
- "PRON__Gender=Masc|Number=Sing|PronType=Dem": {POS: PRON},
- "PRON__Gender=Masc|Number=Sing|PronType=Rel": {POS: PRON},
- "PRON__Gender=Masc|PronType=Rel": {POS: PRON},
- "PRON__NumType=Card|PronType=Rel": {POS: PRON},
- "PRON__Number=Plur|Person=1": {POS: PRON},
- "PRON__Number=Plur|Person=1|PronType=Prs": {POS: PRON},
- "PRON__Number=Plur|Person=1|Reflex=Yes": {POS: PRON},
- "PRON__Number=Plur|Person=2": {POS: PRON},
- "PRON__Number=Plur|Person=2|PronType=Prs": {POS: PRON},
- "PRON__Number=Plur|Person=2|Reflex=Yes": {POS: PRON},
- "PRON__Number=Plur|Person=3": {POS: PRON},
- "PRON__Number=Plur|PronType=Rel": {POS: PRON},
- "PRON__Number=Sing|Person=1": {POS: PRON},
- "PRON__Number=Sing|Person=1|PronType=Prs": {POS: PRON},
- "PRON__Number=Sing|Person=1|Reflex=Yes": {POS: PRON},
- "PRON__Number=Sing|Person=2|PronType=Prs": {POS: PRON},
- "PRON__Number=Sing|Person=3": {POS: PRON},
- "PRON__Number=Sing|PronType=Dem": {POS: PRON},
- "PRON__Number=Sing|PronType=Rel": {POS: PRON},
- "PRON__Person=3": {POS: PRON},
- "PRON__Person=3|Reflex=Yes": {POS: PRON},
- "PRON__PronType=Int": {POS: PRON},
- "PRON__PronType=Rel": {POS: PRON},
- "PRON___": {POS: PRON},
- "PROPN__Gender=Fem|Number=Plur": {POS: PROPN},
- "PROPN__Gender=Fem|Number=Sing": {POS: PROPN},
- "PROPN__Gender=Masc": {POS: PROPN},
- "PROPN__Gender=Masc|Number=Plur": {POS: PROPN},
- "PROPN__Gender=Masc|Number=Sing": {POS: PROPN},
- "PROPN__Number=Plur": {POS: PROPN},
- "PROPN__Number=Sing": {POS: PROPN},
- "PROPN___": {POS: PROPN},
- "PUNCT___": {POS: PUNCT},
- "SCONJ___": {POS: SCONJ},
- "VERB__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {POS: VERB},
- "VERB__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB},
- "VERB__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {POS: VERB},
- "VERB__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB},
- "VERB__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {POS: VERB},
- "VERB__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB},
- "VERB__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {POS: VERB},
- "VERB__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB},
- "VERB__Gender=Masc|Tense=Past|VerbForm=Part": {POS: VERB},
- "VERB__Gender=Masc|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB},
- "VERB__Mood=Cnd|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Cnd|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Cnd|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Imp|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Imp|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Imp|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=2|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=2|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|Person=3|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Ind|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {POS: VERB},
- "VERB__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "VERB__Number=Plur|Tense=Past|VerbForm=Part": {POS: VERB},
- "VERB__Number=Plur|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB},
- "VERB__Number=Sing|Tense=Past|VerbForm=Part": {POS: VERB},
- "VERB__Number=Sing|Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB},
- "VERB__Tense=Past|VerbForm=Part": {POS: VERB},
- "VERB__Tense=Past|VerbForm=Part|Voice=Pass": {POS: VERB},
- "VERB__Tense=Pres|VerbForm=Part": {POS: VERB},
- "VERB__VerbForm=Inf": {POS: VERB},
- "VERB__VerbForm=Part": {POS: VERB},
- "X___": {POS: X},
- "_SP": {POS: SPACE},
-}
diff --git a/spacy/lang/fr/tokenizer_exceptions.py b/spacy/lang/fr/tokenizer_exceptions.py
index 933607bdf..6f429eecc 100644
--- a/spacy/lang/fr/tokenizer_exceptions.py
+++ b/spacy/lang/fr/tokenizer_exceptions.py
@@ -1,11 +1,11 @@
-# coding: utf8
-from __future__ import unicode_literals
-
import re
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
from .punctuation import ELISION, HYPHENS
from ..char_classes import ALPHA_LOWER, ALPHA
-from ...symbols import ORTH, LEMMA
+from ...symbols import ORTH
+from ...util import update_exc
+
# not using the large _tokenizer_exceptions_list by default as it slows down the tokenizer
# from ._tokenizer_exceptions_list import FR_BASE_EXCEPTIONS
@@ -28,29 +28,29 @@ def lower_first_letter(text):
return text[0].lower() + text[1:]
-_exc = {"J.-C.": [{LEMMA: "Jésus", ORTH: "J."}, {LEMMA: "Christ", ORTH: "-C."}]}
+_exc = {"J.-C.": [{ORTH: "J."}, {ORTH: "-C."}]}
for exc_data in [
- {LEMMA: "avant", ORTH: "av."},
- {LEMMA: "janvier", ORTH: "janv."},
- {LEMMA: "février", ORTH: "févr."},
- {LEMMA: "avril", ORTH: "avr."},
- {LEMMA: "juillet", ORTH: "juill."},
- {LEMMA: "septembre", ORTH: "sept."},
- {LEMMA: "octobre", ORTH: "oct."},
- {LEMMA: "novembre", ORTH: "nov."},
- {LEMMA: "décembre", ORTH: "déc."},
- {LEMMA: "après", ORTH: "apr."},
- {LEMMA: "docteur", ORTH: "Dr."},
- {LEMMA: "monsieur", ORTH: "M."},
- {LEMMA: "monsieur", ORTH: "Mr."},
- {LEMMA: "madame", ORTH: "Mme."},
- {LEMMA: "mademoiselle", ORTH: "Mlle."},
- {LEMMA: "numéro", ORTH: "n°"},
- {LEMMA: "degrés", ORTH: "d°"},
- {LEMMA: "saint", ORTH: "St."},
- {LEMMA: "sainte", ORTH: "Ste."},
+ {ORTH: "av."},
+ {ORTH: "janv."},
+ {ORTH: "févr."},
+ {ORTH: "avr."},
+ {ORTH: "juill."},
+ {ORTH: "sept."},
+ {ORTH: "oct."},
+ {ORTH: "nov."},
+ {ORTH: "déc."},
+ {ORTH: "apr."},
+ {ORTH: "Dr."},
+ {ORTH: "M."},
+ {ORTH: "Mr."},
+ {ORTH: "Mme."},
+ {ORTH: "Mlle."},
+ {ORTH: "n°"},
+ {ORTH: "d°"},
+ {ORTH: "St."},
+ {ORTH: "Ste."},
]:
_exc[exc_data[ORTH]] = [exc_data]
@@ -80,55 +80,37 @@ for orth in [
_exc[orth] = [{ORTH: orth}]
-for verb, verb_lemma in [
- ("a", "avoir"),
- ("est", "être"),
- ("semble", "sembler"),
- ("indique", "indiquer"),
- ("moque", "moquer"),
- ("passe", "passer"),
+for verb in [  # verbs whose "<verb>-t-<pronoun>" forms are split into three tokens
+ "a",
+ "est", "semble",
+ "indique",
+ "moque",
+ "passe",
]:
for orth in [verb, verb.title()]:
for pronoun in ["elle", "il", "on"]:
- token = "{}-t-{}".format(orth, pronoun)
- _exc[token] = [
- {LEMMA: verb_lemma, ORTH: orth}, # , TAG: "VERB"},
- {LEMMA: "t", ORTH: "-t"},
- {LEMMA: pronoun, ORTH: "-" + pronoun},
- ]
+ token = f"{orth}-t-{pronoun}"
+ _exc[token] = [{ORTH: orth}, {ORTH: "-t"}, {ORTH: "-" + pronoun}]
-for verb, verb_lemma in [("est", "être")]:
+for verb in ["est"]:
for orth in [verb, verb.title()]:
- token = "{}-ce".format(orth)
- _exc[token] = [
- {LEMMA: verb_lemma, ORTH: orth}, # , TAG: "VERB"},
- {LEMMA: "ce", ORTH: "-ce"},
- ]
+ _exc[f"{orth}-ce"] = [{ORTH: orth}, {ORTH: "-ce"}]
-for pre, pre_lemma in [("qu'", "que"), ("n'", "ne")]:
+for pre in ["qu'", "n'"]:
for orth in [pre, pre.title()]:
- _exc["%sest-ce" % orth] = [
- {LEMMA: pre_lemma, ORTH: orth},
- {LEMMA: "être", ORTH: "est"},
- {LEMMA: "ce", ORTH: "-ce"},
- ]
+ _exc[f"{orth}est-ce"] = [{ORTH: orth}, {ORTH: "est"}, {ORTH: "-ce"}]
for verb, pronoun in [("est", "il"), ("EST", "IL")]:
- token = "{}-{}".format(verb, pronoun)
- _exc[token] = [
- {LEMMA: "être", ORTH: verb},
- {LEMMA: pronoun, ORTH: "-" + pronoun},
- ]
+ _exc[f"{verb}-{pronoun}"] = [{ORTH: verb}, {ORTH: "-" + pronoun}]
for s, verb, pronoun in [("s", "est", "il"), ("S", "EST", "IL")]:
- token = "{}'{}-{}".format(s, verb, pronoun)
- _exc[token] = [
- {LEMMA: "se", ORTH: s + "'"},
- {LEMMA: "être", ORTH: verb},
- {LEMMA: pronoun, ORTH: "-" + pronoun},
+ _exc[f"{s}'{verb}-{pronoun}"] = [
+ {ORTH: s + "'"},
+ {ORTH: verb},
+ {ORTH: "-" + pronoun},
]
@@ -455,7 +437,7 @@ _regular_exp += [
]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
TOKEN_MATCH = re.compile(
"(?iu)" + "|".join("(?:{})".format(m) for m in _regular_exp)
).match
diff --git a/spacy/lang/ga/__init__.py b/spacy/lang/ga/__init__.py
index 42b4d0d18..80131368b 100644
--- a/spacy/lang/ga/__init__.py
+++ b/spacy/lang/ga/__init__.py
@@ -1,21 +1,11 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .stop_words import STOP_WORDS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
class IrishDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "ga"
-
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = set(STOP_WORDS)
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
+ stop_words = STOP_WORDS
class Irish(Language):
diff --git a/spacy/lang/ga/irish_morphology_helpers.py b/spacy/lang/ga/irish_morphology_helpers.py
index 2133f0d22..d606da975 100644
--- a/spacy/lang/ga/irish_morphology_helpers.py
+++ b/spacy/lang/ga/irish_morphology_helpers.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# fmt: off
consonants = ["b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "z"]
broad_vowels = ["a", "á", "o", "ó", "u", "ú"]
diff --git a/spacy/lang/ga/stop_words.py b/spacy/lang/ga/stop_words.py
index d8f705b59..4ef052ca5 100644
--- a/spacy/lang/ga/stop_words.py
+++ b/spacy/lang/ga/stop_words.py
@@ -1,7 +1,3 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
a ach ag agus an aon ar arna as
diff --git a/spacy/lang/ga/tag_map.py b/spacy/lang/ga/tag_map.py
deleted file mode 100644
index 1d8284014..000000000
--- a/spacy/lang/ga/tag_map.py
+++ /dev/null
@@ -1,369 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-# fmt: off
-TAG_MAP = {
- "ADJ__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
- "ADJ__Case=Gen|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "fem", "Number": "sing"},
- "ADJ__Case=Gen|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "gen", "Gender": "masc", "Number": "sing"},
- "ADJ__Case=Gen|NounType=Strong|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "strong"}},
- "ADJ__Case=Gen|NounType=Weak|Number=Plur": {"pos": "ADJ", "Case": "gen", "Number": "plur", "Other": {"NounType": "weak"}},
- "ADJ__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
- "ADJ__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
- "ADJ__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
- "ADJ__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
- "ADJ__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
- "ADJ__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "ADJ", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
- "ADJ__Case=NomAcc|NounType=NotSlender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "notslender"}},
- "ADJ__Case=NomAcc|NounType=Slender|Number=Plur": {"pos": "ADJ", "Case": "nom|acc", "Number": "plur", "Other": {"NounType": "slender"}},
- "ADJ__Degree=Cmp,Sup|Form=Len": {"pos": "ADJ", "Degree": "cmp|sup", "Other": {"Form": "len"}},
- "ADJ__Degree=Cmp,Sup": {"pos": "ADJ", "Degree": "cmp|sup"},
- "ADJ__Degree=Pos|Form=Ecl": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "ecl"}},
- "ADJ__Degree=Pos|Form=HPref": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "hpref"}},
- "ADJ__Degree=Pos|Form=Len": {"pos": "ADJ", "Degree": "pos", "Other": {"Form": "len"}},
- "ADJ__Degree=Pos": {"pos": "ADJ", "Degree": "pos"},
- "ADJ__Foreign=Yes": {"pos": "ADJ", "Foreign": "yes"},
- "ADJ__Form=Len|VerbForm=Part": {"pos": "ADJ", "VerbForm": "part", "Other": {"Form": "len"}},
- "ADJ__Gender=Masc|Number=Sing|PartType=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
- "ADJ__Gender=Masc|Number=Sing|Case=Voc": {"pos": "ADJ", "Gender": "masc", "Number": "sing", "Case": "voc"},
- "ADJ__Number=Plur|PartType=Voc": {"pos": "ADJ", "Number": "plur", "Case": "voc"},
- "ADJ__Number=Plur|Case=Voc": {"pos": "ADJ", "Number": "plur", "Case": "voc"},
- "ADJ__Number=Plur": {"pos": "ADJ", "Number": "plur"},
- "ADJ___": {"pos": "ADJ"},
- "ADJ__VerbForm=Part": {"pos": "ADJ", "VerbForm": "part"},
- "ADP__Foreign=Yes": {"pos": "ADP", "Foreign": "yes"},
- "ADP__Form=Len|Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1, "Other": {"Form": "len"}},
- "ADP__Form=Len|Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3, "Other": {"Form": "len"}},
- "ADP__Form=Len|Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1, "Other": {"Form": "len"}},
- "ADP__Gender=Fem|Number=Sing|Person=3": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3},
- "ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
- "ADP__Gender=Fem|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes", "PronType": "prs"},
- "ADP__Gender=Masc|Number=Sing|Person=3": {"pos": "ADP", "Gender": "masc", "Number": "sing", "Person": 3},
- "ADP__Gender=Masc|Number=Sing|Person=3|Poss=Yes": {"pos": "ADP", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes"},
- "ADP__Gender=Masc|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes", "PronType": "prs"},
- "ADP__Gender=Masc|Number=Sing|Person=3|PronType=Emp": {"pos": "ADP", "Gender": "masc", "Number": "sing", "Person": 3, "PronType": "emp"},
- "ADP__Number=Plur|Person=1": {"pos": "ADP", "Number": "plur", "Person": 1},
- "ADP__Number=Plur|Person=1|Poss=Yes": {"pos": "ADP", "Number": "plur", "Person": 1, "Poss": "yes"},
- "ADP__Number=Plur|Person=1|PronType=Emp": {"pos": "ADP", "Number": "plur", "Person": 1, "PronType": "emp"},
- "ADP__Number=Plur|Person=2": {"pos": "ADP", "Number": "plur", "Person": 2},
- "ADP__Number=Plur|Person=3": {"pos": "ADP", "Number": "plur", "Person": 3},
- "ADP__Number=Plur|Person=3|Poss=Yes": {"pos": "ADP", "Number": "plur", "Person": 3, "Poss": "yes"},
- "ADP__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {"pos": "ADP", "Number": "plur", "Person": 3, "Poss": "yes", "PronType": "prs"},
- "ADP__Number=Plur|Person=3|PronType=Emp": {"pos": "ADP", "Number": "plur", "Person": 3, "PronType": "emp"},
- "ADP__Number=Plur|PronType=Art": {"pos": "ADP", "Number": "plur", "PronType": "art"},
- "ADP__Number=Sing|Person=1": {"pos": "ADP", "Number": "sing", "Person": 1},
- "ADP__Number=Sing|Person=1|Poss=Yes": {"pos": "ADP", "Number": "sing", "Person": 1, "Poss": "yes"},
- "ADP__Number=Sing|Person=1|PronType=Emp": {"pos": "ADP", "Number": "sing", "Person": 1, "PronType": "emp"},
- "ADP__Number=Sing|Person=2": {"pos": "ADP", "Number": "sing", "Person": 2},
- "ADP__Number=Sing|Person=3": {"pos": "ADP", "Number": "sing", "Person": 3},
- "ADP__Number=Sing|PronType=Art": {"pos": "ADP", "Number": "sing", "PronType": "art"},
- "ADP__Person=3|Poss=Yes": {"pos": "ADP", "Person": 3, "Poss": "yes"},
- "ADP___": {"pos": "ADP"},
- "ADP__Poss=Yes": {"pos": "ADP", "Poss": "yes"},
- "ADP__PrepForm=Cmpd": {"pos": "ADP", "Other": {"PrepForm": "cmpd"}},
- "ADP__PronType=Art": {"pos": "ADP", "PronType": "art"},
- "ADV__Form=Len": {"pos": "ADV", "Other": {"Form": "len"}},
- "ADV___": {"pos": "ADV"},
- "ADV__PronType=Int": {"pos": "ADV", "PronType": "int"},
- "AUX__Form=VF|Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
- "AUX__Form=VF|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
- "AUX__Form=VF|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
- "AUX__Form=VF|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"Form": "vf", "VerbForm": "cop"}},
- "AUX__Form=VF|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Form": "vf", "VerbForm": "cop"}},
- "AUX__Gender=Masc|Number=Sing|Person=3|VerbForm=Cop": {"pos": "AUX", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"VerbForm": "cop"}},
- "AUX__Mood=Int|Number=Sing|PronType=Art|VerbForm=Cop": {"pos": "AUX", "Number": "sing", "PronType": "art", "Other": {"Mood": "int", "VerbForm": "cop"}},
- "AUX__Mood=Int|Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"Mood": "int", "VerbForm": "cop"}},
- "AUX__Mood=Int|Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}},
- "AUX__Mood=Int|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"Mood": "int", "VerbForm": "cop"}},
- "AUX__PartType=Comp|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"PartType": "comp", "VerbForm": "cop"}},
- "AUX__Polarity=Neg|PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}},
- "AUX__Polarity=Neg|PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}},
- "AUX__Polarity=Neg|Tense=Past|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "past", "Other": {"VerbForm": "cop"}},
- "AUX__Polarity=Neg|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Polarity": "neg", "Tense": "pres", "Other": {"VerbForm": "cop"}},
- "AUX___": {"pos": "AUX"},
- "AUX__PronType=Dem|VerbForm=Cop": {"pos": "AUX", "PronType": "dem", "Other": {"VerbForm": "cop"}},
- "AUX__PronType=Rel|Tense=Past|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "past", "Other": {"VerbForm": "cop"}},
- "AUX__PronType=Rel|Tense=Pres|VerbForm=Cop": {"pos": "AUX", "PronType": "rel", "Tense": "pres", "Other": {"VerbForm": "cop"}},
- "AUX__Tense=Past|VerbForm=Cop": {"pos": "AUX", "Tense": "past", "Other": {"VerbForm": "cop"}},
- "AUX__Tense=Pres|VerbForm=Cop": {"pos": "AUX", "Tense": "pres", "Other": {"VerbForm": "cop"}},
- "AUX__VerbForm=Cop": {"pos": "AUX", "Other": {"VerbForm": "cop"}},
- "CCONJ___": {"pos": "CCONJ"},
- "DET__Case=Gen|Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
- "DET__Definite=Def|Form=Ecl": {"pos": "DET", "Definite": "def", "Other": {"Form": "ecl"}},
- "DET__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Gender": "fem", "Number": "sing", "PronType": "art"},
- "DET__Definite=Def|Number=Plur|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "plur", "PronType": "art"},
- "DET__Definite=Def|Number=Sing|PronType=Art": {"pos": "DET", "Definite": "def", "Number": "sing", "PronType": "art"},
- "DET__Definite=Def": {"pos": "DET", "Definite": "def"},
- "DET__Form=HPref|PronType=Ind": {"pos": "DET", "PronType": "ind", "Other": {"Form": "hpref"}},
- "DET__Gender=Fem|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "fem", "Number": "sing", "Person": 3, "Poss": "yes"},
- "DET__Gender=Masc|Number=Sing|Person=3|Poss=Yes": {"pos": "DET", "Gender": "masc", "Number": "sing", "Person": 3, "Poss": "yes"},
- "DET__Number=Plur|Person=1|Poss=Yes": {"pos": "DET", "Number": "plur", "Person": 1, "Poss": "yes"},
- "DET__Number=Plur|Person=3|Poss=Yes": {"pos": "DET", "Number": "plur", "Person": 3, "Poss": "yes"},
- "DET__Number=Sing|Person=1|Poss=Yes": {"pos": "DET", "Number": "sing", "Person": 1, "Poss": "yes"},
- "DET__Number=Sing|Person=2|Poss=Yes": {"pos": "DET", "Number": "sing", "Person": 2, "Poss": "yes"},
- "DET__Number=Sing|PronType=Int": {"pos": "DET", "Number": "sing", "PronType": "int"},
- "DET___": {"pos": "DET"},
- "DET__PronType=Dem": {"pos": "DET", "PronType": "dem"},
- "DET__PronType=Ind": {"pos": "DET", "PronType": "ind"},
- "NOUN__Case=Dat|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Definite": "ind", "Gender": "fem", "Number": "sing"},
- "NOUN__Case=Dat|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
- "NOUN__Case=Dat|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
- "NOUN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "fem", "Number": "sing"},
- "NOUN__Case=Dat|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "dat", "Gender": "masc", "Number": "sing"},
- "NOUN__Case=Gen|Definite=Def|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}},
- "NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
- "NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}},
- "NOUN__Case=Gen|Definite=Def|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
- "NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "def", "Gender": "masc", "Number": "sing"},
- "NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Definite": "ind", "Gender": "fem", "Number": "sing"},
- "NOUN__Case=Gen|Form=Ecl|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}},
- "NOUN__Case=Gen|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
- "NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "strong"}},
- "NOUN__Case=Gen|Form=Ecl|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl", "NounType": "weak"}},
- "NOUN__Case=Gen|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
- "NOUN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
- "NOUN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
- "NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "strong"}},
- "NOUN__Case=Gen|Form=Len|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "len", "NounType": "weak"}},
- "NOUN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
- "NOUN__Case=Gen|Form=Len|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf", "Other": {"Form": "len"}},
- "NOUN__Case=Gen|Gender=Fem|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "strong"}},
- "NOUN__Case=Gen|Gender=Fem|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"NounType": "weak"}},
- "NOUN__Case=Gen|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "plur"},
- "NOUN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "fem", "Number": "sing"},
- "NOUN__Case=Gen|Gender=Masc|NounType=Strong|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "strong"}},
- "NOUN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
- "NOUN__Case=Gen|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "plur"},
- "NOUN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "gen", "Gender": "masc", "Number": "sing"},
- "NOUN__Case=Gen|Number=Sing": {"pos": "NOUN", "Case": "gen", "Number": "sing"},
- "NOUN__Case=Gen|VerbForm=Inf": {"pos": "NOUN", "Case": "gen", "VerbForm": "inf"},
- "NOUN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "plur"},
- "NOUN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "sing"},
- "NOUN__Case=NomAcc|Definite=Def|Gender=Fem": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "fem"},
- "NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
- "NOUN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
- "NOUN__Case=NomAcc|Definite=Ind|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Definite": "ind", "Gender": "masc", "Number": "plur"},
- "NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}},
- "NOUN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
- "NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}},
- "NOUN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
- "NOUN__Case=NomAcc|Form=Emp|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "emp"}},
- "NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "hpref"}},
- "NOUN__Case=NomAcc|Form=HPref|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
- "NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "hpref"}},
- "NOUN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}},
- "NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur", "Other": {"Form": "len"}},
- "NOUN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
- "NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}},
- "NOUN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
- "NOUN__Case=NomAcc|Gender=Fem|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "plur"},
- "NOUN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
- "NOUN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
- "NOUN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
- "NOUN__Case=Voc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Definite": "def", "Gender": "masc", "Number": "plur"},
- "NOUN__Case=Voc|Form=Len|Gender=Fem|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
- "NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Plur": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "plur", "Other": {"Form": "len"}},
- "NOUN__Case=Voc|Form=Len|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
- "NOUN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "NOUN", "Case": "voc", "Gender": "masc", "Number": "sing"},
- "NOUN__Degree=Pos": {"pos": "NOUN", "Degree": "pos"},
- "NOUN__Foreign=Yes": {"pos": "NOUN", "Foreign": "yes"},
- "NOUN__Form=Ecl|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "ecl"}},
- "NOUN__Form=Ecl|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "ecl"}},
- "NOUN__Form=Ecl|VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun", "Other": {"Form": "ecl"}},
- "NOUN__Form=HPref|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "hpref"}},
- "NOUN__Form=Len|Number=Sing": {"pos": "NOUN", "Number": "sing", "Other": {"Form": "len"}},
- "NOUN__Form=Len|VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf", "Other": {"Form": "len"}},
- "NOUN__Gender=Fem|Number=Sing": {"pos": "NOUN", "Gender": "fem", "Number": "sing"},
- "NOUN__Number=Sing|PartType=Comp": {"pos": "NOUN", "Number": "sing", "Other": {"PartType": "comp"}},
- "NOUN__Number=Sing": {"pos": "NOUN", "Number": "sing"},
- "NOUN___": {"pos": "NOUN"},
- "NOUN__Reflex=Yes": {"pos": "NOUN", "Reflex": "yes"},
- "NOUN__VerbForm=Inf": {"pos": "NOUN", "VerbForm": "inf"},
- "NOUN__VerbForm=Vnoun": {"pos": "NOUN", "VerbForm": "vnoun"},
- "NUM__Definite=Def|NumType=Card": {"pos": "NUM", "Definite": "def", "NumType": "card"},
- "NUM__Form=Ecl|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "ecl"}},
- "NUM__Form=Ecl|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "ecl"}},
- "NUM__Form=HPref|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "hpref"}},
- "NUM__Form=Len|NumType=Card": {"pos": "NUM", "NumType": "card", "Other": {"Form": "len"}},
- "NUM__Form=Len|NumType=Ord": {"pos": "NUM", "NumType": "ord", "Other": {"Form": "len"}},
- "NUM__NumType=Card": {"pos": "NUM", "NumType": "card"},
- "NUM__NumType=Ord": {"pos": "NUM", "NumType": "ord"},
- "NUM___": {"pos": "NUM"},
- "PART__Form=Ecl|PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"Form": "ecl", "PartType": "vb"}},
- "PART__Mood=Imp|PartType=Vb|Polarity=Neg": {"pos": "PART", "Mood": "imp", "Polarity": "neg", "Other": {"PartType": "vb"}},
- "PART__Mood=Imp|PartType=Vb": {"pos": "PART", "Mood": "imp", "Other": {"PartType": "vb"}},
- "PART__Mood=Int|PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"Mood": "int", "PartType": "vb"}},
- "PART__PartType=Ad": {"pos": "PART", "Other": {"PartType": "ad"}},
- "PART__PartType=Cmpl|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "cmpl"}},
- "PART__PartType=Cmpl|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "cmpl"}},
- "PART__PartType=Cmpl": {"pos": "PART", "Other": {"PartType": "cmpl"}},
- "PART__PartType=Comp": {"pos": "PART", "Other": {"PartType": "comp"}},
- "PART__PartType=Cop|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "cop"}},
- "PART__PartType=Deg": {"pos": "PART", "Other": {"PartType": "deg"}},
- "PART__PartType=Inf": {"pos": "PART", "PartType": "inf"},
- "PART__PartType=Num": {"pos": "PART", "Other": {"PartType": "num"}},
- "PART__PartType=Pat": {"pos": "PART", "Other": {"PartType": "pat"}},
- "PART__PartType=Vb|Polarity=Neg": {"pos": "PART", "Polarity": "neg", "Other": {"PartType": "vb"}},
- "PART__PartType=Vb|Polarity=Neg|PronType=Rel": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Other": {"PartType": "vb"}},
- "PART__PartType=Vb|Polarity=Neg|PronType=Rel|Tense=Past": {"pos": "PART", "Polarity": "neg", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}},
- "PART__PartType=Vb|Polarity=Neg|Tense=Past": {"pos": "PART", "Polarity": "neg", "Tense": "past", "Other": {"PartType": "vb"}},
- "PART__PartType=Vb": {"pos": "PART", "Other": {"PartType": "vb"}},
- "PART__PartType=Vb|PronType=Rel": {"pos": "PART", "PronType": "rel", "Other": {"PartType": "vb"}},
- "PART__PartType=Vb|PronType=Rel|Tense=Past": {"pos": "PART", "PronType": "rel", "Tense": "past", "Other": {"PartType": "vb"}},
- "PART__PartType=Vb|Tense=Past": {"pos": "PART", "Tense": "past", "Other": {"PartType": "vb"}},
- "PART__PartType=Voc": {"pos": "PART", "Other": {"PartType": "voc"}},
- "PART___": {"pos": "PART"},
- "PART__PronType=Rel": {"pos": "PART", "PronType": "rel"},
- "PRON__Form=Len|Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2, "Other": {"Form": "len"}},
- "PRON__Form=Len|PronType=Ind": {"pos": "PRON", "PronType": "ind", "Other": {"Form": "len"}},
- "PRON__Gender=Fem|Number=Sing|Person=3": {"pos": "PRON", "Gender": "fem", "Number": "sing", "Person": 3},
- "PRON__Gender=Masc|Number=Sing|Person=3": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3},
- "PRON__Gender=Masc|Number=Sing|Person=3|PronType=Emp": {"pos": "PRON", "Gender": "masc", "Number": "sing", "Person": 3, "PronType": "emp"},
- "PRON__Gender=Masc|Person=3": {"pos": "PRON", "Gender": "masc", "Person": 3},
- "PRON__Number=Plur|Person=1": {"pos": "PRON", "Number": "plur", "Person": 1},
- "PRON__Number=Plur|Person=1|PronType=Emp": {"pos": "PRON", "Number": "plur", "Person": 1, "PronType": "emp"},
- "PRON__Number=Plur|Person=2": {"pos": "PRON", "Number": "plur", "Person": 2},
- "PRON__Number=Plur|Person=3": {"pos": "PRON", "Number": "plur", "Person": 3},
- "PRON__Number=Plur|Person=3|PronType=Emp": {"pos": "PRON", "Number": "plur", "Person": 3, "PronType": "emp"},
- "PRON__Number=Sing|Person=1": {"pos": "PRON", "Number": "sing", "Person": 1},
- "PRON__Number=Sing|Person=1|PronType=Emp": {"pos": "PRON", "Number": "sing", "Person": 1, "PronType": "emp"},
- "PRON__Number=Sing|Person=2": {"pos": "PRON", "Number": "sing", "Person": 2},
- "PRON__Number=Sing|Person=2|PronType=Emp": {"pos": "PRON", "Number": "sing", "Person": 2, "PronType": "emp"},
- "PRON__Number=Sing|Person=3": {"pos": "PRON", "Number": "sing", "Person": 3},
- "PRON__Number=Sing|PronType=Int": {"pos": "PRON", "Number": "sing", "PronType": "int"},
- "PRON__PronType=Dem": {"pos": "PRON", "PronType": "dem"},
- "PRON__PronType=Ind": {"pos": "PRON", "PronType": "ind"},
- "PRON__PronType=Int": {"pos": "PRON", "PronType": "int"},
- "PRON__Reflex=Yes": {"pos": "PRON", "Reflex": "yes"},
- "PROPN__Abbr=Yes": {"pos": "PROPN", "Other": {"Abbr": "yes"}},
- "PROPN__Case=Dat|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "dat", "Gender": "fem", "Number": "sing"},
- "PROPN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Definite": "def", "Gender": "fem", "Number": "sing"},
- "PROPN__Case=Gen|Form=Ecl|Gender=Fem|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "plur", "Other": {"Form": "ecl"}},
- "PROPN__Case=Gen|Form=Ecl|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"Form": "ecl"}},
- "PROPN__Case=Gen|Form=HPref|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "hpref"}},
- "PROPN__Case=Gen|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
- "PROPN__Case=Gen|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Other": {"Form": "len"}},
- "PROPN__Case=Gen|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
- "PROPN__Case=Gen|Form=Len|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Other": {"Form": "len"}},
- "PROPN__Case=Gen|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "fem", "Number": "sing"},
- "PROPN__Case=Gen|Gender=Fem": {"pos": "PROPN", "Case": "gen", "Gender": "fem"},
- "PROPN__Case=Gen|Gender=Masc|NounType=Weak|Number=Plur": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "plur", "Other": {"NounType": "weak"}},
- "PROPN__Case=Gen|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "gen", "Gender": "masc", "Number": "sing"},
- "PROPN__Case=Gen|Gender=Masc": {"pos": "PROPN", "Case": "gen", "Gender": "masc"},
- "PROPN__Case=NomAcc|Definite=Def|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "fem", "Number": "sing"},
- "PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "plur"},
- "PROPN__Case=NomAcc|Definite=Def|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Definite": "def", "Gender": "masc", "Number": "sing"},
- "PROPN__Case=NomAcc|Form=Ecl|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "ecl"}},
- "PROPN__Case=NomAcc|Form=Ecl|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "ecl"}},
- "PROPN__Case=NomAcc|Form=HPref|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "hpref"}},
- "PROPN__Case=NomAcc|Form=Len|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Other": {"Form": "len"}},
- "PROPN__Case=NomAcc|Form=Len|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing", "Other": {"Form": "len"}},
- "PROPN__Case=NomAcc|Gender=Fem|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "fem", "Number": "sing"},
- "PROPN__Case=NomAcc|Gender=Masc|Number=Plur": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "plur"},
- "PROPN__Case=NomAcc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc", "Number": "sing"},
- "PROPN__Case=NomAcc|Gender=Masc": {"pos": "PROPN", "Case": "nom|acc", "Gender": "masc"},
- "PROPN__Case=Voc|Form=Len|Gender=Fem": {"pos": "PROPN", "Case": "voc", "Gender": "fem", "Other": {"Form": "len"}},
- "PROPN__Case=Voc|Gender=Masc|Number=Sing": {"pos": "PROPN", "Case": "voc", "Gender": "masc", "Number": "sing"},
- "PROPN__Gender=Masc|Number=Sing": {"pos": "PROPN", "Gender": "masc", "Number": "sing"},
- "PROPN___": {"pos": "PROPN"},
- "PUNCT___": {"pos": "PUNCT"},
- "SCONJ___": {"pos": "SCONJ"},
- "SCONJ__Tense=Past|VerbForm=Cop": {"pos": "SCONJ", "Tense": "past", "Other": {"VerbForm": "cop"}},
- "SCONJ__VerbForm=Cop": {"pos": "SCONJ", "Other": {"VerbForm": "cop"}},
- "SYM__Abbr=Yes": {"pos": "SYM", "Other": {"Abbr": "yes"}},
- "VERB__Case=NomAcc|Gender=Masc|Mood=Ind|Number=Sing|Tense=Pres": {"pos": "VERB", "Case": "nom|acc", "Gender": "masc", "Mood": "ind", "Number": "sing", "Tense": "pres"},
- "VERB__Dialect=Munster|Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}},
- "VERB__Foreign=Yes": {"pos": "VERB", "Foreign": "yes"},
- "VERB__Form=Ecl|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "ecl", "Voice": "auto"}},
- "VERB__Form=Ecl|Mood=Imp|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}},
- "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "ecl", "Voice": "auto"}},
- "VERB__Form=Ecl|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "ecl", "Voice": "auto"}},
- "VERB__Form=Ecl|Mood=Sub|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Tense": "pres", "Other": {"Form": "ecl"}},
- "VERB__Form=Ecl": {"pos": "VERB", "Other": {"Form": "ecl"}},
- "VERB__Form=Emp|Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}},
- "VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "PronType": "rel", "Tense": "pres", "Other": {"Form": "emp"}},
- "VERB__Form=Emp|Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres", "Other": {"Form": "emp"}},
- "VERB__Form=Len|Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3, "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1, "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Cnd|Number=Sing|Person=2": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 2, "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Cnd|Polarity=Neg": {"pos": "VERB", "Mood": "cnd", "Polarity": "neg", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Cnd": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Form": "len", "Voice": "auto"}},
- "VERB__Form=Len|Mood=Imp|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Imp|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
- "VERB__Form=Len|Mood=Imp|Voice=Auto": {"pos": "VERB", "Mood": "imp", "Other": {"Form": "len", "Voice": "auto"}},
- "VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "fut", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Number=Plur|Person=3|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 3, "Tense": "past", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}},
- "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
- "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Polarity=Neg|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}},
- "VERB__Form=Len|Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Form": "len", "Voice": "auto"}},
- "VERB__Form=Len|Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Form": "len", "Voice": "auto"}},
- "VERB__Form=Len|Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len"}},
- "VERB__Form=Len|Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Form": "len", "Voice": "auto"}},
- "VERB__Form=Len|Mood=Sub|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "sub", "Polarity": "neg", "Tense": "pres", "Other": {"Form": "len"}},
- "VERB__Form=Len|Polarity=Neg": {"pos": "VERB", "Polarity": "neg", "Other": {"Form": "len"}},
- "VERB__Form=Len": {"pos": "VERB", "Other": {"Form": "len"}},
- "VERB__Mood=Cnd|Number=Plur|Person=3": {"pos": "VERB", "Mood": "cnd", "Number": "plur", "Person": 3},
- "VERB__Mood=Cnd|Number=Sing|Person=1": {"pos": "VERB", "Mood": "cnd", "Number": "sing", "Person": 1},
- "VERB__Mood=Cnd": {"pos": "VERB", "Mood": "cnd"},
- "VERB__Mood=Cnd|Voice=Auto": {"pos": "VERB", "Mood": "cnd", "Other": {"Voice": "auto"}},
- "VERB__Mood=Imp|Number=Plur|Person=1|Polarity=Neg": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1, "Polarity": "neg"},
- "VERB__Mood=Imp|Number=Plur|Person=1": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 1},
- "VERB__Mood=Imp|Number=Plur|Person=2": {"pos": "VERB", "Mood": "imp", "Number": "plur", "Person": 2},
- "VERB__Mood=Imp|Number=Sing|Person=2": {"pos": "VERB", "Mood": "imp", "Number": "sing", "Person": 2},
- "VERB__Mood=Imp|Tense=Past": {"pos": "VERB", "Mood": "imp", "Tense": "past"},
- "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "past"},
- "VERB__Mood=Ind|Number=Plur|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "plur", "Person": 1, "Tense": "pres"},
- "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past"},
- "VERB__Mood=Ind|Number=Sing|Person=1|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "pres"},
- "VERB__Mood=Ind|Polarity=Neg|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "past", "Other": {"Voice": "auto"}},
- "VERB__Mood=Ind|Polarity=Neg|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Polarity": "neg", "Tense": "pres"},
- "VERB__Mood=Ind|PronType=Rel|Tense=Fut": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "fut"},
- "VERB__Mood=Ind|PronType=Rel|Tense=Pres": {"pos": "VERB", "Mood": "ind", "PronType": "rel", "Tense": "pres"},
- "VERB__Mood=Ind|Tense=Fut": {"pos": "VERB", "Mood": "ind", "Tense": "fut"},
- "VERB__Mood=Ind|Tense=Fut|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "fut", "Other": {"Voice": "auto"}},
- "VERB__Mood=Ind|Tense=Past": {"pos": "VERB", "Mood": "ind", "Tense": "past"},
- "VERB__Mood=Ind|Tense=Past|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "past", "Other": {"Voice": "auto"}},
- "VERB__Mood=Ind|Tense=Pres": {"pos": "VERB", "Mood": "ind", "Tense": "pres"},
- "VERB__Mood=Ind|Tense=Pres|Voice=Auto": {"pos": "VERB", "Mood": "ind", "Tense": "pres", "Other": {"Voice": "auto"}},
- "VERB___": {"pos": "VERB"},
- "X__Abbr=Yes": {"pos": "X", "Other": {"Abbr": "yes"}},
- "X__Case=NomAcc|Foreign=Yes|Gender=Fem|Number=Sing": {"pos": "X", "Case": "nom|acc", "Gender": "fem", "Number": "sing", "Foreign": "yes"},
- "X__Definite=Def|Dialect=Ulster": {"pos": "X", "Definite": "def", "Other": {"Dialect": "ulster"}},
- "X__Dialect=Munster|Form=Len|Mood=Ind|Number=Sing|Person=1|Tense=Past": {"pos": "X", "Mood": "ind", "Number": "sing", "Person": 1, "Tense": "past", "Other": {"Dialect": "munster", "Form": "len"}},
- "X__Dialect=Munster|Mood=Imp|Number=Sing|Person=2|Polarity=Neg": {"pos": "X", "Mood": "imp", "Number": "sing", "Person": 2, "Polarity": "neg", "Other": {"Dialect": "munster"}},
- "X__Dialect=Munster|Mood=Ind|Tense=Past|Voice=Auto": {"pos": "X", "Mood": "ind", "Tense": "past", "Other": {"Dialect": "munster", "Voice": "auto"}},
- "X__Dialect=Munster": {"pos": "X", "Other": {"Dialect": "munster"}},
- "X__Dialect=Munster|PronType=Dem": {"pos": "X", "PronType": "dem", "Other": {"Dialect": "munster"}},
- "X__Dialect=Ulster|Gender=Masc|Number=Sing|Person=3": {"pos": "X", "Gender": "masc", "Number": "sing", "Person": 3, "Other": {"Dialect": "ulster"}},
- "X__Dialect=Ulster|PartType=Vb|Polarity=Neg": {"pos": "X", "Polarity": "neg", "Other": {"Dialect": "ulster", "PartType": "vb"}},
- "X__Dialect=Ulster|VerbForm=Cop": {"pos": "X", "Other": {"Dialect": "ulster", "VerbForm": "cop"}},
- "X__Foreign=Yes": {"pos": "X", "Foreign": "yes"},
- "X___": {"pos": "X"}
-}
-# fmt: on
diff --git a/spacy/lang/ga/tokenizer_exceptions.py b/spacy/lang/ga/tokenizer_exceptions.py
index c0e53f522..abf49c511 100644
--- a/spacy/lang/ga/tokenizer_exceptions.py
+++ b/spacy/lang/ga/tokenizer_exceptions.py
@@ -1,82 +1,65 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, DET, ADP, CCONJ, ADV, NOUN, X, AUX
-from ...symbols import ORTH, LEMMA, NORM
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
_exc = {
- "'acha'n": [
- {ORTH: "'ach", LEMMA: "gach", NORM: "gach", POS: DET},
- {ORTH: "a'n", LEMMA: "aon", NORM: "aon", POS: DET},
- ],
- "dem'": [
- {ORTH: "de", LEMMA: "de", NORM: "de", POS: ADP},
- {ORTH: "m'", LEMMA: "mo", NORM: "mo", POS: DET},
- ],
- "ded'": [
- {ORTH: "de", LEMMA: "de", NORM: "de", POS: ADP},
- {ORTH: "d'", LEMMA: "do", NORM: "do", POS: DET},
- ],
- "lem'": [
- {ORTH: "le", LEMMA: "le", NORM: "le", POS: ADP},
- {ORTH: "m'", LEMMA: "mo", NORM: "mo", POS: DET},
- ],
- "led'": [
- {ORTH: "le", LEMMA: "le", NORM: "le", POS: ADP},
- {ORTH: "d'", LEMMA: "mo", NORM: "do", POS: DET},
- ],
+ "'acha'n": [{ORTH: "'ach", NORM: "gach"}, {ORTH: "a'n", NORM: "aon"}],
+ "dem'": [{ORTH: "de", NORM: "de"}, {ORTH: "m'", NORM: "mo"}],
+ "ded'": [{ORTH: "de", NORM: "de"}, {ORTH: "d'", NORM: "do"}],
+ "lem'": [{ORTH: "le", NORM: "le"}, {ORTH: "m'", NORM: "mo"}],
+ "led'": [{ORTH: "le", NORM: "le"}, {ORTH: "d'", NORM: "do"}],
}
for exc_data in [
- {ORTH: "'gus", LEMMA: "agus", NORM: "agus", POS: CCONJ},
- {ORTH: "'ach", LEMMA: "gach", NORM: "gach", POS: DET},
- {ORTH: "ao'", LEMMA: "aon", NORM: "aon"},
- {ORTH: "'niar", LEMMA: "aniar", NORM: "aniar", POS: ADV},
- {ORTH: "'níos", LEMMA: "aníos", NORM: "aníos", POS: ADV},
- {ORTH: "'ndiu", LEMMA: "inniu", NORM: "inniu", POS: ADV},
- {ORTH: "'nocht", LEMMA: "anocht", NORM: "anocht", POS: ADV},
- {ORTH: "m'", LEMMA: "mo", POS: DET},
- {ORTH: "Aib.", LEMMA: "Aibreán", POS: NOUN},
- {ORTH: "Ath.", LEMMA: "athair", POS: NOUN},
- {ORTH: "Beal.", LEMMA: "Bealtaine", POS: NOUN},
- {ORTH: "a.C.n.", LEMMA: "ante Christum natum", POS: X},
- {ORTH: "m.sh.", LEMMA: "mar shampla", POS: ADV},
- {ORTH: "M.F.", LEMMA: "Meán Fómhair", POS: NOUN},
- {ORTH: "M.Fómh.", LEMMA: "Meán Fómhair", POS: NOUN},
- {ORTH: "D.F.", LEMMA: "Deireadh Fómhair", POS: NOUN},
- {ORTH: "D.Fómh.", LEMMA: "Deireadh Fómhair", POS: NOUN},
- {ORTH: "r.C.", LEMMA: "roimh Chríost", POS: ADV},
- {ORTH: "R.C.", LEMMA: "roimh Chríost", POS: ADV},
- {ORTH: "r.Ch.", LEMMA: "roimh Chríost", POS: ADV},
- {ORTH: "r.Chr.", LEMMA: "roimh Chríost", POS: ADV},
- {ORTH: "R.Ch.", LEMMA: "roimh Chríost", POS: ADV},
- {ORTH: "R.Chr.", LEMMA: "roimh Chríost", POS: ADV},
- {ORTH: "⁊rl.", LEMMA: "agus araile", POS: ADV},
- {ORTH: "srl.", LEMMA: "agus araile", POS: ADV},
- {ORTH: "Co.", LEMMA: "contae", POS: NOUN},
- {ORTH: "Ean.", LEMMA: "Eanáir", POS: NOUN},
- {ORTH: "Feab.", LEMMA: "Feabhra", POS: NOUN},
- {ORTH: "gCo.", LEMMA: "contae", POS: NOUN},
- {ORTH: ".i.", LEMMA: "eadhon", POS: ADV},
- {ORTH: "B'", LEMMA: "ba", POS: AUX},
- {ORTH: "b'", LEMMA: "ba", POS: AUX},
- {ORTH: "lch.", LEMMA: "leathanach", POS: NOUN},
- {ORTH: "Lch.", LEMMA: "leathanach", POS: NOUN},
- {ORTH: "lgh.", LEMMA: "leathanach", POS: NOUN},
- {ORTH: "Lgh.", LEMMA: "leathanach", POS: NOUN},
- {ORTH: "Lún.", LEMMA: "Lúnasa", POS: NOUN},
- {ORTH: "Már.", LEMMA: "Márta", POS: NOUN},
- {ORTH: "Meith.", LEMMA: "Meitheamh", POS: NOUN},
- {ORTH: "Noll.", LEMMA: "Nollaig", POS: NOUN},
- {ORTH: "Samh.", LEMMA: "Samhain", POS: NOUN},
- {ORTH: "tAth.", LEMMA: "athair", POS: NOUN},
- {ORTH: "tUas.", LEMMA: "Uasal", POS: NOUN},
- {ORTH: "teo.", LEMMA: "teoranta", POS: NOUN},
- {ORTH: "Teo.", LEMMA: "teoranta", POS: NOUN},
- {ORTH: "Uas.", LEMMA: "Uasal", POS: NOUN},
- {ORTH: "uimh.", LEMMA: "uimhir", POS: NOUN},
- {ORTH: "Uimh.", LEMMA: "uimhir", POS: NOUN},
+ {ORTH: "'gus", NORM: "agus"},
+ {ORTH: "'ach", NORM: "gach"},
+ {ORTH: "ao'", NORM: "aon"},
+ {ORTH: "'niar", NORM: "aniar"},
+ {ORTH: "'níos", NORM: "aníos"},
+ {ORTH: "'ndiu", NORM: "inniu"},
+ {ORTH: "'nocht", NORM: "anocht"},
+ {ORTH: "m'"},
+ {ORTH: "Aib."},
+ {ORTH: "Ath."},
+ {ORTH: "Beal."},
+ {ORTH: "a.C.n."},
+ {ORTH: "m.sh."},
+ {ORTH: "M.F."},
+ {ORTH: "M.Fómh."},
+ {ORTH: "D.F."},
+ {ORTH: "D.Fómh."},
+ {ORTH: "r.C."},
+ {ORTH: "R.C."},
+ {ORTH: "r.Ch."},
+ {ORTH: "r.Chr."},
+ {ORTH: "R.Ch."},
+ {ORTH: "R.Chr."},
+ {ORTH: "⁊rl."},
+ {ORTH: "srl."},
+ {ORTH: "Co."},
+ {ORTH: "Ean."},
+ {ORTH: "Feab."},
+ {ORTH: "gCo."},
+ {ORTH: ".i."},
+ {ORTH: "B'"},
+ {ORTH: "b'"},
+ {ORTH: "lch."},
+ {ORTH: "Lch."},
+ {ORTH: "lgh."},
+ {ORTH: "Lgh."},
+ {ORTH: "Lún."},
+ {ORTH: "Már."},
+ {ORTH: "Meith."},
+ {ORTH: "Noll."},
+ {ORTH: "Samh."},
+ {ORTH: "tAth."},
+ {ORTH: "tUas."},
+ {ORTH: "teo."},
+ {ORTH: "Teo."},
+ {ORTH: "Uas."},
+ {ORTH: "uimh."},
+ {ORTH: "Uimh."},
]:
_exc[exc_data[ORTH]] = [exc_data]
@@ -84,4 +67,4 @@ for orth in ["d'", "D'"]:
_exc[orth] = [{ORTH: orth}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/gu/__init__.py b/spacy/lang/gu/__init__.py
index 1f080c7c2..67228ac40 100644
--- a/spacy/lang/gu/__init__.py
+++ b/spacy/lang/gu/__init__.py
@@ -1,8 +1,4 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
-
from ...language import Language
diff --git a/spacy/lang/gu/examples.py b/spacy/lang/gu/examples.py
index 202a8d022..1cf75fd32 100644
--- a/spacy/lang/gu/examples.py
+++ b/spacy/lang/gu/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/gu/stop_words.py b/spacy/lang/gu/stop_words.py
index 85d33763d..2c859681b 100644
--- a/spacy/lang/gu/stop_words.py
+++ b/spacy/lang/gu/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
STOP_WORDS = set(
"""
એમ
diff --git a/spacy/lang/he/__init__.py b/spacy/lang/he/__init__.py
index 922f61462..e0adc3293 100644
--- a/spacy/lang/he/__init__.py
+++ b/spacy/lang/he/__init__.py
@@ -1,21 +1,11 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from .lex_attrs import LEX_ATTRS
from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
class HebrewDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "he"
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
stop_words = STOP_WORDS
+ lex_attr_getters = LEX_ATTRS
writing_system = {"direction": "rtl", "has_case": False, "has_letters": True}
diff --git a/spacy/lang/he/examples.py b/spacy/lang/he/examples.py
index 34cd157ae..d54d2a145 100644
--- a/spacy/lang/he/examples.py
+++ b/spacy/lang/he/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/he/stop_words.py b/spacy/lang/he/stop_words.py
index d4ac5e846..23bb5176d 100644
--- a/spacy/lang/he/stop_words.py
+++ b/spacy/lang/he/stop_words.py
@@ -1,7 +1,3 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
אני
diff --git a/spacy/lang/hi/__init__.py b/spacy/lang/hi/__init__.py
index b0d45ddf3..384f040c8 100644
--- a/spacy/lang/hi/__init__.py
+++ b/spacy/lang/hi/__init__.py
@@ -1,18 +1,11 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-
from ...language import Language
-from ...attrs import LANG
class HindiDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "hi"
stop_words = STOP_WORDS
+ lex_attr_getters = LEX_ATTRS
class Hindi(Language):
diff --git a/spacy/lang/hi/examples.py b/spacy/lang/hi/examples.py
index 76b0e8bf8..1443b4908 100644
--- a/spacy/lang/hi/examples.py
+++ b/spacy/lang/hi/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/hi/lex_attrs.py b/spacy/lang/hi/lex_attrs.py
index 12666d96a..20a8c2975 100644
--- a/spacy/lang/hi/lex_attrs.py
+++ b/spacy/lang/hi/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..norm_exceptions import BASE_NORMS
from ...attrs import NORM, LIKE_NUM
diff --git a/spacy/lang/hi/stop_words.py b/spacy/lang/hi/stop_words.py
index efad18c84..475b07da1 100644
--- a/spacy/lang/hi/stop_words.py
+++ b/spacy/lang/hi/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Source: https://github.com/taranjeet/hindi-tokenizer/blob/master/stopwords.txt, https://data.mendeley.com/datasets/bsr3frvvjc/1#file-a21d5092-99d7-45d8-b044-3ae9edd391c6
STOP_WORDS = set(
diff --git a/spacy/lang/hr/__init__.py b/spacy/lang/hr/__init__.py
index 539b164d7..118e0946a 100644
--- a/spacy/lang/hr/__init__.py
+++ b/spacy/lang/hr/__init__.py
@@ -1,22 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
class CroatianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "hr"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)
stop_words = STOP_WORDS
diff --git a/spacy/lang/hr/examples.py b/spacy/lang/hr/examples.py
index dc52ce4f0..b28fb63c2 100644
--- a/spacy/lang/hr/examples.py
+++ b/spacy/lang/hr/examples.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/hr/stop_words.py b/spacy/lang/hr/stop_words.py
index 408b802c5..dd10f792d 100644
--- a/spacy/lang/hr/stop_words.py
+++ b/spacy/lang/hr/stop_words.py
@@ -1,7 +1,3 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-
# Source: https://github.com/stopwords-iso/stopwords-hr
STOP_WORDS = set(
"""
diff --git a/spacy/lang/hu/__init__.py b/spacy/lang/hu/__init__.py
index a331adc5b..8962603a6 100644
--- a/spacy/lang/hu/__init__.py
+++ b/spacy/lang/hu/__init__.py
@@ -1,29 +1,16 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, TOKEN_MATCH
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
from .stop_words import STOP_WORDS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
class HungarianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "hu"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
suffixes = TOKENIZER_SUFFIXES
infixes = TOKENIZER_INFIXES
token_match = TOKEN_MATCH
+ stop_words = STOP_WORDS
class Hungarian(Language):
diff --git a/spacy/lang/hu/examples.py b/spacy/lang/hu/examples.py
index 3267887fe..711a438bd 100644
--- a/spacy/lang/hu/examples.py
+++ b/spacy/lang/hu/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/hu/punctuation.py b/spacy/lang/hu/punctuation.py
index a010bb7ae..f827cd677 100644
--- a/spacy/lang/hu/punctuation.py
+++ b/spacy/lang/hu/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CONCAT_QUOTES
from ..char_classes import CONCAT_ICONS, UNITS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
@@ -10,6 +7,7 @@ _concat_icons = CONCAT_ICONS.replace("\u00B0", "")
_currency = r"\$¢£€¥฿"
_quotes = CONCAT_QUOTES.replace("'", "")
+_units = UNITS.replace("%", "")
_prefixes = (
LIST_PUNCT
@@ -29,7 +27,7 @@ _suffixes = (
r"(?<=[0-9])\+",
r"(?<=°[FfCcKk])\.",
r"(?<=[0-9])(?:[{c}])".format(c=_currency),
- r"(?<=[0-9])(?:{u})".format(u=UNITS),
+ r"(?<=[0-9])(?:{u})".format(u=_units),
r"(?<=[{al}{e}{q}(?:{c})])\.".format(
al=ALPHA_LOWER, e=r"%²\-\+", q=CONCAT_QUOTES, c=_currency
),
diff --git a/spacy/lang/hu/stop_words.py b/spacy/lang/hu/stop_words.py
index c9a217dd6..e39a26d35 100644
--- a/spacy/lang/hu/stop_words.py
+++ b/spacy/lang/hu/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
a abban ahhoz ahogy ahol aki akik akkor akár alatt amely amelyek amelyekben
diff --git a/spacy/lang/hu/tokenizer_exceptions.py b/spacy/lang/hu/tokenizer_exceptions.py
index d328baa22..4a64a1d2c 100644
--- a/spacy/lang/hu/tokenizer_exceptions.py
+++ b/spacy/lang/hu/tokenizer_exceptions.py
@@ -1,10 +1,9 @@
-# coding: utf8
-from __future__ import unicode_literals
-
import re
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..punctuation import ALPHA_LOWER, CURRENCY
from ...symbols import ORTH
+from ...util import update_exc
_exc = {}
@@ -647,5 +646,5 @@ _nums = r"(({ne})|({t})|({on})|({c}))({s})?".format(
)
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
TOKEN_MATCH = re.compile(r"^{n}$".format(n=_nums)).match
diff --git a/spacy/lang/hy/__init__.py b/spacy/lang/hy/__init__.py
index 6aaa965bb..4577ab641 100644
--- a/spacy/lang/hy/__init__.py
+++ b/spacy/lang/hy/__init__.py
@@ -1,21 +1,11 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-from .tag_map import TAG_MAP
-
-from ...attrs import LANG
from ...language import Language
class ArmenianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "hy"
-
- lex_attr_getters.update(LEX_ATTRS)
+ lex_attr_getters = LEX_ATTRS
stop_words = STOP_WORDS
- tag_map = TAG_MAP
class Armenian(Language):
diff --git a/spacy/lang/hy/examples.py b/spacy/lang/hy/examples.py
index 8a00fd243..212a2ec86 100644
--- a/spacy/lang/hy/examples.py
+++ b/spacy/lang/hy/examples.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
"""
Example sentences to test spaCy and its language models.
>>> from spacy.lang.hy.examples import sentences
diff --git a/spacy/lang/hy/lex_attrs.py b/spacy/lang/hy/lex_attrs.py
index dea3c0e97..9c9c0380c 100644
--- a/spacy/lang/hy/lex_attrs.py
+++ b/spacy/lang/hy/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/hy/stop_words.py b/spacy/lang/hy/stop_words.py
index d75aad6e2..46d0f6b51 100644
--- a/spacy/lang/hy/stop_words.py
+++ b/spacy/lang/hy/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
STOP_WORDS = set(
"""
նա
diff --git a/spacy/lang/hy/tag_map.py b/spacy/lang/hy/tag_map.py
deleted file mode 100644
index 4d5b6e918..000000000
--- a/spacy/lang/hy/tag_map.py
+++ /dev/null
@@ -1,2303 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, ADJ, NUM, DET, ADV, ADP, X, VERB, NOUN
-from ...symbols import PROPN, PART, INTJ, PRON, SCONJ, AUX, CCONJ
-
-TAG_MAP = {
- "ADJ_Abbr=Yes": {POS: ADJ, "Abbr": "Yes"},
- "ADJ_Degree=Pos|NumForm=Word|NumType=Ord": {
- POS: ADJ,
- "Degree": "Pos",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "ADJ_Degree=Pos": {POS: ADJ, "Degree": "Pos"},
- "ADJ_Degree=Pos|Style=Coll": {POS: ADJ, "Degree": "Pos", "Style": "Coll"},
- "ADJ_Degree=Pos|Style=Expr": {POS: ADJ, "Degree": "Pos", "Style": "Expr"},
- "ADJ_Degree=Sup": {POS: ADJ, "Degree": "Sup"},
- "ADJ_NumForm=Digit|NumType=Ord": {POS: ADJ, "NumForm": "Digit", "NumType": "Ord"},
- "ADJ_NumForm=Word|NumType=Card": {POS: ADJ, "NumForm": "Word", "NumType": "Card"},
- "ADJ_NumForm=Word|NumType=Ord": {POS: ADJ, "NumForm": "Word", "NumType": "Ord"},
- "ADJ_Style=Coll": {POS: ADJ, "Style": "Coll"},
- "ADJ_Style=Expr": {POS: ADJ, "Style": "Expr"},
- "ADP_AdpType=Post|Case=Dat": {POS: ADP, "AdpType": "Post", "Case": "Dat"},
- "ADP_AdpType=Post|Case=Nom": {POS: ADP, "AdpType": "Post", "Case": "Nom"},
- "ADP_AdpType=Post|Number=Plur|Person=3": {
- POS: ADP,
- "AdpType": "Post",
- "Number": "Plur",
- "Person": "three",
- },
- "ADP_AdpType=Post": {POS: ADP, "AdpType": "Post"},
- "ADP_AdpType=Prep": {POS: ADP, "AdpType": "Prep"},
- "ADP_AdpType=Prep|Style=Arch": {POS: ADP, "AdpType": "Prep", "Style": "Arch"},
- "ADV_Degree=Cmp": {POS: ADV, "Degree": "Cmp"},
- "ADV_Degree=Pos": {POS: ADV, "Degree": "Pos"},
- "ADV_Degree=Sup": {POS: ADV, "Degree": "Sup"},
- "ADV_Distance=Dist|PronType=Dem": {POS: ADV, "PronType": "Dem"},
- "ADV_Distance=Dist|PronType=Exc": {POS: ADV, "PronType": "Exc"},
- "ADV_Distance=Med|PronType=Dem": {POS: ADV, "PronType": "Dem"},
- "ADV_Distance=Med|PronType=Dem|Style=Coll": {
- POS: ADV,
- "PronType": "Dem",
- "Style": "Coll",
- },
- "ADV_NumForm=Word|NumType=Card|PronType=Tot": {
- POS: ADV,
- "NumForm": "Word",
- "NumType": "Card",
- "PronType": "Tot",
- },
- "ADV_PronType=Dem": {POS: ADV, "PronType": "Dem"},
- "ADV_PronType=Exc": {POS: ADV, "PronType": "Exc"},
- "ADV_PronType=Ind": {POS: ADV, "PronType": "Ind"},
- "ADV_PronType=Int": {POS: ADV, "PronType": "Int"},
- "ADV_PronType=Int|Style=Coll": {POS: ADV, "PronType": "Int", "Style": "Coll"},
- "ADV_PronType=Rel": {POS: ADV, "PronType": "Rel"},
- "ADV_Style=Coll": {POS: ADV, "Style": "Coll"},
- "ADV_Style=Rare": {POS: ADV, "Style": "Rare"},
- "AUX_Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Polarity=Neg|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Polarity=Pos|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Neg|Tense=Imp|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Imp",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Neg|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Tense=Imp|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Imp",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Imp|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Imp",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Tense=Imp|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Imp",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Polarity=Neg|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Polarity=Pos|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Tense=Imp|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Imp",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Tense=Imp|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Imp",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Tense=Pres|VerbForm=Fin": {
- POS: AUX,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "AUX_Aspect=Imp|VerbForm=Part": {POS: AUX, "Aspect": "Imp", "VerbForm": "Part"},
- "AUX_Aspect=Perf|VerbForm=Part": {POS: AUX, "Aspect": "Perf", "VerbForm": "Part"},
- "AUX_Aspect=Prosp|VerbForm=Part": {POS: AUX, "Aspect": "Prosp", "VerbForm": "Part"},
- "AUX_Polarity=Pos": {POS: AUX, "Polarity": "Pos"},
- "CCONJ_ConjType=Comp": {POS: CCONJ, "ConjType": "Comp"},
- "CCONJ_ConjType=Comp|Style=Coll": {POS: CCONJ, "ConjType": "Comp", "Style": "Coll"},
- "DET_Case=Gen|Distance=Med|Number=Plur|Poss=Yes|PronType=Dem": {
- POS: DET,
- "Case": "Gen",
- "Number": "Plur",
- "Poss": "Yes",
- "PronType": "Dem",
- },
- "DET_Case=Gen|Distance=Med|Number=Sing|Poss=Yes|PronType=Dem": {
- POS: DET,
- "Case": "Gen",
- "Number": "Sing",
- "Poss": "Yes",
- "PronType": "Dem",
- },
- "DET_Case=Gen|Number=Plur|Person=1|Poss=Yes|PronType=Prs": {
- POS: DET,
- "Case": "Gen",
- "Number": "Plur",
- "Person": "one",
- "Poss": "Yes",
- "PronType": "Prs",
- },
- "DET_Case=Gen|Number=Plur|Person=2|Polite=Infm|Poss=Yes|PronType=Prs": {
- POS: DET,
- "Case": "Gen",
- "Number": "Plur",
- "Person": "two",
- "Poss": "Yes",
- "PronType": "Prs",
- },
- "DET_Case=Gen|Number=Plur|Person=3|Poss=Yes|PronType=Emp": {
- POS: DET,
- "Case": "Gen",
- "Number": "Plur",
- "Person": "three",
- "Poss": "Yes",
- },
- "DET_Case=Gen|Number=Plur|Person=3|Poss=Yes|PronType=Emp|Reflex=Yes": {
- POS: DET,
- "Case": "Gen",
- "Number": "Plur",
- "Person": "three",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "DET_Case=Gen|Number=Sing|Person=1|Poss=Yes|PronType=Prs": {
- POS: DET,
- "Case": "Gen",
- "Number": "Sing",
- "Person": "one",
- "Poss": "Yes",
- "PronType": "Prs",
- },
- "DET_Case=Gen|Number=Sing|Person=2|Polite=Infm|Poss=Yes|PronType=Prs": {
- POS: DET,
- "Case": "Gen",
- "Number": "Sing",
- "Person": "two",
- "Poss": "Yes",
- "PronType": "Prs",
- },
- "DET_Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Emp": {
- POS: DET,
- "Case": "Gen",
- "Number": "Sing",
- "Person": "three",
- "Poss": "Yes",
- },
- "DET_Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Emp|Reflex=Yes": {
- POS: DET,
- "Case": "Gen",
- "Number": "Sing",
- "Person": "three",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "DET_Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {
- POS: DET,
- "Case": "Gen",
- "Number": "Sing",
- "Person": "three",
- "Poss": "Yes",
- "PronType": "Prs",
- },
- "DET_Case=Gen|Number=Sing|Poss=Yes|PronType=Rel": {
- POS: DET,
- "Case": "Gen",
- "Number": "Sing",
- "Poss": "Yes",
- "PronType": "Rel",
- },
- "DET_Distance=Dist|PronType=Dem": {POS: DET, "PronType": "Dem"},
- "DET_Distance=Dist|PronType=Dem|Style=Coll": {
- POS: DET,
- "PronType": "Dem",
- "Style": "Coll",
- },
- "DET_Distance=Dist|PronType=Dem|Style=Vrnc": {
- POS: DET,
- "PronType": "Dem",
- "Style": "Vrnc",
- },
- "DET_Distance=Med|PronType=Dem": {POS: DET, "PronType": "Dem"},
- "DET_Distance=Med|PronType=Dem|Style=Coll": {
- POS: DET,
- "PronType": "Dem",
- "Style": "Coll",
- },
- "DET_Distance=Prox|PronType=Dem": {POS: DET, "PronType": "Dem"},
- "DET_Distance=Prox|PronType=Dem|Style=Coll": {
- POS: DET,
- "PronType": "Dem",
- "Style": "Coll",
- },
- "DET_PronType=Art": {POS: DET, "PronType": "Art"},
- "DET_PronType=Exc": {POS: DET, "PronType": "Exc"},
- "DET_PronType=Ind": {POS: DET, "PronType": "Ind"},
- "DET_PronType=Int": {POS: DET, "PronType": "Int"},
- "DET_PronType=Tot": {POS: DET, "PronType": "Tot"},
- "DET_PronType=Tot|Style=Arch": {POS: DET, "PronType": "Tot", "Style": "Arch"},
- "INTJ_Style=Vrnc": {POS: INTJ, "Style": "Vrnc"},
- "NOUN_Abbr=Yes|Animacy=Nhum|Case=Dat|Definite=Ind|Number=Plur": {
- POS: NOUN,
- "Abbr": "Yes",
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Abbr=Yes|Animacy=Nhum|Case=Nom|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Abbr": "Yes",
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Hum|Case=Abl|Definite=Ind|Number=Plur": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Abl",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Hum|Case=Abl|Definite=Ind|Number=Plur|Style=Slng": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Abl",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Hum|Case=Abl|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Abl",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Hum|Case=Dat|Definite=Def|Number=Plur": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Plur",
- },
- "NOUN_Animacy=Hum|Case=Dat|Definite=Def|Number=Sing": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Sing",
- },
- "NOUN_Animacy=Hum|Case=Dat|Definite=Def|Number=Sing|Style=Slng": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Sing",
- },
- "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Assoc": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Ind",
- },
- "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Plur": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Plur|Style=Coll": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Plur",
- "Style": "Coll",
- },
- "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Plur|Style=Slng": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Hum|Case=Dat|Definite=Ind|Number=Sing|Style=Arch": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Sing",
- "Style": "Arch",
- },
- "NOUN_Animacy=Hum|Case=Dat|Number=Sing|Number=Sing|Person=1": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "one",
- },
- "NOUN_Animacy=Hum|Case=Dat|Number=Sing|Number=Sing|Person=1|Style=Coll": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "one",
- "Style": "Coll",
- },
- "NOUN_Animacy=Hum|Case=Ins|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Ins",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Def|Number=Plur": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Plur",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Def|Number=Plur|Style=Slng": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Plur",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Def|Number=Sing": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Sing",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Def|Number=Sing|Style=Coll": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Sing",
- "Style": "Coll",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Assoc": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Plur": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Plur|Style=Coll": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Plur",
- "Style": "Coll",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Plur|Style=Slng": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Plur|Typo=Yes": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Plur",
- "Typo": "Yes",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Hum|Case=Nom|Definite=Ind|Number=Sing|Style=Coll": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Sing",
- "Style": "Coll",
- },
- "NOUN_Animacy=Hum|Case=Nom|Number=Sing|Number=Sing|Person=1": {
- POS: NOUN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "one",
- },
- "NOUN_Animacy=Nhum|Case=Abl|Definite=Ind|Number=Coll": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Abl",
- "Definite": "Ind",
- },
- "NOUN_Animacy=Nhum|Case=Abl|Definite=Ind|Number=Plur": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Abl",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Nhum|Case=Abl|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Abl",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Nhum|Case=Abl|Definite=Ind|Number=Sing|Style=Arch": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Abl",
- "Definite": "Ind",
- "Number": "Sing",
- "Style": "Arch",
- },
- "NOUN_Animacy=Nhum|Case=Abl|Number=Sing|Number=Sing|Person=2": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Abl",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "two",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Coll": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Def",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Plur": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Plur",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Sing|NumForm=Digit": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Sing",
- "NumForm": "Digit",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Sing|NumForm=Word": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Sing",
- "NumForm": "Word",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Sing": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Sing",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Sing|Style=Rare": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Sing",
- "Style": "Rare",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Def|Number=Sing|Style=Vrnc": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Sing",
- "Style": "Vrnc",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Ind|Number=Coll": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Ind",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Ind|Number=Plur": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Ind|Number=Sing|NumForm=Digit": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Sing",
- "NumForm": "Digit",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Ind|Number=Sing|Style=Coll": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Sing",
- "Style": "Coll",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Definite=Ind|Number=Sing|Style=Vrnc": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Sing",
- "Style": "Vrnc",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Number=Coll|Number=Sing|Person=1": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- #
- "Number": "Sing",
- "Person": "one",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Number=Sing|Number=Sing|Person=1": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "one",
- },
- "NOUN_Animacy=Nhum|Case=Dat|Number=Sing|Number=Sing|Person=2": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "two",
- },
- "NOUN_Animacy=Nhum|Case=Gen|Definite=Ind|Number=Sing|Style=Arch": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Gen",
- "Definite": "Ind",
- "Number": "Sing",
- "Style": "Arch",
- },
- "NOUN_Animacy=Nhum|Case=Ins|Definite=Ind|Number=Coll": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Ins",
- "Definite": "Ind",
- },
- "NOUN_Animacy=Nhum|Case=Ins|Definite=Ind|Number=Plur": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Ins",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Nhum|Case=Ins|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Ins",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Nhum|Case=Ins|Definite=Ind|Number=Sing|Style=Coll": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Ins",
- "Definite": "Ind",
- "Number": "Sing",
- "Style": "Coll",
- },
- "NOUN_Animacy=Nhum|Case=Ins|Number=Sing|Number=Sing|Person=1": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Ins",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "one",
- },
- "NOUN_Animacy=Nhum|Case=Loc|Definite=Ind|Number=Plur": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Loc",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Nhum|Case=Loc|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Loc",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Nhum|Case=Loc|Number=Sing|Number=Sing|Person=2": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Loc",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "two",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Def|Number=Coll": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Def",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Def|Number=Plur|Number=Sing|Poss=Yes": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Def",
- # "Number": "Plur",
- "Number": "Sing",
- "Poss": "Yes",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Def|Number=Plur": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Plur",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Def|Number=Sing|NumForm=Digit": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Sing",
- "NumForm": "Digit",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Def|Number=Sing": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Sing",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Ind|Number=Coll": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Ind|Number=Coll|Typo=Yes": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- "Typo": "Yes",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Ind|Number=Plur": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Ind|Number=Sing": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Sing",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Definite=Ind": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Number=Plur|Number=Sing|Person=2": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- # "Number": "Plur",
- "Number": "Sing",
- "Person": "two",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Number=Sing|Number=Sing|Person=1": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "one",
- },
- "NOUN_Animacy=Nhum|Case=Nom|Number=Sing|Number=Sing|Person=2": {
- POS: NOUN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Number": "Sing",
- "Number": "Sing",
- "Person": "two",
- },
- "NUM_NumForm=Digit|NumType=Card": {POS: NUM, "NumForm": "Digit", "NumType": "Card"},
- "NUM_NumForm=Digit|NumType=Frac|Typo=Yes": {
- POS: NUM,
- "NumForm": "Digit",
- "NumType": "Frac",
- "Typo": "Yes",
- },
- "NUM_NumForm=Digit|NumType=Range": {POS: NUM, "NumForm": "Digit",},
- "NUM_NumForm=Word|NumType=Card": {POS: NUM, "NumForm": "Word", "NumType": "Card"},
- "NUM_NumForm=Word|NumType=Dist": {POS: NUM, "NumForm": "Word", "NumType": "Dist"},
- "NUM_NumForm=Word|NumType=Range": {POS: NUM, "NumForm": "Word",},
- "PART_Polarity=Neg": {POS: PART, "Polarity": "Neg"},
- "PRON_Case=Abl|Definite=Ind|Number=Sing|Person=3|PronType=Prs": {
- POS: PRON,
- "Case": "Abl",
- "Definite": "Ind",
- "Number": "Sing",
- "Person": "three",
- "PronType": "Prs",
- },
- "PRON_Case=Abl|Number=Plur|Person=3|PronType=Prs": {
- POS: PRON,
- "Case": "Abl",
- "Number": "Plur",
- "Person": "three",
- "PronType": "Prs",
- },
- "PRON_Case=Abl|Number=Sing|Person=2|Polite=Infm|PronType=Prs": {
- POS: PRON,
- "Case": "Abl",
- "Number": "Sing",
- "Person": "two",
- "PronType": "Prs",
- },
- "PRON_Case=Dat|Definite=Def|Distance=Dist|Number=Sing|PronType=Dem": {
- POS: PRON,
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "PRON_Case=Dat|Definite=Def|Number=Sing|Person=3|PronType=Prs": {
- POS: PRON,
- "Case": "Dat",
- "Definite": "Def",
- "Number": "Sing",
- "Person": "three",
- "PronType": "Prs",
- },
- "PRON_Case=Dat|Definite=Ind|Number=Sing|PronType=Int": {
- POS: PRON,
- "Case": "Dat",
- "Definite": "Ind",
- "Number": "Sing",
- "PronType": "Int",
- },
- "PRON_Case=Dat|Distance=Dist|Number=Sing|PronType=Dem": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "PRON_Case=Dat|Distance=Med|Number=Plur|PronType=Dem": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "PRON_Case=Dat|Number=Plur|Person=1|PronType=Prs": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Plur",
- "Person": "one",
- "PronType": "Prs",
- },
- "PRON_Case=Dat|Number=Plur|Person=2|Polite=Infm|PronType=Prs": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Plur",
- "Person": "two",
- "PronType": "Prs",
- },
- "PRON_Case=Dat|Number=Plur|Person=3|PronType=Emp|Reflex=Yes": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Plur",
- "Person": "three",
- "Reflex": "Yes",
- },
- "PRON_Case=Dat|Number=Plur|Person=3|PronType=Prs": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Plur",
- "Person": "three",
- "PronType": "Prs",
- },
- "PRON_Case=Dat|Number=Plur|PronType=Rcp": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Plur",
- "PronType": "Rcp",
- },
- "PRON_Case=Dat|Number=Sing|Person=1|PronType=Prs": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Sing",
- "Person": "one",
- "PronType": "Prs",
- },
- "PRON_Case=Dat|Number=Sing|Person=2|Polite=Infm|PronType=Prs": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Sing",
- "Person": "two",
- "PronType": "Prs",
- },
- "PRON_Case=Dat|Number=Sing|Person=3|PronType=Emp": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Sing",
- "Person": "three",
- },
- "PRON_Case=Dat|Number=Sing|Person=3|PronType=Emp|Reflex=Yes": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Sing",
- "Person": "three",
- "Reflex": "Yes",
- },
- "PRON_Case=Dat|Number=Sing|PronType=Int": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Sing",
- "PronType": "Int",
- },
- "PRON_Case=Dat|Number=Sing|PronType=Rel": {
- POS: PRON,
- "Case": "Dat",
- "Number": "Sing",
- "PronType": "Rel",
- },
- "PRON_Case=Dat|PronType=Tot": {POS: PRON, "Case": "Dat", "PronType": "Tot"},
- "PRON_Case=Gen|Distance=Med|Number=Sing|PronType=Dem": {
- POS: PRON,
- "Case": "Gen",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "PRON_Case=Gen|Number=Plur|Person=1|PronType=Prs": {
- POS: PRON,
- "Case": "Gen",
- "Number": "Plur",
- "Person": "one",
- "PronType": "Prs",
- },
- "PRON_Case=Gen|Number=Sing|Person=2|PronType=Prs": {
- POS: PRON,
- "Case": "Gen",
- "Number": "Sing",
- "Person": "two",
- "PronType": "Prs",
- },
- "PRON_Case=Gen|Number=Sing|Person=3|PronType=Prs": {
- POS: PRON,
- "Case": "Gen",
- "Number": "Sing",
- "Person": "three",
- "PronType": "Prs",
- },
- "PRON_Case=Gen|PronType=Tot": {POS: PRON, "Case": "Gen", "PronType": "Tot"},
- "PRON_Case=Ins|Definite=Ind|Number=Sing|PronType=Rel": {
- POS: PRON,
- "Case": "Ins",
- "Definite": "Ind",
- "Number": "Sing",
- "PronType": "Rel",
- },
- "PRON_Case=Ins|Distance=Med|Number=Sing|PronType=Dem": {
- POS: PRON,
- "Case": "Ins",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "PRON_Case=Loc|Definite=Ind|Number=Sing|PronType=Rel": {
- POS: PRON,
- "Case": "Loc",
- "Definite": "Ind",
- "Number": "Sing",
- "PronType": "Rel",
- },
- "PRON_Case=Loc|Distance=Med|Number=Sing|PronType=Dem": {
- POS: PRON,
- "Case": "Loc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "PRON_Case=Nom|Definite=Def|Distance=Dist|Number=Plur|PronType=Dem": {
- POS: PRON,
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "PRON_Case=Nom|Definite=Def|Distance=Med|Number=Sing|PronType=Dem|Style=Coll": {
- POS: PRON,
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Sing",
- "PronType": "Dem",
- "Style": "Coll",
- },
- "PRON_Case=Nom|Definite=Def|Number=Sing|PronType=Int": {
- POS: PRON,
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Sing",
- "PronType": "Int",
- },
- "PRON_Case=Nom|Definite=Def|Number=Sing|PronType=Rel": {
- POS: PRON,
- "Case": "Nom",
- "Definite": "Def",
- "Number": "Sing",
- "PronType": "Rel",
- },
- "PRON_Case=Nom|Definite=Ind|Number=Sing|PronType=Int": {
- POS: PRON,
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Sing",
- "PronType": "Int",
- },
- "PRON_Case=Nom|Definite=Ind|Number=Sing|PronType=Neg": {
- POS: PRON,
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Sing",
- "PronType": "Neg",
- },
- "PRON_Case=Nom|Definite=Ind|Number=Sing|PronType=Rel": {
- POS: PRON,
- "Case": "Nom",
- "Definite": "Ind",
- "Number": "Sing",
- "PronType": "Rel",
- },
- "PRON_Case=Nom|Distance=Dist|Number=Plur|Person=1|PronType=Dem": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Plur",
- "Person": "one",
- "PronType": "Dem",
- },
- "PRON_Case=Nom|Distance=Med|Number=Plur|PronType=Dem": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "PRON_Case=Nom|Distance=Med|Number=Sing|PronType=Dem": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "PRON_Case=Nom|Distance=Prox|Number=Sing|PronType=Dem": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "PRON_Case=Nom|Number=Plur|Person=1|PronType=Prs": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Plur",
- "Person": "one",
- "PronType": "Prs",
- },
- "PRON_Case=Nom|Number=Plur|Person=3|PronType=Emp": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Plur",
- "Person": "three",
- },
- "PRON_Case=Nom|Number=Plur|Person=3|PronType=Prs": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Plur",
- "Person": "three",
- "PronType": "Prs",
- },
- "PRON_Case=Nom|Number=Plur|PronType=Rel": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Plur",
- "PronType": "Rel",
- },
- "PRON_Case=Nom|Number=Sing|Number=Plur|Person=3|Person=1|PronType=Emp": {
- POS: PRON,
- "Case": "Nom",
- # "Number": "Sing",
- "Number": "Plur",
- # "Person": "three",
- "Person": "one",
- },
- "PRON_Case=Nom|Number=Sing|Person=1|PronType=Int": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Sing",
- "Person": "one",
- "PronType": "Int",
- },
- "PRON_Case=Nom|Number=Sing|Person=1|PronType=Prs": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Sing",
- "Person": "one",
- "PronType": "Prs",
- },
- "PRON_Case=Nom|Number=Sing|Person=2|Polite=Infm|PronType=Prs": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Sing",
- "Person": "two",
- "PronType": "Prs",
- },
- "PRON_Case=Nom|Number=Sing|Person=3|PronType=Emp": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Sing",
- "Person": "three",
- },
- "PRON_Case=Nom|Number=Sing|Person=3|PronType=Prs": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Sing",
- "Person": "three",
- "PronType": "Prs",
- },
- "PRON_Case=Nom|Number=Sing|PronType=Int": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Sing",
- "PronType": "Int",
- },
- "PRON_Case=Nom|Number=Sing|PronType=Rel": {
- POS: PRON,
- "Case": "Nom",
- "Number": "Sing",
- "PronType": "Rel",
- },
- "PRON_Case=Nom|Person=1|PronType=Tot": {
- POS: PRON,
- "Case": "Nom",
- "Person": "one",
- "PronType": "Tot",
- },
- "PRON_Case=Nom|PronType=Ind": {POS: PRON, "Case": "Nom", "PronType": "Ind"},
- "PRON_Case=Nom|PronType=Tot": {POS: PRON, "Case": "Nom", "PronType": "Tot"},
- "PRON_Distance=Dist|Number=Sing|PronType=Dem": {
- POS: PRON,
- "Number": "Sing",
- "PronType": "Dem",
- },
- "PRON_Distance=Med|PronType=Dem|Style=Coll": {
- POS: PRON,
- "PronType": "Dem",
- "Style": "Coll",
- },
- "PRON_Distance=Prox|PronType=Dem|Style=Coll": {
- POS: PRON,
- "PronType": "Dem",
- "Style": "Coll",
- },
- "PRON_Number=Plur|PronType=Rel": {POS: PRON, "Number": "Plur", "PronType": "Rel"},
- "PROPN_Abbr=Yes|Animacy=Hum|Case=Nom|Definite=Ind|NameType=Giv|Number=Sing": {
- POS: PROPN,
- "Abbr": "Yes",
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "PROPN_Abbr=Yes|Animacy=Nhum|Case=Nom|Definite=Ind|NameType=Com|Number=Sing": {
- POS: PROPN,
- "Abbr": "Yes",
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- "NameType": "Com",
- "Number": "Sing",
- },
- "PROPN_Animacy=Hum|Case=Dat|Definite=Def|NameType=Sur|Number=Sing": {
- POS: PROPN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Def",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "PROPN_Animacy=Hum|Case=Dat|Definite=Ind|NameType=Prs|Number=Sing": {
- POS: PROPN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Ind",
- "NameType": "Prs",
- "Number": "Sing",
- },
- "PROPN_Animacy=Hum|Case=Dat|Definite=Ind|NameType=Sur|Number=Sing": {
- POS: PROPN,
- "Animacy": "Hum",
- "Case": "Dat",
- "Definite": "Ind",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "PROPN_Animacy=Hum|Case=Nom|Definite=Def|NameType=Giv|Number=Sing": {
- POS: PROPN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Def",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "PROPN_Animacy=Hum|Case=Nom|Definite=Def|NameType=Sur|Number=Sing": {
- POS: PROPN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Def",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "PROPN_Animacy=Hum|Case=Nom|Definite=Ind|NameType=Giv|Number=Sing": {
- POS: PROPN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "PROPN_Animacy=Hum|Case=Nom|Definite=Ind|NameType=Sur|Number=Sing": {
- POS: PROPN,
- "Animacy": "Hum",
- "Case": "Nom",
- "Definite": "Ind",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "PROPN_Animacy=Nhum|Case=Abl|Definite=Ind|NameType=Geo|Number=Coll": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Abl",
- "Definite": "Ind",
- "NameType": "Geo",
- },
- "PROPN_Animacy=Nhum|Case=Abl|Definite=Ind|NameType=Geo|Number=Sing": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Abl",
- "Definite": "Ind",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "PROPN_Animacy=Nhum|Case=Abl|Definite=Ind|Number=Plur": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Abl",
- "Definite": "Ind",
- "Number": "Plur",
- },
- "PROPN_Animacy=Nhum|Case=Dat|Definite=Ind|NameType=Geo|Number=Sing": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Ind",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "PROPN_Animacy=Nhum|Case=Dat|Definite=Ind|NameType=Geo|Number=Sing|Style=Coll": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Dat",
- "Definite": "Ind",
- "NameType": "Geo",
- "Number": "Sing",
- "Style": "Coll",
- },
- "PROPN_Animacy=Nhum|Case=Loc|Definite=Ind|NameType=Geo|Number=Sing": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Loc",
- "Definite": "Ind",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "PROPN_Animacy=Nhum|Case=Nom|Definite=Def|NameType=Geo|Number=Sing": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Def",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "PROPN_Animacy=Nhum|Case=Nom|Definite=Def|NameType=Pro|Number=Sing|Style=Coll": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Def",
- "NameType": "Pro",
- "Number": "Sing",
- "Style": "Coll",
- },
- "PROPN_Animacy=Nhum|Case=Nom|Definite=Ind|NameType=Geo|Number=Coll": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- "NameType": "Geo",
- },
- "PROPN_Animacy=Nhum|Case=Nom|Definite=Ind|NameType=Geo|Number=Sing": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "PROPN_Animacy=Nhum|Case=Nom|Definite=Ind|NameType=Geo|Number=Sing|Style=Vrnc": {
- POS: PROPN,
- "Animacy": "Nhum",
- "Case": "Nom",
- "Definite": "Ind",
- "NameType": "Geo",
- "Number": "Sing",
- "Style": "Vrnc",
- },
- "SCONJ_Style=Coll": {POS: SCONJ, "Style": "Coll"},
- "VERB_Aspect=Dur|Polarity=Neg|Subcat=Intr|VerbForm=Part|Voice=Pass": {
- POS: VERB,
- "Polarity": "Neg",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "VERB_Aspect=Dur|Polarity=Pos|Subcat=Intr|VerbForm=Part|Voice=Mid": {
- POS: VERB,
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Mid",
- },
- "VERB_Aspect=Dur|Polarity=Pos|Subcat=Intr|VerbForm=Part|Voice=Pass": {
- POS: VERB,
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "VERB_Aspect=Dur|Polarity=Pos|Subcat=Tran|VerbForm=Part|Voice=Act": {
- POS: VERB,
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "VERB_Aspect=Dur|Polarity=Pos|Subcat=Tran|VerbForm=Part|Voice=Mid": {
- POS: VERB,
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Mid",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Polarity=Neg|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Polarity=Pos|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Neg|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Subcat=Tran|Tense=Imp|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Imp",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Polarity=Neg|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Subcat=Tran|Tense=Imp|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Imp",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Imp",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Imp|Style=Coll|Subcat=Intr|VerbForm=Part|Voice=Mid": {
- POS: VERB,
- "Aspect": "Imp",
- "Style": "Coll",
- "VerbForm": "Part",
- "Voice": "Mid",
- },
- "VERB_Aspect=Imp|Style=Vrnc|Subcat=Intr|VerbForm=Part|Voice=Mid": {
- POS: VERB,
- "Aspect": "Imp",
- "Style": "Vrnc",
- "VerbForm": "Part",
- "Voice": "Mid",
- },
- "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part": {
- POS: VERB,
- "Aspect": "Imp",
- "VerbForm": "Part",
- },
- "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part|Voice=Act": {
- POS: VERB,
- "Aspect": "Imp",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part|Voice=Mid": {
- POS: VERB,
- "Aspect": "Imp",
- "VerbForm": "Part",
- "Voice": "Mid",
- },
- "VERB_Aspect=Imp|Subcat=Intr|VerbForm=Part|Voice=Pass": {
- POS: VERB,
- "Aspect": "Imp",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "VERB_Aspect=Imp|Subcat=Tran|VerbForm=Part|Voice=Act": {
- POS: VERB,
- "Aspect": "Imp",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "VERB_Aspect=Imp|Subcat=Tran|VerbForm=Part|Voice=Cau": {
- POS: VERB,
- "Aspect": "Imp",
- "VerbForm": "Part",
- "Voice": "Cau",
- },
- "VERB_Aspect=Iter|Case=Ins|Definite=Ind|Number=Coll|Polarity=Pos|Subcat=Intr|VerbForm=Gdv|Voice=Mid": {
- POS: VERB,
- "Aspect": "Iter",
- "Case": "Ins",
- "Definite": "Ind",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Mid",
- },
- "VERB_Aspect=Iter|Case=Ins|Definite=Ind|Number=Coll|Polarity=Pos|Subcat=Tran|VerbForm=Gdv|Voice=Act": {
- POS: VERB,
- "Aspect": "Iter",
- "Case": "Ins",
- "Definite": "Ind",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Act",
- },
- "VERB_Aspect=Iter": {POS: VERB, "Aspect": "Iter"},
- "VERB_Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Polarity=Neg|Subcat=Intr|Tense=Past|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Subcat=Intr|Tense=Past|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Polarity=Pos|Subcat=Tran|Tense=Past|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Neg|Subcat=Intr|Tense=Past|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Style=Vrnc|Subcat=Tran|Tense=Past|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Style": "Vrnc",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Subcat=Intr|Tense=Past|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Polarity=Pos|Subcat=Tran|Tense=Past|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Polarity=Pos|Subcat=Tran|Tense=Past|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Style=Vrnc|Subcat=Intr|Tense=Past|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Style": "Vrnc",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Neg|Subcat=Tran|Tense=Past|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Subcat=Intr|Tense=Past|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Subcat=Tran|Tense=Past|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Polarity=Pos|Subcat=Tran|Tense=Past|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Perf|Polarity=Neg|Subcat=Intr|VerbForm=Part|Voice=Pass": {
- POS: VERB,
- "Aspect": "Perf",
- "Polarity": "Neg",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "VERB_Aspect=Perf|Polarity=Pos|Subcat=Intr|VerbForm=Part|Voice=Mid": {
- POS: VERB,
- "Aspect": "Perf",
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Mid",
- },
- "VERB_Aspect=Perf|Polarity=Pos|Subcat=Intr|VerbForm=Part|Voice=Pass": {
- POS: VERB,
- "Aspect": "Perf",
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "VERB_Aspect=Perf|Polarity=Pos|Subcat=Tran|VerbForm=Part|Voice=Act": {
- POS: VERB,
- "Aspect": "Perf",
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "VERB_Aspect=Perf|Polarity=Pos|Subcat=Tran|VerbForm=Part|Voice=Pass": {
- POS: VERB,
- "Aspect": "Perf",
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "VERB_Aspect=Perf|Polarity=Pos|VerbForm=Part|Voice=Act": {
- POS: VERB,
- "Aspect": "Perf",
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "VERB_Aspect=Perf|Subcat=Intr|VerbForm=Part|Voice=Mid": {
- POS: VERB,
- "Aspect": "Perf",
- "VerbForm": "Part",
- "Voice": "Mid",
- },
- "VERB_Aspect=Perf|Subcat=Intr|VerbForm=Part|Voice=Pass": {
- POS: VERB,
- "Aspect": "Perf",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "VERB_Aspect=Perf|Subcat=Tran|VerbForm=Part|Voice=Act": {
- POS: VERB,
- "Aspect": "Perf",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "VERB_Aspect=Perf|Subcat=Tran|VerbForm=Part|Voice=Cau": {
- POS: VERB,
- "Aspect": "Perf",
- "VerbForm": "Part",
- "Voice": "Cau",
- },
- "VERB_Aspect=Prog|Subcat=Intr|VerbForm=Conv|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prog",
- "VerbForm": "Conv",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Connegative=Yes|Mood=Cnd|Subcat=Tran|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Connegative": "Yes",
- "Mood": "Cnd",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Cnd|Number=Plur|Person=3|Polarity=Pos|Style=Vrnc|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Style": "Vrnc",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Cnd|Number=Plur|Person=3|Polarity=Pos|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Cnd|Number=Sing|Person=1|Polarity=Pos|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Cnd|Number=Sing|Person=2|Polarity=Pos|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Cnd|Number=Sing|Person=3|Polarity=Pos|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Cnd|Number=Sing|Person=3|Polarity=Pos|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Pass": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Pass",
- },
- "VERB_Aspect=Prosp|Mood=Cnd|Number=Sing|Person=3|Polarity=Pos|Subcat=Tran|Tense=Imp|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Imp",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Cnd|Number=Sing|Person=3|Polarity=Pos|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Imp|Number=Sing|Person=2|Subcat=Intr|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Imp",
- "Number": "Sing",
- "Person": "two",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Imp|Number=Sing|Person=2|Subcat=Tran|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Imp",
- "Number": "Sing",
- "Person": "two",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Plur|Person=1|Polarity=Pos|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Plur|Person=3|Polarity=Neg|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Plur|Person=3|Polarity=Pos|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=1|Polarity=Neg|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=1|Polarity=Neg|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=1|Polarity=Pos|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=1|Polarity=Pos|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=2|Polarity=Pos|Subcat=Tran|Tense=Imp|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Imp",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=2|Polarity=Pos|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=3|Polarity=Pos|Subcat=Intr|Tense=Imp|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Imp",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=3|Polarity=Pos|Subcat=Intr|Tense=Pres|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=3|Polarity=Pos|Subcat=Intr|VerbForm=Fin|Voice=Pass": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- "Voice": "Pass",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=3|Polarity=Pos|Subcat=Tran|Tense=Imp|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Imp",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Number=Sing|Person=3|Polarity=Pos|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Mood=Sub|Person=1|Polarity=Neg|Subcat=Tran|Tense=Pres|VerbForm=Fin|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Mood": "Sub",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Polarity=Pos|Subcat=Intr|VerbForm=Part|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Polarity=Pos|Subcat=Tran|VerbForm=Part|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "Polarity": "Pos",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "VERB_Aspect=Prosp|Subcat=Intr|VerbForm=Part|Voice=Mid": {
- POS: VERB,
- "Aspect": "Prosp",
- "VerbForm": "Part",
- "Voice": "Mid",
- },
- "VERB_Aspect=Prosp|Subcat=Intr|VerbForm=Part|Voice=Pass": {
- POS: VERB,
- "Aspect": "Prosp",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "VERB_Aspect=Prosp|Subcat=Tran|VerbForm=Part|Voice=Act": {
- POS: VERB,
- "Aspect": "Prosp",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "VERB_Case=Abl|Definite=Ind|Number=Coll|Polarity=Pos|Subcat=Intr|VerbForm=Gdv|Voice=Mid": {
- POS: VERB,
- "Case": "Abl",
- "Definite": "Ind",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Mid",
- },
- "VERB_Case=Abl|Definite=Ind|Number=Coll|Polarity=Pos|Subcat=Intr|VerbForm=Gdv|Voice=Pass": {
- POS: VERB,
- "Case": "Abl",
- "Definite": "Ind",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Pass",
- },
- "VERB_Case=Abl|Definite=Ind|Number=Coll|Polarity=Pos|Subcat=Tran|VerbForm=Gdv|Voice=Act": {
- POS: VERB,
- "Case": "Abl",
- "Definite": "Ind",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Act",
- },
- "VERB_Case=Dat|Definite=Def|Number=Coll|Polarity=Pos|Subcat=Intr|VerbForm=Gdv|Voice=Mid": {
- POS: VERB,
- "Case": "Dat",
- "Definite": "Def",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Mid",
- },
- "VERB_Case=Dat|Definite=Ind|Number=Coll|Polarity=Neg|Subcat=Intr|VerbForm=Gdv|Voice=Pass": {
- POS: VERB,
- "Case": "Dat",
- "Definite": "Ind",
- "Polarity": "Neg",
- "VerbForm": "Gdv",
- "Voice": "Pass",
- },
- "VERB_Case=Dat|Definite=Ind|Number=Coll|Polarity=Pos|Subcat=Intr|VerbForm=Gdv|Voice=Mid": {
- POS: VERB,
- "Case": "Dat",
- "Definite": "Ind",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Mid",
- },
- "VERB_Case=Dat|Definite=Ind|Number=Coll|Polarity=Pos|Subcat=Tran|VerbForm=Gdv|Voice=Act": {
- POS: VERB,
- "Case": "Dat",
- "Definite": "Ind",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Act",
- },
- "VERB_Case=Ins|Definite=Ind|Number=Coll|Polarity=Pos|Subcat=Intr|VerbForm=Gdv|Voice=Mid": {
- POS: VERB,
- "Case": "Ins",
- "Definite": "Ind",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Mid",
- },
- "VERB_Case=Ins|Definite=Ind|Number=Coll|Polarity=Pos|Subcat=Tran|VerbForm=Gdv|Voice=Act": {
- POS: VERB,
- "Case": "Ins",
- "Definite": "Ind",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Act",
- },
- "VERB_Case=Nom|Definite=Def|Number=Coll|Polarity=Pos|Subcat=Intr|VerbForm=Gdv|Voice=Mid": {
- POS: VERB,
- "Case": "Nom",
- "Definite": "Def",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Mid",
- },
- "VERB_Case=Nom|Definite=Def|Number=Coll|Polarity=Pos|Subcat=Tran|VerbForm=Gdv|Voice=Act": {
- POS: VERB,
- "Case": "Nom",
- "Definite": "Def",
- "Polarity": "Pos",
- "VerbForm": "Gdv",
- "Voice": "Act",
- },
- "VERB_Mood=Imp|Number=Sing|Person=2|Subcat=Intr|VerbForm=Fin|Voice=Mid": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Sing",
- "Person": "two",
- "VerbForm": "Fin",
- "Voice": "Mid",
- },
- "VERB_Polarity=Neg|Subcat=Intr|VerbForm=Inf|Voice=Mid": {
- POS: VERB,
- "Polarity": "Neg",
- "VerbForm": "Inf",
- "Voice": "Mid",
- },
- "VERB_Polarity=Pos|Style=Coll|Subcat=Tran|VerbForm=Inf|Voice=Act": {
- POS: VERB,
- "Polarity": "Pos",
- "Style": "Coll",
- "VerbForm": "Inf",
- "Voice": "Act",
- },
- "VERB_Polarity=Pos|Style=Vrnc|Subcat=Tran|VerbForm=Inf|Voice=Act": {
- POS: VERB,
- "Polarity": "Pos",
- "Style": "Vrnc",
- "VerbForm": "Inf",
- "Voice": "Act",
- },
- "VERB_Polarity=Pos|Subcat=Intr|VerbForm=Inf|Voice=Mid": {
- POS: VERB,
- "Polarity": "Pos",
- "VerbForm": "Inf",
- "Voice": "Mid",
- },
- "VERB_Polarity=Pos|Subcat=Intr|VerbForm=Inf|Voice=Pass": {
- POS: VERB,
- "Polarity": "Pos",
- "VerbForm": "Inf",
- "Voice": "Pass",
- },
- "VERB_Polarity=Pos|Subcat=Tran|Typo=Yes|VerbForm=Inf|Voice=Act": {
- POS: VERB,
- "Polarity": "Pos",
- "Typo": "Yes",
- "VerbForm": "Inf",
- "Voice": "Act",
- },
- "VERB_Polarity=Pos|Subcat=Tran|VerbForm=Inf|Voice=Act": {
- POS: VERB,
- "Polarity": "Pos",
- "VerbForm": "Inf",
- "Voice": "Act",
- },
- "X_Foreign=Yes": {POS: X, "Foreign": "Yes"},
- "X_Style=Vrnc": {POS: X, "Style": "Vrnc"},
-}
diff --git a/spacy/lang/id/__init__.py b/spacy/lang/id/__init__.py
index 8e2266a40..87373551c 100644
--- a/spacy/lang/id/__init__.py
+++ b/spacy/lang/id/__init__.py
@@ -1,30 +1,19 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES, TOKENIZER_INFIXES
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .lex_attrs import LEX_ATTRS
from .syntax_iterators import SYNTAX_ITERATORS
-from .tag_map import TAG_MAP
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
class IndonesianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "id"
- lex_attr_getters.update(LEX_ATTRS)
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
suffixes = TOKENIZER_SUFFIXES
infixes = TOKENIZER_INFIXES
syntax_iterators = SYNTAX_ITERATORS
- tag_map = TAG_MAP
+ lex_attr_getters = LEX_ATTRS
+ stop_words = STOP_WORDS
class Indonesian(Language):
diff --git a/spacy/lang/id/_tokenizer_exceptions_list.py b/spacy/lang/id/_tokenizer_exceptions_list.py
index fec878d5a..a0b35fa1a 100644
--- a/spacy/lang/id/_tokenizer_exceptions_list.py
+++ b/spacy/lang/id/_tokenizer_exceptions_list.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
ID_BASE_EXCEPTIONS = set(
"""
aba-aba
diff --git a/spacy/lang/id/examples.py b/spacy/lang/id/examples.py
index 56ac9165e..1069232ff 100644
--- a/spacy/lang/id/examples.py
+++ b/spacy/lang/id/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/id/lex_attrs.py b/spacy/lang/id/lex_attrs.py
index 1d4584ae3..3167f4659 100644
--- a/spacy/lang/id/lex_attrs.py
+++ b/spacy/lang/id/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
import unicodedata
from .punctuation import LIST_CURRENCY
diff --git a/spacy/lang/id/punctuation.py b/spacy/lang/id/punctuation.py
index e4794d42b..f6c2387d8 100644
--- a/spacy/lang/id/punctuation.py
+++ b/spacy/lang/id/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
from ..char_classes import ALPHA, merge_chars, split_chars, _currency, _units
diff --git a/spacy/lang/id/stop_words.py b/spacy/lang/id/stop_words.py
index 0a9f91947..b1bfaea79 100644
--- a/spacy/lang/id/stop_words.py
+++ b/spacy/lang/id/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
STOP_WORDS = set(
"""
ada adalah adanya adapun agak agaknya agar akan akankah akhir akhiri akhirnya
diff --git a/spacy/lang/id/syntax_iterators.py b/spacy/lang/id/syntax_iterators.py
index d6c12e69f..f6d261643 100644
--- a/spacy/lang/id/syntax_iterators.py
+++ b/spacy/lang/id/syntax_iterators.py
@@ -1,29 +1,20 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Union, Iterator
from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...tokens import Doc, Span
-def noun_chunks(doclike):
+def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Span]:
"""
Detect base noun phrases from a dependency parse. Works on both Doc and Span.
"""
- labels = [
- "nsubj",
- "nsubj:pass",
- "obj",
- "iobj",
- "ROOT",
- "appos",
- "nmod",
- "nmod:poss",
- ]
+ # fmt: off
+ labels = ["nsubj", "nsubj:pass", "obj", "iobj", "ROOT", "appos", "nmod", "nmod:poss"]
+ # fmt: on
doc = doclike.doc # Ensure works on both Doc and Span.
-
if not doc.is_parsed:
raise ValueError(Errors.E029)
-
np_deps = [doc.vocab.strings[label] for label in labels]
conj = doc.vocab.strings.add("conj")
np_label = doc.vocab.strings.add("NP")
diff --git a/spacy/lang/id/tag_map.py b/spacy/lang/id/tag_map.py
deleted file mode 100644
index 16391a840..000000000
--- a/spacy/lang/id/tag_map.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import NOUN, PRON, AUX, SCONJ, INTJ, PART, PROPN
-
-
-# POS explanations for indonesian available from https://www.aclweb.org/anthology/Y12-1014
-TAG_MAP = {
- "NSD": {POS: NOUN},
- "Z--": {POS: PUNCT},
- "VSA": {POS: VERB},
- "CC-": {POS: NUM},
- "R--": {POS: ADP},
- "D--": {POS: ADV},
- "ASP": {POS: ADJ},
- "S--": {POS: SCONJ},
- "VSP": {POS: VERB},
- "H--": {POS: CCONJ},
- "F--": {POS: X},
- "B--": {POS: DET},
- "CO-": {POS: NUM},
- "G--": {POS: ADV},
- "PS3": {POS: PRON},
- "W--": {POS: ADV},
- "O--": {POS: AUX},
- "PP1": {POS: PRON},
- "ASS": {POS: ADJ},
- "PS1": {POS: PRON},
- "APP": {POS: ADJ},
- "CD-": {POS: NUM},
- "VPA": {POS: VERB},
- "VPP": {POS: VERB},
- "X--": {POS: X},
- "CO-+PS3": {POS: NUM},
- "NSD+PS3": {POS: NOUN},
- "ASP+PS3": {POS: ADJ},
- "M--": {POS: AUX},
- "VSA+PS3": {POS: VERB},
- "R--+PS3": {POS: ADP},
- "W--+T--": {POS: ADV},
- "PS2": {POS: PRON},
- "NSD+PS1": {POS: NOUN},
- "PP3": {POS: PRON},
- "VSA+T--": {POS: VERB},
- "D--+T--": {POS: ADV},
- "VSP+PS3": {POS: VERB},
- "F--+PS3": {POS: X},
- "M--+T--": {POS: AUX},
- "F--+T--": {POS: X},
- "PUNCT": {POS: PUNCT},
- "PROPN": {POS: PROPN},
- "I--": {POS: INTJ},
- "S--+PS3": {POS: SCONJ},
- "ASP+T--": {POS: ADJ},
- "CC-+PS3": {POS: NUM},
- "NSD+PS2": {POS: NOUN},
- "B--+T--": {POS: DET},
- "H--+T--": {POS: CCONJ},
- "VSA+PS2": {POS: VERB},
- "NSF": {POS: NOUN},
- "PS1+VSA": {POS: PRON},
- "NPD": {POS: NOUN},
- "PP2": {POS: PRON},
- "VSA+PS1": {POS: VERB},
- "T--": {POS: PART},
- "NSM": {POS: NOUN},
- "NUM": {POS: NUM},
- "ASP+PS2": {POS: ADJ},
- "G--+T--": {POS: PART},
- "D--+PS3": {POS: ADV},
- "R--+PS2": {POS: ADP},
- "NSM+PS3": {POS: NOUN},
- "VSP+T--": {POS: VERB},
- "M--+PS3": {POS: AUX},
- "ASS+PS3": {POS: ADJ},
- "G--+PS3": {POS: PART},
- "F--+PS1": {POS: X},
- "NSD+T--": {POS: NOUN},
- "PP1+T--": {POS: PRON},
- "B--+PS3": {POS: DET},
- "NOUN": {POS: NOUN},
- "NPD+PS3": {POS: NOUN},
- "R--+PS1": {POS: ADP},
- "F--+PS2": {POS: X},
- "CD-+PS3": {POS: NUM},
- "PS1+VSA+T--": {POS: VERB},
- "PS2+VSA": {POS: VERB},
- "VERB": {POS: VERB},
- "CC-+T--": {POS: NUM},
- "NPD+PS2": {POS: NOUN},
- "D--+PS2": {POS: ADV},
- "PP3+T--": {POS: PRON},
- "X": {POS: X},
-}
diff --git a/spacy/lang/id/tokenizer_exceptions.py b/spacy/lang/id/tokenizer_exceptions.py
index 86fe611bf..ff77ede9f 100644
--- a/spacy/lang/id/tokenizer_exceptions.py
+++ b/spacy/lang/id/tokenizer_exceptions.py
@@ -1,8 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ._tokenizer_exceptions_list import ID_BASE_EXCEPTIONS
-from ...symbols import ORTH, LEMMA, NORM
+from ...symbols import ORTH, NORM
+from ...util import update_exc
+
# Daftar singkatan dan Akronim dari:
# https://id.wiktionary.org/wiki/Wiktionary:Daftar_singkatan_dan_akronim_bahasa_Indonesia#A
@@ -11,53 +11,47 @@ _exc = {}
for orth in ID_BASE_EXCEPTIONS:
_exc[orth] = [{ORTH: orth}]
-
orth_title = orth.title()
_exc[orth_title] = [{ORTH: orth_title}]
-
orth_caps = orth.upper()
_exc[orth_caps] = [{ORTH: orth_caps}]
-
orth_lower = orth.lower()
_exc[orth_lower] = [{ORTH: orth_lower}]
-
orth_first_upper = orth[0].upper() + orth[1:]
_exc[orth_first_upper] = [{ORTH: orth_first_upper}]
-
if "-" in orth:
orth_title = "-".join([part.title() for part in orth.split("-")])
_exc[orth_title] = [{ORTH: orth_title}]
-
orth_caps = "-".join([part.upper() for part in orth.split("-")])
_exc[orth_caps] = [{ORTH: orth_caps}]
for exc_data in [
- {ORTH: "Jan.", LEMMA: "Januari", NORM: "Januari"},
- {ORTH: "Feb.", LEMMA: "Februari", NORM: "Februari"},
- {ORTH: "Mar.", LEMMA: "Maret", NORM: "Maret"},
- {ORTH: "Apr.", LEMMA: "April", NORM: "April"},
- {ORTH: "Jun.", LEMMA: "Juni", NORM: "Juni"},
- {ORTH: "Jul.", LEMMA: "Juli", NORM: "Juli"},
- {ORTH: "Agu.", LEMMA: "Agustus", NORM: "Agustus"},
- {ORTH: "Ags.", LEMMA: "Agustus", NORM: "Agustus"},
- {ORTH: "Sep.", LEMMA: "September", NORM: "September"},
- {ORTH: "Okt.", LEMMA: "Oktober", NORM: "Oktober"},
- {ORTH: "Nov.", LEMMA: "November", NORM: "November"},
- {ORTH: "Des.", LEMMA: "Desember", NORM: "Desember"},
+ {ORTH: "Jan.", NORM: "Januari"},
+ {ORTH: "Feb.", NORM: "Februari"},
+ {ORTH: "Mar.", NORM: "Maret"},
+ {ORTH: "Apr.", NORM: "April"},
+ {ORTH: "Jun.", NORM: "Juni"},
+ {ORTH: "Jul.", NORM: "Juli"},
+ {ORTH: "Agu.", NORM: "Agustus"},
+ {ORTH: "Ags.", NORM: "Agustus"},
+ {ORTH: "Sep.", NORM: "September"},
+ {ORTH: "Okt.", NORM: "Oktober"},
+ {ORTH: "Nov.", NORM: "November"},
+ {ORTH: "Des.", NORM: "Desember"},
]:
_exc[exc_data[ORTH]] = [exc_data]
_other_exc = {
- "do'a": [{ORTH: "do'a", LEMMA: "doa", NORM: "doa"}],
- "jum'at": [{ORTH: "jum'at", LEMMA: "Jumat", NORM: "Jumat"}],
- "Jum'at": [{ORTH: "Jum'at", LEMMA: "Jumat", NORM: "Jumat"}],
- "la'nat": [{ORTH: "la'nat", LEMMA: "laknat", NORM: "laknat"}],
- "ma'af": [{ORTH: "ma'af", LEMMA: "maaf", NORM: "maaf"}],
- "mu'jizat": [{ORTH: "mu'jizat", LEMMA: "mukjizat", NORM: "mukjizat"}],
- "Mu'jizat": [{ORTH: "Mu'jizat", LEMMA: "mukjizat", NORM: "mukjizat"}],
- "ni'mat": [{ORTH: "ni'mat", LEMMA: "nikmat", NORM: "nikmat"}],
- "raka'at": [{ORTH: "raka'at", LEMMA: "rakaat", NORM: "rakaat"}],
- "ta'at": [{ORTH: "ta'at", LEMMA: "taat", NORM: "taat"}],
+ "do'a": [{ORTH: "do'a", NORM: "doa"}],
+ "jum'at": [{ORTH: "jum'at", NORM: "Jumat"}],
+ "Jum'at": [{ORTH: "Jum'at", NORM: "Jumat"}],
+ "la'nat": [{ORTH: "la'nat", NORM: "laknat"}],
+ "ma'af": [{ORTH: "ma'af", NORM: "maaf"}],
+ "mu'jizat": [{ORTH: "mu'jizat", NORM: "mukjizat"}],
+ "Mu'jizat": [{ORTH: "Mu'jizat", NORM: "mukjizat"}],
+ "ni'mat": [{ORTH: "ni'mat", NORM: "nikmat"}],
+ "raka'at": [{ORTH: "raka'at", NORM: "rakaat"}],
+ "ta'at": [{ORTH: "ta'at", NORM: "taat"}],
}
_exc.update(_other_exc)
@@ -224,4 +218,4 @@ for orth in [
]:
_exc[orth] = [{ORTH: orth}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/is/__init__.py b/spacy/lang/is/__init__.py
index 18e41432d..be5de5981 100644
--- a/spacy/lang/is/__init__.py
+++ b/spacy/lang/is/__init__.py
@@ -1,14 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from ...language import Language
-from ...attrs import LANG
class IcelandicDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "is"
stop_words = STOP_WORDS
diff --git a/spacy/lang/is/stop_words.py b/spacy/lang/is/stop_words.py
index e4ae0498b..917fb6df4 100644
--- a/spacy/lang/is/stop_words.py
+++ b/spacy/lang/is/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Source: https://github.com/Xangis/extra-stopwords
STOP_WORDS = set(
diff --git a/spacy/lang/it/__init__.py b/spacy/lang/it/__init__.py
index 06d146748..25cbaa651 100644
--- a/spacy/lang/it/__init__.py
+++ b/spacy/lang/it/__init__.py
@@ -1,27 +1,12 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
-from .tag_map import TAG_MAP
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
class ItalianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "it"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
stop_words = STOP_WORDS
- tag_map = TAG_MAP
prefixes = TOKENIZER_PREFIXES
infixes = TOKENIZER_INFIXES
diff --git a/spacy/lang/it/examples.py b/spacy/lang/it/examples.py
index af66b7eca..506721276 100644
--- a/spacy/lang/it/examples.py
+++ b/spacy/lang/it/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/it/punctuation.py b/spacy/lang/it/punctuation.py
index 1d641f144..f01ab4f0d 100644
--- a/spacy/lang/it/punctuation.py
+++ b/spacy/lang/it/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
from ..char_classes import LIST_ELLIPSES, LIST_ICONS
from ..char_classes import ALPHA, HYPHENS, CONCAT_QUOTES
diff --git a/spacy/lang/it/stop_words.py b/spacy/lang/it/stop_words.py
index 84233d381..e97613912 100644
--- a/spacy/lang/it/stop_words.py
+++ b/spacy/lang/it/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
a abbastanza abbia abbiamo abbiano abbiate accidenti ad adesso affinche agl
diff --git a/spacy/lang/it/tag_map.py b/spacy/lang/it/tag_map.py
deleted file mode 100644
index 798c45d80..000000000
--- a/spacy/lang/it/tag_map.py
+++ /dev/null
@@ -1,323 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, SYM, ADJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, SCONJ, AUX, CONJ
-
-
-TAG_MAP = {
- "AP__Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs": {POS: DET},
- "AP__Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs": {POS: DET},
- "AP__Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs": {POS: DET},
- "AP__Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs": {POS: DET},
- "AP__Gender=Masc|Poss=Yes|PronType=Prs": {POS: DET},
- "AP__Number=Sing|Poss=Yes|PronType=Prs": {POS: DET},
- "AP__Poss=Yes|PronType=Prs": {POS: DET},
- "A__Degree=Abs|Gender=Fem|Number=Plur": {POS: ADJ},
- "A__Degree=Abs|Gender=Fem|Number=Sing": {POS: ADJ},
- "A__Degree=Abs|Gender=Masc|Number=Plur": {POS: ADJ},
- "A__Degree=Abs|Gender=Masc|Number=Sing": {POS: ADJ},
- "A__Degree=Cmp": {POS: ADJ},
- "A__Degree=Cmp|Number=Plur": {POS: ADJ},
- "A__Degree=Cmp|Number=Sing": {POS: ADJ},
- "A__Gender=Fem|Number=Plur": {POS: ADJ},
- "A__Gender=Fem|Number=Sing": {POS: ADJ},
- "A__Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs": {POS: ADJ},
- "A__Gender=Masc": {POS: ADJ},
- "A__Gender=Masc|Number=Plur": {POS: ADJ},
- "A__Gender=Masc|Number=Sing": {POS: ADJ},
- "A__Number=Plur": {POS: ADJ},
- "A__Number=Sing": {POS: ADJ},
- "A___": {POS: ADJ},
- "BN__PronType=Neg": {POS: ADV},
- "B__Degree=Abs": {POS: ADV},
- "B__Degree=Abs|Gender=Masc|Number=Sing": {POS: ADV},
- "B___": {POS: ADV},
- "CC___": {POS: CONJ},
- "CS___": {POS: SCONJ},
- "DD__Gender=Fem|Number=Plur|PronType=Dem": {POS: DET},
- "DD__Gender=Fem|Number=Sing|PronType=Dem": {POS: DET},
- "DD__Gender=Masc|Number=Plur|PronType=Dem": {POS: DET},
- "DD__Gender=Masc|Number=Sing|PronType=Dem": {POS: DET},
- "DD__Gender=Masc|PronType=Dem": {POS: DET},
- "DD__Number=Plur|PronType=Dem": {POS: DET},
- "DD__Number=Sing|PronType=Dem": {POS: DET},
- "DE__PronType=Exc": {POS: DET},
- "DI__Definite=Def|Gender=Fem|Number=Plur|PronType=Art": {POS: DET},
- "DI__Gender=Fem|Number=Plur": {POS: DET},
- "DI__Gender=Fem|Number=Plur|PronType=Ind": {POS: DET},
- "DI__Gender=Fem|Number=Sing|PronType=Ind": {POS: DET},
- "DI__Gender=Masc|Number=Plur": {POS: DET},
- "DI__Gender=Masc|Number=Plur|PronType=Ind": {POS: DET},
- "DI__Gender=Masc|Number=Sing|PronType=Ind": {POS: DET},
- "DI__Number=Sing|PronType=Art": {POS: DET},
- "DI__Number=Sing|PronType=Ind": {POS: DET},
- "DI__PronType=Ind": {POS: DET},
- "DQ__Gender=Fem|Number=Plur|PronType=Int": {POS: DET},
- "DQ__Gender=Fem|Number=Sing|PronType=Int": {POS: DET},
- "DQ__Gender=Masc|Number=Plur|PronType=Int": {POS: DET},
- "DQ__Gender=Masc|Number=Sing|PronType=Int": {POS: DET},
- "DQ__Number=Plur|PronType=Int": {POS: DET},
- "DQ__Number=Sing|PronType=Int": {POS: DET},
- "DQ__PronType=Int": {POS: DET},
- "DQ___": {POS: DET},
- "DR__Number=Plur|PronType=Rel": {POS: DET},
- "DR__PronType=Rel": {POS: DET},
- "E__Gender=Masc|Number=Sing": {POS: ADP},
- "E___": {POS: ADP},
- "FB___": {POS: PUNCT},
- "FC___": {POS: PUNCT},
- "FF___": {POS: PUNCT},
- "FS___": {POS: PUNCT},
- "I__Polarity=Neg": {POS: INTJ},
- "I__Polarity=Pos": {POS: INTJ},
- "I___": {POS: INTJ},
- "NO__Gender=Fem|Number=Plur|NumType=Ord": {POS: ADJ},
- "NO__Gender=Fem|Number=Sing|NumType=Ord": {POS: ADJ},
- "NO__Gender=Masc|Number=Plur": {POS: ADJ},
- "NO__Gender=Masc|Number=Plur|NumType=Ord": {POS: ADJ},
- "NO__Gender=Masc|Number=Sing|NumType=Ord": {POS: ADJ},
- "NO__NumType=Ord": {POS: ADJ},
- "NO__Number=Sing|NumType=Ord": {POS: ADJ},
- "NO___": {POS: ADJ},
- "N__Gender=Masc|Number=Sing": {POS: NUM},
- "N__NumType=Card": {POS: NUM},
- "N__NumType=Range": {POS: NUM},
- "N___": {POS: NUM},
- "PART___": {POS: PART},
- "PC__Clitic=Yes|Definite=Def|Gender=Fem|Number=Plur|PronType=Art": {POS: PRON},
- "PC__Clitic=Yes|Gender=Fem|Number=Plur|Person=3|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Gender=Fem|Number=Plur|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Gender=Fem|Person=3|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Gender=Masc|Number=Plur|Person=3|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Gender=Masc|Number=Sing|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Number=Plur|Person=1|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Number=Plur|Person=2|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Number=Plur|Person=3|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Number=Plur|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Number=Sing|Person=1|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Number=Sing|Person=2|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Number=Sing|Person=3|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|Person=3|PronType=Prs": {POS: PRON},
- "PC__Clitic=Yes|PronType=Prs": {POS: PRON},
- "PD__Gender=Fem|Number=Plur|PronType=Dem": {POS: PRON},
- "PD__Gender=Fem|Number=Sing|PronType=Dem": {POS: PRON},
- "PD__Gender=Masc|Number=Plur|PronType=Dem": {POS: PRON},
- "PD__Gender=Masc|Number=Sing|PronType=Dem": {POS: PRON},
- "PD__Number=Plur|PronType=Dem": {POS: PRON},
- "PD__Number=Sing|PronType=Dem": {POS: PRON},
- "PD__PronType=Dem": {POS: PRON},
- "PE__Gender=Fem|Number=Plur|Person=3|PronType=Prs": {POS: PRON},
- "PE__Gender=Fem|Number=Sing|Person=3|PronType=Prs": {POS: PRON},
- "PE__Gender=Masc|Number=Plur|Person=3|PronType=Prs": {POS: PRON},
- "PE__Gender=Masc|Number=Sing|Person=3|PronType=Prs": {POS: PRON},
- "PE__Number=Plur|Person=1|PronType=Prs": {POS: PRON},
- "PE__Number=Plur|Person=2|PronType=Prs": {POS: PRON},
- "PE__Number=Plur|Person=3|PronType=Prs": {POS: PRON},
- "PE__Number=Sing|Person=1|PronType=Prs": {POS: PRON},
- "PE__Number=Sing|Person=2|PronType=Prs": {POS: PRON},
- "PE__Number=Sing|Person=3|PronType=Prs": {POS: PRON},
- "PE__Person=3|PronType=Prs": {POS: PRON},
- "PE__PronType=Prs": {POS: PRON},
- "PI__Gender=Fem|Number=Plur|PronType=Ind": {POS: PRON},
- "PI__Gender=Fem|Number=Sing|PronType=Ind": {POS: PRON},
- "PI__Gender=Masc|Number=Plur|PronType=Ind": {POS: PRON},
- "PI__Gender=Masc|Number=Sing": {POS: PRON},
- "PI__Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON},
- "PI__Number=Plur|PronType=Ind": {POS: PRON},
- "PI__Number=Sing|PronType=Ind": {POS: PRON},
- "PI__PronType=Ind": {POS: PRON},
- "PP__Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs": {POS: PRON},
- "PP__Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs": {POS: PRON},
- "PP__Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs": {POS: PRON},
- "PP__Number=Plur|Poss=Yes|PronType=Prs": {POS: PRON},
- "PP__Number=Sing|Poss=Yes|PronType=Prs": {POS: PRON},
- "PQ__Gender=Fem|Number=Plur|PronType=Int": {POS: PRON},
- "PQ__Gender=Fem|Number=Sing|PronType=Int": {POS: PRON},
- "PQ__Gender=Masc|Number=Plur|PronType=Int": {POS: PRON},
- "PQ__Gender=Masc|Number=Sing|PronType=Int": {POS: PRON},
- "PQ__Number=Plur|PronType=Int": {POS: PRON},
- "PQ__Number=Sing|PronType=Int": {POS: PRON},
- "PQ__PronType=Int": {POS: PRON},
- "PR__Gender=Masc|Number=Plur|PronType=Rel": {POS: PRON},
- "PR__Gender=Masc|Number=Sing|PronType=Rel": {POS: PRON},
- "PR__Gender=Masc|PronType=Rel": {POS: PRON},
- "PR__Number=Plur|PronType=Rel": {POS: PRON},
- "PR__Number=Sing|PronType=Rel": {POS: PRON},
- "PR__Person=3|PronType=Rel": {POS: PRON},
- "PR__PronType=Rel": {POS: PRON},
- "RD__Definite=Def": {POS: DET},
- "RD__Definite=Def|Gender=Fem": {POS: DET},
- "RD__Definite=Def|Gender=Fem|Number=Plur|PronType=Art": {POS: DET},
- "RD__Definite=Def|Gender=Fem|Number=Sing|PronType=Art": {POS: DET},
- "RD__Definite=Def|Gender=Masc|Number=Plur|PronType=Art": {POS: DET},
- "RD__Definite=Def|Gender=Masc|Number=Sing|PronType=Art": {POS: DET},
- "RD__Definite=Def|Number=Plur|PronType=Art": {POS: DET},
- "RD__Definite=Def|Number=Sing|PronType=Art": {POS: DET},
- "RD__Definite=Def|PronType=Art": {POS: DET},
- "RD__Gender=Fem|Number=Sing": {POS: DET},
- "RD__Gender=Masc|Number=Sing": {POS: DET},
- "RD__Number=Sing": {POS: DET},
- "RD__Number=Sing|PronType=Art": {POS: DET},
- "RI__Definite=Ind|Gender=Fem|Number=Plur|PronType=Art": {POS: DET},
- "RI__Definite=Ind|Gender=Fem|Number=Sing|PronType=Art": {POS: DET},
- "RI__Definite=Ind|Gender=Masc|Number=Plur|PronType=Art": {POS: DET},
- "RI__Definite=Ind|Gender=Masc|Number=Sing|PronType=Art": {POS: DET},
- "RI__Definite=Ind|Number=Sing|PronType=Art": {POS: DET},
- "RI__Definite=Ind|PronType=Art": {POS: DET},
- "SP__Gender=Fem|Number=Plur": {POS: PROPN},
- "SP__NumType=Card": {POS: PROPN},
- "SP___": {POS: PROPN},
- "SW__Foreign=Yes": {POS: X},
- "SW__Foreign=Yes|Gender=Masc": {POS: X},
- "SW__Foreign=Yes|Number=Sing": {POS: X},
- "SYM___": {POS: SYM},
- "S__Gender=Fem": {POS: NOUN},
- "S__Gender=Fem|Number=Plur": {POS: NOUN},
- "S__Gender=Fem|Number=Sing": {POS: NOUN},
- "S__Gender=Masc": {POS: NOUN},
- "S__Gender=Masc|Number=Plur": {POS: NOUN},
- "S__Gender=Masc|Number=Sing": {POS: NOUN},
- "S__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {POS: NOUN},
- "S__Number=Plur": {POS: NOUN},
- "S__Number=Sing": {POS: NOUN},
- "S___": {POS: NOUN},
- "Sw___": {POS: X},
- "T__Gender=Fem|Number=Plur|PronType=Tot": {POS: DET},
- "T__Gender=Fem|Number=Sing": {POS: DET},
- "T__Gender=Fem|Number=Sing|PronType=Tot": {POS: DET},
- "T__Gender=Masc|Number=Plur|PronType=Tot": {POS: DET},
- "T__Gender=Masc|Number=Sing|PronType=Tot": {POS: DET},
- "T__Number=Plur|PronType=Tot": {POS: DET},
- "T__PronType=Tot": {POS: DET},
- "VA__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {POS: AUX},
- "VA__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {POS: AUX},
- "VA__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {POS: AUX},
- "VA__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {POS: AUX},
- "VA__Mood=Cnd|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Cnd|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Cnd|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Cnd|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=2|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=2|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Sub|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VA__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VA__VerbForm=Ger": {POS: AUX},
- "VA__VerbForm=Inf": {POS: AUX},
- "VM__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {POS: AUX},
- "VM__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {POS: AUX},
- "VM__Mood=Cnd|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Cnd|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Cnd|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Cnd|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Cnd|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Cnd|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Imp|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Imp|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Plur|Person=2|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Sing|Person=2|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {POS: AUX},
- "VM__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX},
- "VM__VerbForm=Ger": {POS: AUX},
- "VM__VerbForm=Inf": {POS: AUX},
- "V__Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part": {POS: VERB},
- "V__Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part": {POS: VERB},
- "V__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Fin": {POS: VERB},
- "V__Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part": {POS: VERB},
- "V__Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part": {POS: VERB},
- "V__Mood=Cnd|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Cnd|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Cnd|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Cnd|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Cnd|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Cnd|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Imp|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Imp|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Imp|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Imp|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=1|Tense=Past|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=2|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=3|Tense=Past|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=1|Tense=Past|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Ind|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Plur|Person=1|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Plur|Person=2|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Plur|Person=3|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Sing|Person=1|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Sing|Person=3|Tense=Imp|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB},
- "V__Mood=Sub|Number=Sing|Person=3|VerbForm=Fin": {POS: VERB},
- "V__Number=Plur|Tense=Pres|VerbForm=Part": {POS: VERB},
- "V__Number=Sing|Tense=Pres|VerbForm=Part": {POS: VERB},
- "V__Tense=Past|VerbForm=Part": {POS: VERB},
- "V__VerbForm=Ger": {POS: VERB},
- "V__VerbForm=Inf": {POS: VERB},
- "X___": {POS: X},
- "_SP": {POS: SPACE},
-}
diff --git a/spacy/lang/it/tokenizer_exceptions.py b/spacy/lang/it/tokenizer_exceptions.py
index 70519ba6a..0c9968bc6 100644
--- a/spacy/lang/it/tokenizer_exceptions.py
+++ b/spacy/lang/it/tokenizer_exceptions.py
@@ -1,6 +1,7 @@
-# coding: utf8
-from __future__ import unicode_literals
-from ...symbols import ORTH, LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH
+from ...util import update_exc
+
_exc = {
"all'art.": [{ORTH: "all'"}, {ORTH: "art."}],
@@ -9,7 +10,7 @@ _exc = {
"L'art.": [{ORTH: "L'"}, {ORTH: "art."}],
"l'art.": [{ORTH: "l'"}, {ORTH: "art."}],
"nell'art.": [{ORTH: "nell'"}, {ORTH: "art."}],
- "po'": [{ORTH: "po'", LEMMA: "poco"}],
+ "po'": [{ORTH: "po'"}],
"sett..": [{ORTH: "sett."}, {ORTH: "."}],
}
@@ -54,4 +55,4 @@ for orth in [
]:
_exc[orth] = [{ORTH: orth}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py
index 80cb7a837..117514c09 100644
--- a/spacy/lang/ja/__init__.py
+++ b/spacy/lang/ja/__init__.py
@@ -1,26 +1,180 @@
-# encoding: utf8
-from __future__ import unicode_literals, print_function
-
+from typing import Optional, Union, Dict, Any
+from pathlib import Path
import srsly
-from collections import namedtuple, OrderedDict
+from collections import namedtuple
+from thinc.api import Config
from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
from .tag_map import TAG_MAP
from .tag_orth_map import TAG_ORTH_MAP
from .tag_bigram_map import TAG_BIGRAM_MAP
-from ...attrs import LANG
from ...compat import copy_reg
from ...errors import Errors
from ...language import Language
from ...symbols import POS
from ...tokens import Doc
-from ...util import DummyTokenizer
+from ...util import DummyTokenizer, registry
from ... import util
+DEFAULT_CONFIG = """
+[nlp]
+
+[nlp.tokenizer]
+@tokenizers = "spacy.ja.JapaneseTokenizer"
+split_mode = null
+"""
+
+
+@registry.tokenizers("spacy.ja.JapaneseTokenizer")
+def create_tokenizer(split_mode: Optional[str] = None):
+ def japanese_tokenizer_factory(nlp):
+ return JapaneseTokenizer(nlp, split_mode=split_mode)
+
+ return japanese_tokenizer_factory
+
+
+class JapaneseTokenizer(DummyTokenizer):
+ def __init__(self, nlp: Language, split_mode: Optional[str] = None) -> None:
+ self.vocab = nlp.vocab
+ self.split_mode = split_mode
+ self.tokenizer = try_sudachi_import(self.split_mode)
+
+ def __call__(self, text: str) -> Doc:
+ # convert sudachipy.morpheme.Morpheme to DetailedToken and merge continuous spaces
+ sudachipy_tokens = self.tokenizer.tokenize(text)
+ dtokens = self._get_dtokens(sudachipy_tokens)
+ dtokens, spaces = get_dtokens_and_spaces(dtokens, text)
+
+ # create Doc with tag bi-gram based part-of-speech identification rules
+ words, tags, inflections, lemmas, readings, sub_tokens_list = (
+ zip(*dtokens) if dtokens else [[]] * 6
+ )
+ sub_tokens_list = list(sub_tokens_list)
+ doc = Doc(self.vocab, words=words, spaces=spaces)
+ next_pos = None # for bi-gram rules
+ for idx, (token, dtoken) in enumerate(zip(doc, dtokens)):
+ token.tag_ = dtoken.tag
+ if next_pos: # already identified in previous iteration
+ token.pos = next_pos
+ next_pos = None
+ else:
+ token.pos, next_pos = resolve_pos(
+ token.orth_,
+ dtoken.tag,
+ tags[idx + 1] if idx + 1 < len(tags) else None,
+ )
+ # if there's no lemma info (it's an unk) just use the surface
+ token.lemma_ = dtoken.lemma if dtoken.lemma else dtoken.surface
+ doc.user_data["inflections"] = inflections
+ doc.user_data["reading_forms"] = readings
+ doc.user_data["sub_tokens"] = sub_tokens_list
+ return doc
+
+ def _get_dtokens(self, sudachipy_tokens, need_sub_tokens: bool = True):
+ sub_tokens_list = (
+ self._get_sub_tokens(sudachipy_tokens) if need_sub_tokens else None
+ )
+ dtokens = [
+ DetailedToken(
+ token.surface(), # orth
+ "-".join([xx for xx in token.part_of_speech()[:4] if xx != "*"]), # tag
+ ",".join([xx for xx in token.part_of_speech()[4:] if xx != "*"]), # inf
+ token.dictionary_form(), # lemma
+ token.reading_form(), # user_data['reading_forms']
+ sub_tokens_list[idx]
+ if sub_tokens_list
+ else None, # user_data['sub_tokens']
+ )
+ for idx, token in enumerate(sudachipy_tokens)
+ if len(token.surface()) > 0
+ # remove empty tokens, which can be produced by characters such as "…"
+ ]
+ # Sudachi normalizes internally and outputs each space char as a token.
+ # This is the preparation for get_dtokens_and_spaces() to merge the continuous space tokens
+ return [
+ t
+ for idx, t in enumerate(dtokens)
+ if idx == 0
+ or not t.surface.isspace()
+ or t.tag != "空白"
+ or not dtokens[idx - 1].surface.isspace()
+ or dtokens[idx - 1].tag != "空白"
+ ]
+
+ def _get_sub_tokens(self, sudachipy_tokens):
+ if (
+ self.split_mode is None or self.split_mode == "A"
+ ): # do nothing for default split mode
+ return None
+
+ sub_tokens_list = [] # list of (list of list of DetailedToken | None)
+ for token in sudachipy_tokens:
+ sub_a = token.split(self.tokenizer.SplitMode.A)
+ if len(sub_a) == 1: # no sub tokens
+ sub_tokens_list.append(None)
+ elif self.split_mode == "B":
+ sub_tokens_list.append([self._get_dtokens(sub_a, False)])
+ else: # "C"
+ sub_b = token.split(self.tokenizer.SplitMode.B)
+ if len(sub_a) == len(sub_b):
+ dtokens = self._get_dtokens(sub_a, False)
+ sub_tokens_list.append([dtokens, dtokens])
+ else:
+ sub_tokens_list.append(
+ [
+ self._get_dtokens(sub_a, False),
+ self._get_dtokens(sub_b, False),
+ ]
+ )
+ return sub_tokens_list
+
+ def _get_config(self) -> Dict[str, Any]:
+ return {"split_mode": self.split_mode}
+
+ def _set_config(self, config: Dict[str, Any] = {}) -> None:
+ self.split_mode = config.get("split_mode", None)
+
+ def to_bytes(self, **kwargs) -> bytes:
+ serializers = {"cfg": lambda: srsly.json_dumps(self._get_config())}
+ return util.to_bytes(serializers, [])
+
+ def from_bytes(self, data: bytes, **kwargs) -> "JapaneseTokenizer":
+ deserializers = {"cfg": lambda b: self._set_config(srsly.json_loads(b))}
+ util.from_bytes(data, deserializers, [])
+ self.tokenizer = try_sudachi_import(self.split_mode)
+ return self
+
+ def to_disk(self, path: Union[str, Path], **kwargs) -> None:
+ path = util.ensure_path(path)
+ serializers = {"cfg": lambda p: srsly.write_json(p, self._get_config())}
+ return util.to_disk(path, serializers, [])
+
+ def from_disk(self, path: Union[str, Path], **kwargs) -> "JapaneseTokenizer":
+ path = util.ensure_path(path)
+ serializers = {"cfg": lambda p: self._set_config(srsly.read_json(p))}
+ util.from_disk(path, serializers, [])
+ self.tokenizer = try_sudachi_import(self.split_mode)
+ return self
+
+
+class JapaneseDefaults(Language.Defaults):
+ config = Config().from_str(DEFAULT_CONFIG)
+ stop_words = STOP_WORDS
+ syntax_iterators = SYNTAX_ITERATORS
+ writing_system = {"direction": "ltr", "has_case": False, "has_letters": False}
+
+
+class Japanese(Language):
+ lang = "ja"
+ Defaults = JapaneseDefaults
+
+
# Hold the attributes we need with convenient names
-DetailedToken = namedtuple("DetailedToken", ["surface", "tag", "inf", "lemma", "reading", "sub_tokens"])
+DetailedToken = namedtuple(
+ "DetailedToken", ["surface", "tag", "inf", "lemma", "reading", "sub_tokens"]
+)
def try_sudachi_import(split_mode="A"):
@@ -29,15 +183,14 @@ def try_sudachi_import(split_mode="A"):
split_mode should be one of these values: "A", "B", "C", None->"A"."""
try:
from sudachipy import dictionary, tokenizer
+
split_mode = {
None: tokenizer.Tokenizer.SplitMode.A,
"A": tokenizer.Tokenizer.SplitMode.A,
"B": tokenizer.Tokenizer.SplitMode.B,
"C": tokenizer.Tokenizer.SplitMode.C,
}[split_mode]
- tok = dictionary.Dictionary().create(
- mode=split_mode
- )
+ tok = dictionary.Dictionary().create(mode=split_mode)
return tok
except ImportError:
raise ImportError(
@@ -45,7 +198,7 @@ def try_sudachi_import(split_mode="A"):
"(https://github.com/WorksApplications/SudachiPy). "
"Install with `pip install sudachipy sudachidict_core` or "
"install spaCy with `pip install spacy[ja]`."
- )
+ ) from None
def resolve_pos(orth, tag, next_tag):
@@ -71,7 +224,10 @@ def resolve_pos(orth, tag, next_tag):
if tag_bigram in TAG_BIGRAM_MAP:
current_pos, next_pos = TAG_BIGRAM_MAP[tag_bigram]
if current_pos is None: # apply tag uni-gram mapping for current_pos
- return TAG_MAP[tag][POS], next_pos # only next_pos is identified by tag bi-gram mapping
+ return (
+ TAG_MAP[tag][POS],
+ next_pos,
+ ) # only next_pos is identified by tag bi-gram mapping
else:
return current_pos, next_pos
@@ -93,7 +249,7 @@ def get_dtokens_and_spaces(dtokens, text, gap_tag="空白"):
return text_dtokens, text_spaces
elif len([word for word in words if not word.isspace()]) == 0:
assert text.isspace()
- text_dtokens = [DetailedToken(text, gap_tag, '', text, None, None)]
+ text_dtokens = [DetailedToken(text, gap_tag, "", text, None, None)]
text_spaces = [False]
return text_dtokens, text_spaces
@@ -105,12 +261,12 @@ def get_dtokens_and_spaces(dtokens, text, gap_tag="空白"):
try:
word_start = text[text_pos:].index(word)
except ValueError:
- raise ValueError(Errors.E194.format(text=text, words=words))
+ raise ValueError(Errors.E194.format(text=text, words=words)) from None
# space token
if word_start > 0:
- w = text[text_pos:text_pos + word_start]
- text_dtokens.append(DetailedToken(w, gap_tag, '', w, None, None))
+ w = text[text_pos : text_pos + word_start]
+ text_dtokens.append(DetailedToken(w, gap_tag, "", w, None, None))
text_spaces.append(False)
text_pos += word_start
@@ -126,162 +282,12 @@ def get_dtokens_and_spaces(dtokens, text, gap_tag="空白"):
# trailing space token
if text_pos < len(text):
w = text[text_pos:]
- text_dtokens.append(DetailedToken(w, gap_tag, '', w, None, None))
+ text_dtokens.append(DetailedToken(w, gap_tag, "", w, None, None))
text_spaces.append(False)
return text_dtokens, text_spaces
-class JapaneseTokenizer(DummyTokenizer):
- def __init__(self, cls, nlp=None, config={}):
- self.vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
- self.split_mode = config.get("split_mode", None)
- self.tokenizer = try_sudachi_import(self.split_mode)
-
- def __call__(self, text):
- # convert sudachipy.morpheme.Morpheme to DetailedToken and merge continuous spaces
- sudachipy_tokens = self.tokenizer.tokenize(text)
- dtokens = self._get_dtokens(sudachipy_tokens)
- dtokens, spaces = get_dtokens_and_spaces(dtokens, text)
-
- # create Doc with tag bi-gram based part-of-speech identification rules
- words, tags, inflections, lemmas, readings, sub_tokens_list = zip(*dtokens) if dtokens else [[]] * 6
- sub_tokens_list = list(sub_tokens_list)
- doc = Doc(self.vocab, words=words, spaces=spaces)
- next_pos = None # for bi-gram rules
- for idx, (token, dtoken) in enumerate(zip(doc, dtokens)):
- token.tag_ = dtoken.tag
- if next_pos: # already identified in previous iteration
- token.pos = next_pos
- next_pos = None
- else:
- token.pos, next_pos = resolve_pos(
- token.orth_,
- dtoken.tag,
- tags[idx + 1] if idx + 1 < len(tags) else None
- )
- # if there's no lemma info (it's an unk) just use the surface
- token.lemma_ = dtoken.lemma if dtoken.lemma else dtoken.surface
-
- doc.user_data["inflections"] = inflections
- doc.user_data["reading_forms"] = readings
- doc.user_data["sub_tokens"] = sub_tokens_list
- doc.is_tagged = True
-
- return doc
-
- def _get_dtokens(self, sudachipy_tokens, need_sub_tokens=True):
- sub_tokens_list = self._get_sub_tokens(sudachipy_tokens) if need_sub_tokens else None
- dtokens = [
- DetailedToken(
- token.surface(), # orth
- '-'.join([xx for xx in token.part_of_speech()[:4] if xx != '*']), # tag
- ','.join([xx for xx in token.part_of_speech()[4:] if xx != '*']), # inf
- token.dictionary_form(), # lemma
- token.reading_form(), # user_data['reading_forms']
- sub_tokens_list[idx] if sub_tokens_list else None, # user_data['sub_tokens']
- ) for idx, token in enumerate(sudachipy_tokens) if len(token.surface()) > 0
- # remove empty tokens which can be produced with characters like … that
- ]
- # Sudachi normalizes internally and outputs each space char as a token.
- # This is the preparation for get_dtokens_and_spaces() to merge the continuous space tokens
- return [
- t for idx, t in enumerate(dtokens) if
- idx == 0 or
- not t.surface.isspace() or t.tag != '空白' or
- not dtokens[idx - 1].surface.isspace() or dtokens[idx - 1].tag != '空白'
- ]
-
- def _get_sub_tokens(self, sudachipy_tokens):
- if self.split_mode is None or self.split_mode == "A": # do nothing for default split mode
- return None
-
- sub_tokens_list = [] # list of (list of list of DetailedToken | None)
- for token in sudachipy_tokens:
- sub_a = token.split(self.tokenizer.SplitMode.A)
- if len(sub_a) == 1: # no sub tokens
- sub_tokens_list.append(None)
- elif self.split_mode == "B":
- sub_tokens_list.append([self._get_dtokens(sub_a, False)])
- else: # "C"
- sub_b = token.split(self.tokenizer.SplitMode.B)
- if len(sub_a) == len(sub_b):
- dtokens = self._get_dtokens(sub_a, False)
- sub_tokens_list.append([dtokens, dtokens])
- else:
- sub_tokens_list.append([self._get_dtokens(sub_a, False), self._get_dtokens(sub_b, False)])
- return sub_tokens_list
-
- def _get_config(self):
- config = OrderedDict(
- (
- ("split_mode", self.split_mode),
- )
- )
- return config
-
- def _set_config(self, config={}):
- self.split_mode = config.get("split_mode", None)
-
- def to_bytes(self, **kwargs):
- serializers = OrderedDict(
- (
- ("cfg", lambda: srsly.json_dumps(self._get_config())),
- )
- )
- return util.to_bytes(serializers, [])
-
- def from_bytes(self, data, **kwargs):
- deserializers = OrderedDict(
- (
- ("cfg", lambda b: self._set_config(srsly.json_loads(b))),
- )
- )
- util.from_bytes(data, deserializers, [])
- self.tokenizer = try_sudachi_import(self.split_mode)
- return self
-
- def to_disk(self, path, **kwargs):
- path = util.ensure_path(path)
- serializers = OrderedDict(
- (
- ("cfg", lambda p: srsly.write_json(p, self._get_config())),
- )
- )
- return util.to_disk(path, serializers, [])
-
- def from_disk(self, path, **kwargs):
- path = util.ensure_path(path)
- serializers = OrderedDict(
- (
- ("cfg", lambda p: self._set_config(srsly.read_json(p))),
- )
- )
- util.from_disk(path, serializers, [])
- self.tokenizer = try_sudachi_import(self.split_mode)
-
-
-class JapaneseDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda _text: "ja"
- stop_words = STOP_WORDS
- tag_map = TAG_MAP
- syntax_iterators = SYNTAX_ITERATORS
- writing_system = {"direction": "ltr", "has_case": False, "has_letters": False}
-
- @classmethod
- def create_tokenizer(cls, nlp=None, config={}):
- return JapaneseTokenizer(cls, nlp, config)
-
-
-class Japanese(Language):
- lang = "ja"
- Defaults = JapaneseDefaults
-
- def make_doc(self, text):
- return self.tokenizer(text)
-
-
def pickle_japanese(instance):
return Japanese, tuple()
diff --git a/spacy/lang/ja/examples.py b/spacy/lang/ja/examples.py
index e00001ed5..c3a011862 100644
--- a/spacy/lang/ja/examples.py
+++ b/spacy/lang/ja/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/ja/stop_words.py b/spacy/lang/ja/stop_words.py
index bb232a2d2..98560d7e2 100644
--- a/spacy/lang/ja/stop_words.py
+++ b/spacy/lang/ja/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
# This list was created by taking the top 2000 words from a Wikipedia dump and
# filtering out everything that wasn't hiragana. ー (one) was also added.
# Considered keeping some non-hiragana words but too many place names were
diff --git a/spacy/lang/ja/syntax_iterators.py b/spacy/lang/ja/syntax_iterators.py
index cd1e4fde7..cca4902ab 100644
--- a/spacy/lang/ja/syntax_iterators.py
+++ b/spacy/lang/ja/syntax_iterators.py
@@ -1,35 +1,23 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Union, Iterator
from ...symbols import NOUN, PROPN, PRON, VERB
+from ...tokens import Doc, Span
-# XXX this can probably be pruned a bit
-labels = [
- "nsubj",
- "nmod",
- "dobj",
- "nsubjpass",
- "pcomp",
- "pobj",
- "obj",
- "obl",
- "dative",
- "appos",
- "attr",
- "ROOT",
-]
-def noun_chunks(obj):
- """
- Detect base noun phrases from a dependency parse. Works on both Doc and Span.
- """
+# Dependency labels used by noun_chunks below. TODO: this can probably be pruned a bit
+# fmt: off
+labels = ["nsubj", "nmod", "dobj", "nsubjpass", "pcomp", "pobj", "obj", "obl", "dative", "appos", "attr", "ROOT"]
+# fmt: on
- doc = obj.doc # Ensure works on both Doc and Span.
+
+def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Span]:
+ """Detect base noun phrases from a dependency parse. Works on Doc and Span."""
+ doc = doclike.doc # Ensure works on both Doc and Span.
np_deps = [doc.vocab.strings.add(label) for label in labels]
- conj = doc.vocab.strings.add("conj")
+ doc.vocab.strings.add("conj")
np_label = doc.vocab.strings.add("NP")
seen = set()
- for i, word in enumerate(obj):
+ for i, word in enumerate(doclike):
if word.pos not in (NOUN, PROPN, PRON):
continue
# Prevent nested chunks from being produced
@@ -39,12 +27,10 @@ def noun_chunks(obj):
unseen = [w.i for w in word.subtree if w.i not in seen]
if not unseen:
continue
-
# this takes care of particles etc.
seen.update(j.i for j in word.subtree)
# This avoids duplicating embedded clauses
seen.update(range(word.i + 1))
-
# if the head of this is a verb, mark that and rights seen
# Don't do the subtree as that can hide other phrases
if word.head.pos == VERB:
@@ -52,4 +38,5 @@ def noun_chunks(obj):
seen.update(w.i for w in word.head.rights)
yield unseen[0], word.i + 1, np_label
+
SYNTAX_ITERATORS = {"noun_chunks": noun_chunks}
diff --git a/spacy/lang/ja/tag_bigram_map.py b/spacy/lang/ja/tag_bigram_map.py
index 5ed9aec89..9d15fc520 100644
--- a/spacy/lang/ja/tag_bigram_map.py
+++ b/spacy/lang/ja/tag_bigram_map.py
@@ -1,21 +1,15 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, ADJ, AUX, NOUN, PART, VERB
+from ...symbols import ADJ, AUX, NOUN, PART, VERB
# mapping from tag bi-gram to pos of previous token
TAG_BIGRAM_MAP = {
# This covers only small part of AUX.
("形容詞-非自立可能", "助詞-終助詞"): (AUX, None),
-
("名詞-普通名詞-形状詞可能", "助動詞"): (ADJ, None),
# ("副詞", "名詞-普通名詞-形状詞可能"): (None, ADJ),
-
# This covers acl, advcl, obl and root, but has side effect for compound.
("名詞-普通名詞-サ変可能", "動詞-非自立可能"): (VERB, AUX),
# This covers almost all of the deps
("名詞-普通名詞-サ変形状詞可能", "動詞-非自立可能"): (VERB, AUX),
-
("名詞-普通名詞-副詞可能", "動詞-非自立可能"): (None, VERB),
("副詞", "動詞-非自立可能"): (None, VERB),
("形容詞-一般", "動詞-非自立可能"): (None, VERB),
@@ -25,12 +19,9 @@ TAG_BIGRAM_MAP = {
("助詞-副助詞", "動詞-非自立可能"): (None, VERB),
("助詞-格助詞", "動詞-非自立可能"): (None, VERB),
("補助記号-読点", "動詞-非自立可能"): (None, VERB),
-
("形容詞-一般", "接尾辞-名詞的-一般"): (None, PART),
-
("助詞-格助詞", "形状詞-助動詞語幹"): (None, NOUN),
("連体詞", "形状詞-助動詞語幹"): (None, NOUN),
-
("動詞-一般", "助詞-副助詞"): (None, PART),
("動詞-非自立可能", "助詞-副助詞"): (None, PART),
("助動詞", "助詞-副助詞"): (None, PART),
diff --git a/spacy/lang/ja/tag_map.py b/spacy/lang/ja/tag_map.py
index ad416e109..c6de3831a 100644
--- a/spacy/lang/ja/tag_map.py
+++ b/spacy/lang/ja/tag_map.py
@@ -1,8 +1,5 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, INTJ, X, ADJ, AUX, ADP, PART, CCONJ, SCONJ, NOUN
-from ...symbols import SYM, PRON, VERB, ADV, PROPN, NUM, DET, SPACE
+from ...symbols import POS, PUNCT, INTJ, ADJ, AUX, ADP, PART, SCONJ, NOUN
+from ...symbols import SYM, PRON, VERB, ADV, PROPN, NUM, DET, SPACE, CCONJ
TAG_MAP = {
@@ -11,94 +8,61 @@ TAG_MAP = {
# Universal Dependencies Mapping: (Some of the entries in this mapping are updated to v2.6 in the list below)
# http://universaldependencies.org/ja/overview/morphology.html
# http://universaldependencies.org/ja/pos/all.html
- "記号-一般": {
- POS: NOUN
- }, # this includes characters used to represent sounds like ドレミ
+ "記号-一般": {POS: NOUN}, # this includes characters used to represent sounds like ドレミ
"記号-文字": {
POS: NOUN
}, # this is for Greek and Latin characters having some meanings, or used as symbols, as in math
"感動詞-フィラー": {POS: INTJ},
"感動詞-一般": {POS: INTJ},
-
"空白": {POS: SPACE},
-
"形状詞-一般": {POS: ADJ},
"形状詞-タリ": {POS: ADJ},
"形状詞-助動詞語幹": {POS: AUX},
-
"形容詞-一般": {POS: ADJ},
-
"形容詞-非自立可能": {POS: ADJ}, # XXX ADJ if alone, AUX otherwise
-
"助詞-格助詞": {POS: ADP},
-
"助詞-係助詞": {POS: ADP},
-
"助詞-終助詞": {POS: PART},
"助詞-準体助詞": {POS: SCONJ}, # の as in 走るのが速い
"助詞-接続助詞": {POS: SCONJ}, # verb ending て0
-
"助詞-副助詞": {POS: ADP}, # ばかり, つつ after a verb
-
"助動詞": {POS: AUX},
-
"接続詞": {POS: CCONJ}, # XXX: might need refinement
"接頭辞": {POS: NOUN},
"接尾辞-形状詞的": {POS: PART}, # がち, チック
-
"接尾辞-形容詞的": {POS: AUX}, # -らしい
-
"接尾辞-動詞的": {POS: PART}, # -じみ
"接尾辞-名詞的-サ変可能": {POS: NOUN}, # XXX see 名詞,普通名詞,サ変可能,*
"接尾辞-名詞的-一般": {POS: NOUN},
"接尾辞-名詞的-助数詞": {POS: NOUN},
"接尾辞-名詞的-副詞可能": {POS: NOUN}, # -後, -過ぎ
-
"代名詞": {POS: PRON},
-
"動詞-一般": {POS: VERB},
-
"動詞-非自立可能": {POS: AUX}, # XXX VERB if alone, AUX otherwise
-
"副詞": {POS: ADV},
-
"補助記号-AA-一般": {POS: SYM}, # text art
"補助記号-AA-顔文字": {POS: PUNCT}, # kaomoji
-
"補助記号-一般": {POS: SYM},
-
"補助記号-括弧開": {POS: PUNCT}, # open bracket
"補助記号-括弧閉": {POS: PUNCT}, # close bracket
"補助記号-句点": {POS: PUNCT}, # period or other EOS marker
"補助記号-読点": {POS: PUNCT}, # comma
-
"名詞-固有名詞-一般": {POS: PROPN}, # general proper noun
"名詞-固有名詞-人名-一般": {POS: PROPN}, # person's name
"名詞-固有名詞-人名-姓": {POS: PROPN}, # surname
"名詞-固有名詞-人名-名": {POS: PROPN}, # first name
"名詞-固有名詞-地名-一般": {POS: PROPN}, # place name
"名詞-固有名詞-地名-国": {POS: PROPN}, # country name
-
"名詞-助動詞語幹": {POS: AUX},
"名詞-数詞": {POS: NUM}, # includes Chinese numerals
-
"名詞-普通名詞-サ変可能": {POS: NOUN}, # XXX: sometimes VERB in UDv2; suru-verb noun
-
"名詞-普通名詞-サ変形状詞可能": {POS: NOUN},
-
"名詞-普通名詞-一般": {POS: NOUN},
-
"名詞-普通名詞-形状詞可能": {POS: NOUN}, # XXX: sometimes ADJ in UDv2
-
"名詞-普通名詞-助数詞可能": {POS: NOUN}, # counter / unit
-
"名詞-普通名詞-副詞可能": {POS: NOUN},
-
"連体詞": {POS: DET}, # XXX this has exceptions based on literal token
-
# GSD tags. These aren't in Unidic, but we need them for the GSD data.
"外国語": {POS: PROPN}, # Foreign words
-
"絵文字・記号等": {POS: SYM}, # emoji / kaomoji ^^;
-
}
diff --git a/spacy/lang/ja/tag_orth_map.py b/spacy/lang/ja/tag_orth_map.py
index 355cc655b..9d32cdea7 100644
--- a/spacy/lang/ja/tag_orth_map.py
+++ b/spacy/lang/ja/tag_orth_map.py
@@ -1,17 +1,9 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, ADJ, AUX, DET, PART, PRON, SPACE ,X
+from ...symbols import DET, PART, PRON, SPACE, X
# mapping from tag bi-gram to pos of previous token
TAG_ORTH_MAP = {
- "空白": {
- " ": SPACE,
- " ": X,
- },
- "助詞-副助詞": {
- "たり": PART,
- },
+ "空白": {" ": SPACE, " ": X},
+ "助詞-副助詞": {"たり": PART},
"連体詞": {
"あの": DET,
"かの": DET,
diff --git a/spacy/lang/kn/__init__.py b/spacy/lang/kn/__init__.py
index c86354248..8e53989e6 100644
--- a/spacy/lang/kn/__init__.py
+++ b/spacy/lang/kn/__init__.py
@@ -1,14 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from ...language import Language
-from ...attrs import LANG
class KannadaDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "kn"
stop_words = STOP_WORDS
diff --git a/spacy/lang/kn/examples.py b/spacy/lang/kn/examples.py
index d82630432..3e055752e 100644
--- a/spacy/lang/kn/examples.py
+++ b/spacy/lang/kn/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/kn/stop_words.py b/spacy/lang/kn/stop_words.py
index 652341e73..dba9740af 100644
--- a/spacy/lang/kn/stop_words.py
+++ b/spacy/lang/kn/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
ಹಲವು
diff --git a/spacy/lang/ko/__init__.py b/spacy/lang/ko/__init__.py
index 21a754168..47a3887a6 100644
--- a/spacy/lang/ko/__init__.py
+++ b/spacy/lang/ko/__init__.py
@@ -1,65 +1,54 @@
-# encoding: utf8
-from __future__ import unicode_literals, print_function
+from typing import Optional, Any, Dict
+from thinc.api import Config
from .stop_words import STOP_WORDS
from .tag_map import TAG_MAP
-from ...attrs import LANG
+from .lex_attrs import LEX_ATTRS
from ...language import Language
from ...tokens import Doc
from ...compat import copy_reg
-from ...util import DummyTokenizer
+from ...symbols import POS
+from ...util import DummyTokenizer, registry
-def try_mecab_import():
- try:
- from natto import MeCab
+DEFAULT_CONFIG = """
+[nlp]
- return MeCab
- except ImportError:
- raise ImportError(
- "Korean support requires [mecab-ko](https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md), "
- "[mecab-ko-dic](https://bitbucket.org/eunjeon/mecab-ko-dic), "
- "and [natto-py](https://github.com/buruzaemon/natto-py)"
- )
+[nlp.tokenizer]
+@tokenizers = "spacy.ko.KoreanTokenizer"
+"""
-# fmt: on
+@registry.tokenizers("spacy.ko.KoreanTokenizer")
+def create_tokenizer():
+ def korean_tokenizer_factory(nlp):
+ return KoreanTokenizer(nlp)
-
-def check_spaces(text, tokens):
- prev_end = -1
- start = 0
- for token in tokens:
- idx = text.find(token, start)
- if prev_end > 0:
- yield prev_end != idx
- prev_end = idx + len(token)
- start = prev_end
- if start > 0:
- yield False
+ return korean_tokenizer_factory
class KoreanTokenizer(DummyTokenizer):
- def __init__(self, cls, nlp=None):
- self.vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
+ def __init__(self, nlp: Optional[Language] = None):
+ self.vocab = nlp.vocab
MeCab = try_mecab_import()
self.mecab_tokenizer = MeCab("-F%f[0],%f[7]")
def __del__(self):
self.mecab_tokenizer.__del__()
- def __call__(self, text):
+ def __call__(self, text: str) -> Doc:
dtokens = list(self.detailed_tokens(text))
surfaces = [dt["surface"] for dt in dtokens]
doc = Doc(self.vocab, words=surfaces, spaces=list(check_spaces(text, surfaces)))
for token, dtoken in zip(doc, dtokens):
first_tag, sep, eomi_tags = dtoken["tag"].partition("+")
token.tag_ = first_tag # stem(어간) or pre-final(선어말 어미)
+ token.pos = TAG_MAP[token.tag_][POS]
token.lemma_ = dtoken["lemma"]
doc.user_data["full_tags"] = [dt["tag"] for dt in dtokens]
return doc
- def detailed_tokens(self, text):
+ def detailed_tokens(self, text: str) -> Dict[str, Any]:
# 품사 태그(POS)[0], 의미 부류(semantic class)[1], 종성 유무(jongseong)[2], 읽기(reading)[3],
# 타입(type)[4], 첫번째 품사(start pos)[5], 마지막 품사(end pos)[6], 표현(expression)[7], *
for node in self.mecab_tokenizer.parse(text, as_nodes=True):
@@ -75,23 +64,41 @@ class KoreanTokenizer(DummyTokenizer):
class KoreanDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda _text: "ko"
+ config = Config().from_str(DEFAULT_CONFIG)
+ lex_attr_getters = LEX_ATTRS
stop_words = STOP_WORDS
- tag_map = TAG_MAP
writing_system = {"direction": "ltr", "has_case": False, "has_letters": False}
- @classmethod
- def create_tokenizer(cls, nlp=None):
- return KoreanTokenizer(cls, nlp)
-
class Korean(Language):
lang = "ko"
Defaults = KoreanDefaults
- def make_doc(self, text):
- return self.tokenizer(text)
+
+def try_mecab_import() -> Any:
+    """Import and return `MeCab` from natto; raise ImportError listing the required packages."""
+    try:
+        from natto import MeCab
+        return MeCab
+    except ImportError:
+        raise ImportError(
+            "Korean support requires [mecab-ko](https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md), "
+            "[mecab-ko-dic](https://bitbucket.org/eunjeon/mecab-ko-dic), "
+            "and [natto-py](https://github.com/buruzaemon/natto-py)"
+        ) from None
+
+
+def check_spaces(text, tokens):
+    """Yield a flag per token: True if a gap precedes the next token; the last flag is False."""
+    last_end, cursor = -1, 0
+    for tok in tokens:
+        begin = text.find(tok, cursor)
+        if last_end > 0:
+            yield last_end != begin
+        last_end = begin + len(tok)
+        cursor = last_end
+    if cursor > 0:
+        yield False
def pickle_korean(instance):
diff --git a/spacy/lang/ko/examples.py b/spacy/lang/ko/examples.py
index 0306e5db8..edb755eaa 100644
--- a/spacy/lang/ko/examples.py
+++ b/spacy/lang/ko/examples.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/ko/lex_attrs.py b/spacy/lang/ko/lex_attrs.py
index 1904a0ece..ac5bc7e48 100644
--- a/spacy/lang/ko/lex_attrs.py
+++ b/spacy/lang/ko/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/ko/stop_words.py b/spacy/lang/ko/stop_words.py
index 676dca1b4..3eba9fc82 100644
--- a/spacy/lang/ko/stop_words.py
+++ b/spacy/lang/ko/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
STOP_WORDS = set(
"""
이
diff --git a/spacy/lang/ko/tag_map.py b/spacy/lang/ko/tag_map.py
index 57317c969..26a8c56b9 100644
--- a/spacy/lang/ko/tag_map.py
+++ b/spacy/lang/ko/tag_map.py
@@ -1,6 +1,3 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
from ...symbols import POS, PUNCT, INTJ, X, SYM, ADJ, AUX, ADP, CONJ, NOUN, PRON
from ...symbols import VERB, ADV, PROPN, NUM, DET
diff --git a/spacy/lang/lb/__init__.py b/spacy/lang/lb/__init__.py
index 8d85b8fc7..da6fe55d7 100644
--- a/spacy/lang/lb/__init__.py
+++ b/spacy/lang/lb/__init__.py
@@ -1,26 +1,15 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .punctuation import TOKENIZER_INFIXES
from .lex_attrs import LEX_ATTRS
-from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
class LuxembourgishDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "lb"
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
- tag_map = TAG_MAP
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
infixes = TOKENIZER_INFIXES
+ lex_attr_getters = LEX_ATTRS
+ stop_words = STOP_WORDS
class Luxembourgish(Language):
diff --git a/spacy/lang/lb/examples.py b/spacy/lang/lb/examples.py
index 3cbba31d9..a7a10489c 100644
--- a/spacy/lang/lb/examples.py
+++ b/spacy/lang/lb/examples.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/lb/lex_attrs.py b/spacy/lang/lb/lex_attrs.py
index e38c74974..d2d50d9dc 100644
--- a/spacy/lang/lb/lex_attrs.py
+++ b/spacy/lang/lb/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/lb/punctuation.py b/spacy/lang/lb/punctuation.py
index 2a4587856..e382c56c5 100644
--- a/spacy/lang/lb/punctuation.py
+++ b/spacy/lang/lb/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
ELISION = " ' ’ ".strip().replace(" ", "")
diff --git a/spacy/lang/lb/stop_words.py b/spacy/lang/lb/stop_words.py
index 41e6f79d2..8f22ea6e6 100644
--- a/spacy/lang/lb/stop_words.py
+++ b/spacy/lang/lb/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
STOP_WORDS = set(
"""
a
diff --git a/spacy/lang/lb/tag_map.py b/spacy/lang/lb/tag_map.py
deleted file mode 100644
index 424a83bb4..000000000
--- a/spacy/lang/lb/tag_map.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, ADJ, CONJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import NOUN, PART, SPACE, AUX
-
-# TODO: tag map is still using POS tags from an internal training set.
-# These POS tags have to be modified to match those from Universal Dependencies
-
-TAG_MAP = {
- "$": {POS: PUNCT},
- "ADJ": {POS: ADJ},
- "AV": {POS: ADV},
- "APPR": {POS: ADP, "AdpType": "prep"},
- "APPRART": {POS: ADP, "AdpType": "prep", "PronType": "art"},
- "D": {POS: DET, "PronType": "art"},
- "KO": {POS: CONJ},
- "N": {POS: NOUN},
- "P": {POS: ADV},
- "TRUNC": {POS: X, "Hyph": "yes"},
- "AUX": {POS: AUX},
- "V": {POS: VERB},
- "MV": {POS: VERB, "VerbType": "mod"},
- "PTK": {POS: PART},
- "INTER": {POS: PART},
- "NUM": {POS: NUM},
- "_SP": {POS: SPACE},
-}
diff --git a/spacy/lang/lb/tokenizer_exceptions.py b/spacy/lang/lb/tokenizer_exceptions.py
index 1c9b2dde3..d00dc9610 100644
--- a/spacy/lang/lb/tokenizer_exceptions.py
+++ b/spacy/lang/lb/tokenizer_exceptions.py
@@ -1,7 +1,7 @@
-# coding: utf8
-from __future__ import unicode_literals
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
-from ...symbols import ORTH, LEMMA, NORM
# TODO
# treat other apostrophes within words as part of the word: [op d'mannst], [fir d'éischt] (= exceptions)
@@ -10,19 +10,19 @@ _exc = {}
# translate / delete what is not necessary
for exc_data in [
- {ORTH: "’t", LEMMA: "et", NORM: "et"},
- {ORTH: "’T", LEMMA: "et", NORM: "et"},
- {ORTH: "'t", LEMMA: "et", NORM: "et"},
- {ORTH: "'T", LEMMA: "et", NORM: "et"},
- {ORTH: "wgl.", LEMMA: "wannechgelift", NORM: "wannechgelift"},
- {ORTH: "M.", LEMMA: "Monsieur", NORM: "Monsieur"},
- {ORTH: "Mme.", LEMMA: "Madame", NORM: "Madame"},
- {ORTH: "Dr.", LEMMA: "Dokter", NORM: "Dokter"},
- {ORTH: "Tel.", LEMMA: "Telefon", NORM: "Telefon"},
- {ORTH: "asw.", LEMMA: "an sou weider", NORM: "an sou weider"},
- {ORTH: "etc.", LEMMA: "et cetera", NORM: "et cetera"},
- {ORTH: "bzw.", LEMMA: "bezéiungsweis", NORM: "bezéiungsweis"},
- {ORTH: "Jan.", LEMMA: "Januar", NORM: "Januar"},
+ {ORTH: "’t", NORM: "et"},
+ {ORTH: "’T", NORM: "et"},
+ {ORTH: "'t", NORM: "et"},
+ {ORTH: "'T", NORM: "et"},
+ {ORTH: "wgl.", NORM: "wannechgelift"},
+ {ORTH: "M.", NORM: "Monsieur"},
+ {ORTH: "Mme.", NORM: "Madame"},
+ {ORTH: "Dr.", NORM: "Dokter"},
+ {ORTH: "Tel.", NORM: "Telefon"},
+ {ORTH: "asw.", NORM: "an sou weider"},
+ {ORTH: "etc.", NORM: "et cetera"},
+ {ORTH: "bzw.", NORM: "bezéiungsweis"},
+ {ORTH: "Jan.", NORM: "Januar"},
]:
_exc[exc_data[ORTH]] = [exc_data]
@@ -50,4 +50,4 @@ for orth in [
]:
_exc[orth] = [{ORTH: orth}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py
index 254f8706d..12016c273 100644
--- a/spacy/lang/lex_attrs.py
+++ b/spacy/lang/lex_attrs.py
@@ -1,6 +1,4 @@
-# coding: utf8
-from __future__ import unicode_literals
-
+from typing import Set
import unicodedata
import re
@@ -24,21 +22,21 @@ _tlds = set(
)
-def is_punct(text):
+def is_punct(text: str) -> bool:
for char in text:
if not unicodedata.category(char).startswith("P"):
return False
return True
-def is_ascii(text):
+def is_ascii(text: str) -> bool:
for char in text:
if ord(char) >= 128:
return False
return True
-def like_num(text):
+def like_num(text: str) -> bool:
if text.startswith(("+", "-", "±", "~")):
text = text[1:]
# can be overwritten by lang with list of number words
@@ -52,64 +50,31 @@ def like_num(text):
return False
-def is_bracket(text):
+def is_bracket(text: str) -> bool:
brackets = ("(", ")", "[", "]", "{", "}", "<", ">")
return text in brackets
-def is_quote(text):
- quotes = (
- '"',
- "'",
- "`",
- "«",
- "»",
- "‘",
- "’",
- "‚",
- "‛",
- "“",
- "”",
- "„",
- "‟",
- "‹",
- "›",
- "❮",
- "❯",
- "''",
- "``",
- )
+def is_quote(text: str) -> bool:
+ # fmt: off
+ quotes = ('"', "'", "`", "«", "»", "‘", "’", "‚", "‛", "“", "”", "„", "‟", "‹", "›", "❮", "❯", "''", "``")
+ # fmt: on
return text in quotes
-def is_left_punct(text):
- left_punct = (
- "(",
- "[",
- "{",
- "<",
- '"',
- "'",
- "«",
- "‘",
- "‚",
- "‛",
- "“",
- "„",
- "‟",
- "‹",
- "❮",
- "``",
- )
+def is_left_punct(text: str) -> bool:
+ # fmt: off
+ left_punct = ("(", "[", "{", "<", '"', "'", "«", "‘", "‚", "‛", "“", "„", "‟", "‹", "❮", "``")
+ # fmt: on
return text in left_punct
-def is_right_punct(text):
+def is_right_punct(text: str) -> bool:
right_punct = (")", "]", "}", ">", '"', "'", "»", "’", "”", "›", "❯", "''")
return text in right_punct
-def is_currency(text):
+def is_currency(text: str) -> bool:
# can be overwritten by lang with list of currency words, e.g. dollar, euro
for char in text:
if unicodedata.category(char) != "Sc":
@@ -117,11 +82,11 @@ def is_currency(text):
return True
-def like_email(text):
+def like_email(text: str) -> bool:
return bool(_like_email(text))
-def like_url(text):
+def like_url(text: str) -> bool:
# We're looking for things that function in text like URLs. So, valid URL
# or not, anything they say http:// is going to be good.
if text.startswith("http://") or text.startswith("https://"):
@@ -147,7 +112,7 @@ def like_url(text):
return False
-def word_shape(text):
+def word_shape(text: str) -> str:
if len(text) >= 100:
return "LONG"
shape = []
@@ -174,46 +139,52 @@ def word_shape(text):
return "".join(shape)
-def lower(string):
+def lower(string: str) -> str:
return string.lower()
-def prefix(string):
+def prefix(string: str) -> str:
return string[0]
-def suffix(string):
+def suffix(string: str) -> str:
return string[-3:]
-def is_alpha(string):
+def is_alpha(string: str) -> bool:
return string.isalpha()
-def is_digit(string):
+def is_digit(string: str) -> bool:
return string.isdigit()
-def is_lower(string):
+def is_lower(string: str) -> bool:
return string.islower()
-def is_space(string):
+def is_space(string: str) -> bool:
return string.isspace()
-def is_title(string):
+def is_title(string: str) -> bool:
return string.istitle()
-def is_upper(string):
+def is_upper(string: str) -> bool:
return string.isupper()
-def is_stop(string, stops=set()):
+def is_stop(string: str, stops: Set[str] = set()) -> bool:
return string.lower() in stops
+def get_lang(text: str, lang: str = "") -> str:
+ # This function is partially applied so lang code can be passed in
+ # automatically while still allowing pickling
+ return lang
+
+
LEX_ATTRS = {
attrs.LOWER: lower,
attrs.NORM: lower,
diff --git a/spacy/lang/lij/__init__.py b/spacy/lang/lij/__init__.py
index 9b4b29798..5ae280324 100644
--- a/spacy/lang/lij/__init__.py
+++ b/spacy/lang/lij/__init__.py
@@ -1,26 +1,13 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .punctuation import TOKENIZER_INFIXES
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
class LigurianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "lij"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
infixes = TOKENIZER_INFIXES
+ stop_words = STOP_WORDS
class Ligurian(Language):
diff --git a/spacy/lang/lij/examples.py b/spacy/lang/lij/examples.py
index c4034ae7e..ba7fe43fd 100644
--- a/spacy/lang/lij/examples.py
+++ b/spacy/lang/lij/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/lij/punctuation.py b/spacy/lang/lij/punctuation.py
index 4439376c8..d50b75589 100644
--- a/spacy/lang/lij/punctuation.py
+++ b/spacy/lang/lij/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..punctuation import TOKENIZER_INFIXES
from ..char_classes import ALPHA
diff --git a/spacy/lang/lij/stop_words.py b/spacy/lang/lij/stop_words.py
index ffd53370d..1d6f09d27 100644
--- a/spacy/lang/lij/stop_words.py
+++ b/spacy/lang/lij/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
a à â a-a a-e a-i a-o aiva aloa an ancheu ancon apreuvo ascì atra atre atri atro avanti avei
diff --git a/spacy/lang/lij/tokenizer_exceptions.py b/spacy/lang/lij/tokenizer_exceptions.py
index 2109add62..52eae2c89 100644
--- a/spacy/lang/lij/tokenizer_exceptions.py
+++ b/spacy/lang/lij/tokenizer_exceptions.py
@@ -1,52 +1,50 @@
-# coding: utf8
-from __future__ import unicode_literals
-from ...symbols import ORTH, LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH
+from ...util import update_exc
+
_exc = {}
-for raw, lemma in [
- ("a-a", "a-o"),
- ("a-e", "a-o"),
- ("a-o", "a-o"),
- ("a-i", "a-o"),
- ("co-a", "co-o"),
- ("co-e", "co-o"),
- ("co-i", "co-o"),
- ("co-o", "co-o"),
- ("da-a", "da-o"),
- ("da-e", "da-o"),
- ("da-i", "da-o"),
- ("da-o", "da-o"),
- ("pe-a", "pe-o"),
- ("pe-e", "pe-o"),
- ("pe-i", "pe-o"),
- ("pe-o", "pe-o"),
+for raw in [
+ "a-e",
+ "a-o",
+ "a-i",
+ "a-a",
+ "co-a",
+ "co-e",
+ "co-i",
+ "co-o",
+ "da-a",
+ "da-e",
+ "da-i",
+ "da-o",
+ "pe-a",
+ "pe-e",
+ "pe-i",
+ "pe-o",
]:
for orth in [raw, raw.capitalize()]:
- _exc[orth] = [{ORTH: orth, LEMMA: lemma}]
+ _exc[orth] = [{ORTH: orth}]
# Prefix + prepositions with à (e.g. "sott'a-o")
-for prep, prep_lemma in [
- ("a-a", "a-o"),
- ("a-e", "a-o"),
- ("a-o", "a-o"),
- ("a-i", "a-o"),
+for prep in [
+ "a-a",
+ "a-e",
+ "a-o",
+ "a-i",
]:
- for prefix, prefix_lemma in [
- ("sott'", "sotta"),
- ("sott’", "sotta"),
- ("contr'", "contra"),
- ("contr’", "contra"),
- ("ch'", "che"),
- ("ch’", "che"),
- ("s'", "se"),
- ("s’", "se"),
+ for prefix in [
+ "sott'",
+ "sott’",
+ "contr'",
+ "contr’",
+ "ch'",
+ "ch’",
+ "s'",
+ "s’",
]:
for prefix_orth in [prefix, prefix.capitalize()]:
- _exc[prefix_orth + prep] = [
- {ORTH: prefix_orth, LEMMA: prefix_lemma},
- {ORTH: prep, LEMMA: prep_lemma},
- ]
+ _exc[prefix_orth + prep] = [{ORTH: prefix_orth}, {ORTH: prep}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/lt/__init__.py b/spacy/lang/lt/__init__.py
index ce2c8d6a4..e395a8f62 100644
--- a/spacy/lang/lt/__init__.py
+++ b/spacy/lang/lt/__init__.py
@@ -1,42 +1,16 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-from .tag_map import TAG_MAP
-from .morph_rules import MORPH_RULES
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
-
-
-def _return_lt(_):
- return "lt"
class LithuanianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = _return_lt
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- lex_attr_getters.update(LEX_ATTRS)
-
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
- mod_base_exceptions = {
- exc: val for exc, val in BASE_EXCEPTIONS.items() if not exc.endswith(".")
- }
- del mod_base_exceptions["8)"]
- tokenizer_exceptions = update_exc(mod_base_exceptions, TOKENIZER_EXCEPTIONS)
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
stop_words = STOP_WORDS
- tag_map = TAG_MAP
- morph_rules = MORPH_RULES
+ lex_attr_getters = LEX_ATTRS
class Lithuanian(Language):
diff --git a/spacy/lang/lt/examples.py b/spacy/lang/lt/examples.py
index 99dbe9d4d..eaf941f1a 100644
--- a/spacy/lang/lt/examples.py
+++ b/spacy/lang/lt/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/lt/lex_attrs.py b/spacy/lang/lt/lex_attrs.py
index 81879948f..28894a59b 100644
--- a/spacy/lang/lt/lex_attrs.py
+++ b/spacy/lang/lt/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
_num_words = {
diff --git a/spacy/lang/lt/morph_rules.py b/spacy/lang/lt/morph_rules.py
deleted file mode 100644
index 3bf26d9d8..000000000
--- a/spacy/lang/lt/morph_rules.py
+++ /dev/null
@@ -1,3075 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import LEMMA, PRON_LEMMA
-
-
-_coordinating_conjunctions = [
- "ar",
- "arba",
- "bei",
- "beigi",
- "bet",
- "betgi",
- "ir",
- "kadangi",
- "kuo",
- "ne",
- "o",
- "tad",
- "tai",
- "tačiau",
- "tegul",
- "tik",
- "visgi",
-]
-
-_subordinating_conjunctions = [
- "jei",
- "jeigu",
- "jog",
- "kad",
- "kai",
- "kaip",
- "kol",
- "lyg",
- "nebent",
- "negu",
- "nei",
- "nes",
- "nors",
- "tarsi",
- "tuo",
- "užuot",
-]
-
-MORPH_RULES = {
- "Cg": dict(
- [(word, {"POS": "CCONJ"}) for word in _coordinating_conjunctions]
- + [(word, {"POS": "SCONJ"}) for word in _subordinating_conjunctions]
- ),
- "Pg--an": {
- "keletą": {LEMMA: PRON_LEMMA, "POS": "PRON", "Case": "Acc", "PronType": "Ind"},
- "save": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "PronType": "Prs",
- "Reflex": "Yes",
- },
- },
- "Pg--dn": {
- "sau": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "PronType": "Prs",
- "Reflex": "Yes",
- }
- },
- "Pg--gn": {
- "keleto": {LEMMA: PRON_LEMMA, "POS": "PRON", "Case": "Gen", "PronType": "Ind"},
- "savo": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "PronType": "Prs",
- "Reflex": "Yes",
- },
- "savęs": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "PronType": "Prs",
- "Reflex": "Yes",
- },
- },
- "Pg--in": {
- "savimi": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "PronType": "Prs",
- "Reflex": "Yes",
- }
- },
- "Pg--nn": {
- "keletas": {LEMMA: PRON_LEMMA, "POS": "PRON", "Case": "Nom", "PronType": "Ind"}
- },
- "Pg-dnn": {
- "mudu": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Number": "Dual",
- "Person": "1",
- "PronType": "Prs",
- }
- },
- "Pg-pa-": {
- "jus": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Number": "Plur",
- "Person": "2",
- "PronType": "Prs",
- }
- },
- "Pg-pan": {
- "jus": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Number": "Plur",
- "Person": "2",
- "PronType": "Prs",
- },
- "mus": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Number": "Plur",
- "Person": "1",
- "PronType": "Prs",
- },
- },
- "Pg-pdn": {
- "jums": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Number": "Plur",
- "Person": "2",
- "PronType": "Prs",
- },
- "mums": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Number": "Plur",
- "Person": "1",
- "PronType": "Prs",
- },
- },
- "Pg-pgn": {
- "jūsų": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Number": "Plur",
- "Person": "2",
- "PronType": "Prs",
- },
- "mūsų": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Number": "Plur",
- "Person": "1",
- "PronType": "Prs",
- },
- },
- "Pg-pin": {
- "jumis": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Number": "Plur",
- "Person": "2",
- "PronType": "Prs",
- },
- "mumis": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Number": "Plur",
- "Person": "1",
- "PronType": "Prs",
- },
- },
- "Pg-pln": {
- "jumyse": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Number": "Plur",
- "Person": "2",
- "PronType": "Prs",
- }
- },
- "Pg-pnn": {
- "jūs": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Number": "Plur",
- "Person": "2",
- "PronType": "Prs",
- },
- "mes": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Number": "Plur",
- "Person": "1",
- "PronType": "Prs",
- },
- },
- "Pg-san": {
- "mane": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Number": "Sing",
- "Person": "1",
- "PronType": "Prs",
- },
- "tave": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Number": "Sing",
- "Person": "2",
- "PronType": "Prs",
- },
- },
- "Pg-sd-": {
- "tau": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Number": "Sing",
- "Person": "2",
- "PronType": "Prs",
- }
- },
- "Pg-sdn": {
- "man": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Number": "Sing",
- "Person": "1",
- "PronType": "Prs",
- },
- "sau": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Number": "Sing",
- "PronType": "Prs",
- "Reflex": "Yes",
- },
- "tau": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Number": "Sing",
- "Person": "2",
- "PronType": "Prs",
- },
- },
- "Pg-sgn": {
- "mano": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Number": "Sing",
- "Person": "1",
- "PronType": "Prs",
- },
- "manęs": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Number": "Sing",
- "Person": "1",
- "PronType": "Prs",
- },
- "tavo": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Number": "Sing",
- "Person": "2",
- "PronType": "Prs",
- },
- "tavęs": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Number": "Sing",
- "Person": "2",
- "PronType": "Prs",
- },
- },
- "Pg-sin": {
- "manimi": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Number": "Sing",
- "Person": "1",
- "PronType": "Prs",
- },
- "tavim": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Number": "Sing",
- "Person": "2",
- "PronType": "Prs",
- },
- "tavimi": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Number": "Sing",
- "Person": "2",
- "PronType": "Prs",
- },
- },
- "Pg-sln": {
- "manyje": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Number": "Sing",
- "Person": "1",
- "PronType": "Prs",
- },
- "tavyje": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Number": "Sing",
- "Person": "2",
- "PronType": "Prs",
- },
- },
- "Pg-snn": {
- "aš": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Number": "Sing",
- "Person": "1",
- "PronType": "Prs",
- },
- "tu": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Number": "Sing",
- "Person": "2",
- "PronType": "Prs",
- },
- },
- "Pgf-an": {
- "kelias": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Fem",
- "PronType": "Ind",
- }
- },
- "Pgf-dn": {
- "kelioms": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Fem",
- "PronType": "Ind",
- }
- },
- "Pgf-nn": {
- "kelios": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "PronType": "Ind",
- }
- },
- "Pgfdn-": {
- "abi": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Dual",
- "PronType": "Ind",
- }
- },
- "Pgfpan": {
- "jas": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "kelias": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kitas": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kokias": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Int",
- },
- "kurias": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Int",
- },
- "savas": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "tas": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "tokias": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgfpdn": {
- "joms": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Fem",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "kitoms": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kurioms": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Int",
- },
- "tokioms": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgfpgn": {
- "jokių": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Neg",
- },
- "jų": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "kelių": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kitų": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kurių": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Int",
- },
- "pačių": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Emp",
- },
- "tokių": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "tų": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgfpin": {
- "jomis": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "kitokiomis": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kitomis": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kokiomis": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Int",
- },
- "kuriomis": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Int",
- },
- "pačiomis": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Emp",
- },
- "tomis": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgfpln": {
- "jose": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "kitose": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kuriose": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Int",
- },
- "tokiose": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "tose": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgfpnn": {
- "jos": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "kitokios": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kitos": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kokios": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Int",
- },
- "kurios": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Int",
- },
- "pačios": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Emp",
- },
- "tokios": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "tos": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgfsan": {
- "ją": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kiekvieną": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kitokią": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kitą": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kokią": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Int",
- },
- "kurią": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Int",
- },
- "pačią": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Emp",
- },
- "tokią": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "tą": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgfsdn": {
- "jai": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Fem",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kiekvienai": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kitai": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "pačiai": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Emp",
- },
- },
- "Pgfsgn": {
- "jokios": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Neg",
- },
- "jos": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kiekvienos": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kokios": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Int",
- },
- "kurios": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Int",
- },
- "pačios": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Emp",
- },
- "tokios": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "tos": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgfsin": {
- "ja": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kiekviena": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kita": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kuria": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Int",
- },
- "ta": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "tokia": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgfsln": {
- "joje": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kiekvienoje": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kitoje": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kurioje": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Int",
- },
- "toje": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "tokioje": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgfsnn": {
- "ji": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kiekviena": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kita": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kokia": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Int",
- },
- "kuri": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Int",
- },
- "pati": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Emp",
- },
- "sava": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "ta": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "tokia": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgfsny": {
- "jinai": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "toji": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgfsny-": {
- "jinai": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- }
- },
- "Pgm-a-": {
- "kelis": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "PronType": "Ind",
- }
- },
- "Pgm-an": {
- "kelis": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "PronType": "Ind",
- }
- },
- "Pgm-dn": {
- "keliems": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Masc",
- "PronType": "Ind",
- }
- },
- "Pgm-gn": {
- "kelių": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Masc",
- "PronType": "Ind",
- }
- },
- "Pgm-nn": {
- "keli": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Masc",
- "PronType": "Ind",
- }
- },
- "Pgmdan": {
- "mudu": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Dual",
- "Person": "1",
- "PronType": "Prs",
- }
- },
- "Pgmdgn": {
- "mudviejų": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Dual",
- "Person": "1",
- "PronType": "Prs",
- }
- },
- "Pgmdnn": {
- "jiedu": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Dual",
- "Person": "3",
- "PronType": "Prs",
- },
- "mudu": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Dual",
- "Person": "1",
- "PronType": "Prs",
- },
- },
- "Pgmpan": {
- "juos": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "jus": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "Person": "2",
- "PronType": "Prs",
- },
- "kitus": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kokius": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Int",
- },
- "kuriuos": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Int",
- },
- "pačius": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Emp",
- },
- "tokius": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "tuos": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgmpan-": {
- "juos": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- }
- },
- "Pgmpdn": {
- "jiems": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "kitiems": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kuriems": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Int",
- },
- "patiems": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Emp",
- },
- "tiems": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgmpgn": {
- "jokių": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Neg",
- },
- "jų": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "kitų": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kokių": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Int",
- },
- "kurių": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Int",
- },
- "pačių": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Emp",
- },
- "tokių": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "tų": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgmpin": {
- "jais": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "jokiais": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Neg",
- },
- "kitais": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kokiais": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Int",
- },
- "savais": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "tais": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "tokiais": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgmpln": {
- "juose": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Gender": "Masc",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "kituose": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Ind",
- },
- },
- "Pgmpnn": {
- "jie": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "Person": "3",
- "PronType": "Prs",
- },
- "jūs": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "Person": "2",
- "PronType": "Prs",
- },
- "kiti": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Ind",
- },
- "kokie": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Int",
- },
- "kurie": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Int",
- },
- "patys": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Emp",
- },
- "tie": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Dem",
- },
- "tokie": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "PronType": "Dem",
- },
- },
- "Pgmsan": {
- "jį": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kiekvieną": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kitokį": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kitą": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kokį": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "kurį": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "tokį": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "tą": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgmsdn": {
- "jam": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kiekvienam": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kitam": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kokiam": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "kuriam": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "pačiam": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Emp",
- },
- "tam": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgmsgn": {
- "jo": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "jokio": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Neg",
- },
- "kiekvieno": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kito": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kokio": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "kurio": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "paties": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Emp",
- },
- "savo": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "to": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "tokio": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgmsin": {
- "juo": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kitu": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kokiu": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "kuriuo": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "pačiu": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Emp",
- },
- "tokiu": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "tuo": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgmsln": {
- "jame": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Loc",
- "Gender": "Masc",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "kiekvienam": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kokiame": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "kuriame": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "tame": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Loc",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgmsnn": {
- "jis": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "Person": "3",
- "PronType": "Prs",
- },
- "joks": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Neg",
- },
- "kiekvienas": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Tot",
- },
- "kitas": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "kitoks": {
- LEMMA: PRON_LEMMA,
- "POS": "PRON",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Ind",
- },
- "koks": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "kuris": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Int",
- },
- "pats": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Emp",
- },
- "tas": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "toks": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgmsny": {
- "patsai": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Emp",
- },
- "tasai": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- "toksai": {
- LEMMA: PRON_LEMMA,
- "POS": "DET",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "PronType": "Dem",
- },
- },
- "Pgn--n": {
- "tai": {LEMMA: PRON_LEMMA, "POS": "DET", "Gender": "Neut", "PronType": "Dem"}
- },
- "Pgnn--n": {
- "tai": {LEMMA: PRON_LEMMA, "POS": "DET", "Gender": "Neut", "PronType": "Dem"}
- },
- "Pgsmdn": {
- "tam": {LEMMA: PRON_LEMMA, "POS": "DET", "Case": "Dat", "PronType": "Dem"}
- },
- "Qg": {"tai": {LEMMA: "tas", "POS": "PART"}},
- "Vgap----n--n--": {
- "esant": {
- LEMMA: "būti",
- "POS": "VERB",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Ger",
- },
- "turint": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Ger",
- },
- },
- "Vgh--pm-n--n--": {
- "būdami": {
- LEMMA: "būti",
- "POS": "VERB",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "POS",
- "VerbForm": "Conv",
- }
- },
- "Vgh--sm-n--n--": {
- "būdamas": {
- LEMMA: "būti",
- "POS": "VERB",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "VerbForm": "Conv",
- }
- },
- "Vgi-----n--n--": {
- "būti": {LEMMA: "būti", "POS": "VERB", "Polarity": "POS", "VerbForm": "Inf"},
- "daryti": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Polarity": "POS",
- "VerbForm": "Inf",
- },
- "turėti": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Polarity": "POS",
- "VerbForm": "Inf",
- },
- },
- "Vgm-1p--n--ns-": {
- "turėtume": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "1",
- "Polarity": "POS",
- "VerbForm": "Fin",
- }
- },
- "Vgm-2p--n--nm-": {
- "būkite": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- "darykit": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- "darykite": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- "turėkite": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- },
- "Vgm-2p--n--ns-": {
- "turėtumėte": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "VerbForm": "Fin",
- }
- },
- "Vgm-2s--n--ns-": {
- "turėtum": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "2",
- "Polarity": "POS",
- "VerbForm": "Fin",
- }
- },
- "Vgm-3---n--ns-": {
- "būtų": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Cnd",
- "Person": "3",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- "turėtų": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Cnd",
- "Person": "3",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- },
- "Vgm-3p--n--ns-": {
- "būtų": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- "turėtų": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- },
- "Vgm-3s--n--ns-": {
- "būtų": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- "turėtų": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "VerbForm": "Fin",
- },
- },
- "Vgma1p--n--ni-": {
- "turėjom": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "1",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- }
- },
- "Vgma1s--n--ni-": {
- "turėjau": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "1",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- }
- },
- "Vgma3---n--ni-": {
- "buvo": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "turėjo": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- },
- "Vgma3p--n--ni-": {
- "buvo": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "darė": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "turėjo": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- },
- "Vgma3s--n--ni-": {
- "buvo": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "darė": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "turėjo": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- },
- "Vgmf1s--n--ni-": {
- "turėsiu": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "1",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- }
- },
- "Vgmf2p--n--ni-": {
- "būsite": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "darysite": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "turėsite": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- },
- "Vgmf3---n--ni-": {
- "bus": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- }
- },
- "Vgmf3p--n--ni-": {
- "bus": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "darys": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "turės": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- },
- "Vgmf3s--n--ni-": {
- "bus": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "turės": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- },
- "Vgmp1p--n--ni-": {
- "darome": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "1",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "esame": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "1",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "turime": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "1",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- },
- "Vgmp1s--n--ni-": {
- "būnu": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "1",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "esu": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "1",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "turiu": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "1",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- },
- "Vgmp2p--n--ni-": {
- "esate": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "turite": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "2",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- },
- "Vgmp2s--n--ni-": {
- "esi": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "2",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "turi": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "2",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- },
- "Vgmp3---n--ni-": {
- "būna": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "turi": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "yra": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- },
- "Vgmp3p--n--ni-": {
- "būna": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "daro": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "turi": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "yra": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- },
- "Vgmp3s--n--ni-": {
- "būna": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "daro": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "turi": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "yra": {
- LEMMA: "būti",
- "POS": "VERB",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- },
- "Vgmq2s--n--ni-": {
- "turėdavai": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "2",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- }
- },
- "Vgmq3---n--ni-": {
- "būdavo": {
- LEMMA: "būti",
- "POS": "VERB",
- "Aspect": "Hab",
- "Mood": "Ind",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- }
- },
- "Vgmq3s--n--ni-": {
- "turėdavo": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "3",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Fin",
- }
- },
- "Vgp--pfnnnnn-p": {
- "darytinos": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "POS",
- "VerbForm": "Part",
- }
- },
- "Vgpa--nann-n-p": {
- "buvę": {
- LEMMA: "būti",
- "POS": "VERB",
- "Degree": "POS",
- "Gender": "Neut",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpa-pmanngn-p": {
- "buvusių": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Gen",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpa-smanngn-p": {
- "buvusio": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Gen",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpa-smannnn-p": {
- "buvęs": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "turėjęs": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- },
- "Vgpa-smanyin-p": {
- "buvusiuoju": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Ins",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpf-smpnnan-p": {
- "būsimą": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Acc",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Part",
- "Voice": "Pass",
- }
- },
- "Vgpf-smpnndn-p": {
- "būsimam": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Dat",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Fut",
- "VerbForm": "Part",
- "Voice": "Pass",
- }
- },
- "Vgpp--npnn-n-p": {
- "esama": {
- LEMMA: "būti",
- "POS": "VERB",
- "Degree": "POS",
- "Gender": "Neut",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- }
- },
- "Vgpp-pfannan-p": {
- "esančias": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Acc",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-pfanndn-p": {
- "turinčioms": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Case": "Dat",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-pfannin-p": {
- "esančiomis": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Ins",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-pfpnnan-p": {
- "daromas": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Case": "Acc",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "turimas": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Case": "Acc",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- },
- "Vgpp-pfpnnin-p": {
- "turimomis": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Case": "Ins",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- }
- },
- "Vgpp-pmannan-p": {
- "turinčius": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Case": "Acc",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-pmanngn-p": {
- "esančių": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Gen",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-pmannin-p": {
- "esančiais": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Ins",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-pmannnn-p": {
- "esantys": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-pmpnnan-p": {
- "turimus": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Case": "Acc",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- }
- },
- "Vgpp-pmpnngn-p": {
- "esamų": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Gen",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- }
- },
- "Vgpp-sfanngn-p": {
- "turinčios": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Case": "Gen",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-sfannln-p": {
- "esančioje": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Loc",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-sfannnn-p": {
- "esanti": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-sfpnnnn-p": {
- "daroma": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- }
- },
- "Vgpp-smanngn-p": {
- "esančio": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Gen",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- }
- },
- "Vgpp-smannnn-p": {
- "esantis": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "esąs": {
- LEMMA: "būti",
- "POS": "VERB",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "turintis": {
- LEMMA: "turėti",
- "POS": "VERB",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "POS",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- },
- "Vgps--npnn-n-p": {
- "daryta": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Aspect": "Perf",
- "Degree": "POS",
- "Gender": "Neut",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- }
- },
- "Vgps-pmpnnnn-p": {
- "daryti": {
- LEMMA: "daryti",
- "POS": "VERB",
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "POS",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "POS",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- }
- },
-}
-
-
-for tag, rules in MORPH_RULES.items():
- for key, attrs in dict(rules).items():
- rules[key.title()] = attrs
diff --git a/spacy/lang/lt/punctuation.py b/spacy/lang/lt/punctuation.py
index 5eedc8116..506aa8f32 100644
--- a/spacy/lang/lt/punctuation.py
+++ b/spacy/lang/lt/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_ICONS, LIST_ELLIPSES
from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
from ..char_classes import HYPHENS
diff --git a/spacy/lang/lt/stop_words.py b/spacy/lang/lt/stop_words.py
index fed05d80d..8c11b3f7b 100644
--- a/spacy/lang/lt/stop_words.py
+++ b/spacy/lang/lt/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
STOP_WORDS = {
"a",
"abejais",
diff --git a/spacy/lang/lt/tag_map.py b/spacy/lang/lt/tag_map.py
deleted file mode 100644
index 6ea4f8ae0..000000000
--- a/spacy/lang/lt/tag_map.py
+++ /dev/null
@@ -1,4798 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, ADJ, ADP, ADV, CONJ, INTJ, NOUN, NUM, PART
-from ...symbols import PRON, PROPN, PUNCT, SYM, VERB, X
-
-
-TAG_MAP = {
- "Agcfpan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agcfpgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agcfpin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agcfpln": {
- POS: ADJ,
- "Case": "Loc",
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agcfpnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agcfsan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agcfsnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agcfsny": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Cmp",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agcmpan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agcmpgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agcmpin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agcmpnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agcmsa-": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agcmsan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agcmsay": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agcmsgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agcmsnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Cmp",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpfpan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpay": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpdn": {
- POS: ADJ,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpdy": {
- POS: ADJ,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpgy": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpiy": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpln": {
- POS: ADJ,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfpny": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agpfsan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsay": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsdn": {
- POS: ADJ,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsdy": {
- POS: ADJ,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsgy": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsiy": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsln": {
- POS: ADJ,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsly": {
- POS: ADJ,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpfsny": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agpmpan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpay": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpdn": {
- POS: ADJ,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpdy": {
- POS: ADJ,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpgy": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpiy": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpln": {
- POS: ADJ,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmply": {
- POS: ADJ,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmpny": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agpmsan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsay": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsdn": {
- POS: ADJ,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsdy": {
- POS: ADJ,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsgy": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsiy": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsln": {
- POS: ADJ,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsly": {
- POS: ADJ,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsny": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpmsvn": {
- POS: ADJ,
- "Case": "Voc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agpn--n": {POS: ADJ, "Degree": "Pos", "Gender": "Neut"},
- "Agpn-nn": {POS: ADJ, "Case": "Nom", "Degree": "Pos", "Gender": "Neut"},
- "Agsfpan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agsfpdn": {
- POS: ADJ,
- "Case": "Dat",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agsfpgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agsfpin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agsfpnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Plur",
- },
- "Agsfsgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agsfsgy": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agsfsin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agsfsln": {
- POS: ADJ,
- "Case": "Loc",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agsfsnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agsfsny": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Sup",
- "Gender": "Fem",
- "Number": "Sing",
- },
- "Agsmpan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agsmpgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agsmpin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agsmpnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Plur",
- },
- "Agsmsan": {
- POS: ADJ,
- "Case": "Acc",
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agsmsgn": {
- POS: ADJ,
- "Case": "Gen",
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agsmsin": {
- POS: ADJ,
- "Case": "Ins",
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agsmsnn": {
- POS: ADJ,
- "Case": "Nom",
- "Degree": "Sup",
- "Gender": "Masc",
- "Number": "Sing",
- },
- "Agsn--n": {POS: ADJ, "Degree": "Sup", "Gender": "Neut"},
- "Cg": {POS: CONJ},
- "Ig": {POS: INTJ},
- "M----d-": {POS: NUM, "NumForm": "Digit"},
- "M----r-": {POS: NUM, "NumForm": "Roman"},
- "M----rn": {POS: NUM, "NumForm": "Roman"},
- "Mc---l-": {POS: NUM, "NumForm": "Word", "NumType": "Card"},
- "Mc--gl-": {POS: NUM, "Case": "Gen", "NumForm": "Word", "NumType": "Card"},
- "Mcf-al-": {
- POS: NUM,
- "Case": "Acc",
- "Gender": "Fem",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcf-aln": {
- POS: NUM,
- "Case": "Acc",
- "Gender": "Fem",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcf-dl-": {
- POS: NUM,
- "Case": "Dat",
- "Gender": "Fem",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcf-gl-": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Fem",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcf-gln": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Fem",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcf-il-": {
- POS: NUM,
- "Case": "Ins",
- "Gender": "Fem",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcf-iln": {
- POS: NUM,
- "Case": "Ins",
- "Gender": "Fem",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcf-nl-": {
- POS: NUM,
- "Case": "Nom",
- "Gender": "Fem",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcf-nln": {
- POS: NUM,
- "Case": "Nom",
- "Gender": "Fem",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcfpnl-": {
- POS: NUM,
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcfsal-": {
- POS: NUM,
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcfsdl-": {
- POS: NUM,
- "Case": "Dat",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcfsgl-": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcfsgln": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcfsil-": {
- POS: NUM,
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcm-al-": {
- POS: NUM,
- "Case": "Acc",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcm-aln": {
- POS: NUM,
- "Case": "Acc",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcm-dl-": {
- POS: NUM,
- "Case": "Dat",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcm-dln": {
- POS: NUM,
- "Case": "Dat",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcm-gl-": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcm-gln": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcm-il-": {
- POS: NUM,
- "Case": "Ins",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcm-nl-": {
- POS: NUM,
- "Case": "Nom",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcm-nln": {
- POS: NUM,
- "Case": "Nom",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmpal-": {
- POS: NUM,
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmpaln": {
- POS: NUM,
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmpgl-": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmpgln": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmpnl-": {
- POS: NUM,
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmpnln": {
- POS: NUM,
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmsal-": {
- POS: NUM,
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmsaln": {
- POS: NUM,
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmsgl-": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmsgln": {
- POS: NUM,
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcmsnln": {
- POS: NUM,
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Mcnsnln": {
- POS: NUM,
- "Case": "Nom",
- "Gender": "Neut",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Card",
- },
- "Ml--aln": {POS: NUM, "Case": "Acc", "NumForm": "Word", "NumType": "Card"},
- "Mmm-aln": {
- POS: ADV,
- "Case": "Acc",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Mult",
- },
- "Mmm-dln": {
- POS: ADV,
- "Case": "Dat",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Mult",
- },
- "Mmm-gl-": {
- POS: ADV,
- "Case": "Gen",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Mult",
- },
- "Mmm-nln": {
- POS: ADV,
- "Case": "Nom",
- "Gender": "Masc",
- "NumForm": "Word",
- "NumType": "Mult",
- },
- "Mofpily": {
- POS: ADJ,
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Mofsaly": {
- POS: ADJ,
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Mofsamn": {
- POS: ADJ,
- "Case": "Acc",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Combi",
- "NumType": "Ord",
- },
- "Mofsily": {
- POS: ADJ,
- "Case": "Ins",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Mofsnly": {
- POS: ADJ,
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Mofsnmy": {
- POS: ADJ,
- "Case": "Nom",
- "Gender": "Fem",
- "Number": "Sing",
- "NumForm": "Combi",
- "NumType": "Ord",
- },
- "Mompgln": {
- POS: ADJ,
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Mompily": {
- POS: ADJ,
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Mompnln": {
- POS: ADJ,
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Mompnly": {
- POS: ADJ,
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Momsaln": {
- POS: ADJ,
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Momsaly": {
- POS: ADJ,
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Momsgln": {
- POS: ADJ,
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Momsgly": {
- POS: ADJ,
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Momslly": {
- POS: ADJ,
- "Case": "Loc",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Momsnln": {
- POS: ADJ,
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Momsnly": {
- POS: ADJ,
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "NumForm": "Word",
- "NumType": "Ord",
- },
- "Mon--ln": {POS: ADJ, "Gender": "Neut", "NumForm": "Word", "NumType": "Ord"},
- "Nccpnn-": {POS: NOUN, "Case": "Nom", "Number": "Plur"},
- "Nccsdn-": {POS: NOUN, "Case": "Dat", "Number": "Sing"},
- "Nccsgn-": {POS: NOUN, "Case": "Gen", "Number": "Sing"},
- "Nccsnn-": {POS: NOUN, "Case": "Nom", "Number": "Sing"},
- "Ncf--n-": {POS: NOUN, "Gender": "Fem"},
- "Ncfpan-": {POS: NOUN, "Case": "Acc", "Gender": "Fem", "Number": "Plur"},
- "Ncfpdn-": {POS: NOUN, "Case": "Dat", "Gender": "Fem", "Number": "Plur"},
- "Ncfpgn-": {POS: NOUN, "Case": "Gen", "Gender": "Fem", "Number": "Plur"},
- "Ncfpin-": {POS: NOUN, "Case": "Ins", "Gender": "Fem", "Number": "Plur"},
- "Ncfpln-": {POS: NOUN, "Case": "Loc", "Gender": "Fem", "Number": "Plur"},
- "Ncfpnn-": {POS: NOUN, "Case": "Nom", "Gender": "Fem", "Number": "Plur"},
- "Ncfsan-": {POS: NOUN, "Case": "Acc", "Gender": "Fem", "Number": "Sing"},
- "Ncfsdn-": {POS: NOUN, "Case": "Dat", "Gender": "Fem", "Number": "Sing"},
- "Ncfsgn-": {POS: NOUN, "Case": "Gen", "Gender": "Fem", "Number": "Sing"},
- "Ncfsin-": {POS: NOUN, "Case": "Ins", "Gender": "Fem", "Number": "Sing"},
- "Ncfsln-": {POS: NOUN, "Case": "Loc", "Gender": "Fem", "Number": "Sing"},
- "Ncfsnn-": {POS: NOUN, "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
- "Ncfsvn-": {POS: NOUN, "Case": "Voc", "Gender": "Fem", "Number": "Sing"},
- "Ncfsxn-": {POS: NOUN, "Gender": "Fem", "Number": "Sing"},
- "Ncm--a-": {POS: NOUN, "Gender": "Masc"},
- "Ncm--n-": {POS: NOUN, "Gender": "Masc"},
- "Ncmpan-": {POS: NOUN, "Case": "Acc", "Gender": "Masc", "Number": "Plur"},
- "Ncmpdn-": {POS: NOUN, "Case": "Dat", "Gender": "Masc", "Number": "Plur"},
- "Ncmpgn-": {POS: NOUN, "Case": "Gen", "Gender": "Masc", "Number": "Plur"},
- "Ncmpin-": {POS: NOUN, "Case": "Ins", "Gender": "Masc", "Number": "Plur"},
- "Ncmpln-": {POS: NOUN, "Case": "Loc", "Gender": "Masc", "Number": "Plur"},
- "Ncmpnn-": {POS: NOUN, "Case": "Nom", "Gender": "Masc", "Number": "Plur"},
- "Ncmpny-": {
- POS: NOUN,
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "Reflex": "Yes",
- },
- "Ncmsan-": {POS: NOUN, "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
- "Ncmsay-": {
- POS: NOUN,
- "Case": "Acc",
- "Gender": "Masc",
- "Number": "Sing",
- "Reflex": "Yes",
- },
- "Ncmsdn-": {POS: NOUN, "Case": "Dat", "Gender": "Masc", "Number": "Sing"},
- "Ncmsdy-": {
- POS: NOUN,
- "Case": "Dat",
- "Gender": "Masc",
- "Number": "Sing",
- "Reflex": "Yes",
- },
- "Ncmsgn-": {POS: NOUN, "Case": "Gen", "Gender": "Masc", "Number": "Sing"},
- "Ncmsgy-": {
- POS: NOUN,
- "Case": "Gen",
- "Gender": "Masc",
- "Number": "Sing",
- "Reflex": "Yes",
- },
- "Ncmsin-": {POS: NOUN, "Case": "Ins", "Gender": "Masc", "Number": "Sing"},
- "Ncmsiy-": {
- POS: NOUN,
- "Case": "Ins",
- "Gender": "Masc",
- "Number": "Sing",
- "Reflex": "Yes",
- },
- "Ncmsln-": {POS: NOUN, "Case": "Loc", "Gender": "Masc", "Number": "Sing"},
- "Ncmsnn-": {POS: NOUN, "Case": "Nom", "Gender": "Masc", "Number": "Sing"},
- "Ncmsny-": {
- POS: NOUN,
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Sing",
- "Reflex": "Yes",
- },
- "Ncmsvn-": {POS: NOUN, "Case": "Voc", "Gender": "Masc", "Number": "Sing"},
- "Ncmsxn-": {POS: NOUN, "Gender": "Masc", "Number": "Sing"},
- "Np---n-": {POS: PROPN},
- "Npc--n-": {POS: PROPN},
- "Npfpgn-": {POS: PROPN, "Case": "Gen", "Gender": "Fem", "Number": "Plur"},
- "Npfpgng": {
- POS: PROPN,
- "Case": "Gen",
- "Gender": "Fem",
- "NameType": "Geo",
- "Number": "Plur",
- },
- "Npfpln-": {POS: PROPN, "Case": "Loc", "Gender": "Fem", "Number": "Plur"},
- "Npfsan-": {POS: PROPN, "Case": "Acc", "Gender": "Fem", "Number": "Sing"},
- "Npfsanf": {
- POS: PROPN,
- "Case": "Acc",
- "Gender": "Fem",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npfsang": {
- POS: PROPN,
- "Case": "Acc",
- "Gender": "Fem",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npfsdn-": {POS: PROPN, "Case": "Dat", "Gender": "Fem", "Number": "Sing"},
- "Npfsdnf": {
- POS: PROPN,
- "Case": "Dat",
- "Gender": "Fem",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npfsdng": {
- POS: PROPN,
- "Case": "Dat",
- "Gender": "Fem",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npfsdns": {
- POS: PROPN,
- "Case": "Dat",
- "Gender": "Fem",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "Npfsgn-": {POS: PROPN, "Case": "Gen", "Gender": "Fem", "Number": "Sing"},
- "Npfsgnf": {
- POS: PROPN,
- "Case": "Gen",
- "Gender": "Fem",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npfsgng": {
- POS: PROPN,
- "Case": "Gen",
- "Gender": "Fem",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npfsgns": {
- POS: PROPN,
- "Case": "Gen",
- "Gender": "Fem",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "Npfsin-": {POS: PROPN, "Case": "Ins", "Gender": "Fem", "Number": "Sing"},
- "Npfsinf": {
- POS: PROPN,
- "Case": "Ins",
- "Gender": "Fem",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npfsing": {
- POS: PROPN,
- "Case": "Ins",
- "Gender": "Fem",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npfsins": {
- POS: PROPN,
- "Case": "Ins",
- "Gender": "Fem",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "Npfslng": {
- POS: PROPN,
- "Case": "Loc",
- "Gender": "Fem",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npfsnn-": {POS: PROPN, "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
- "Npfsnnf": {
- POS: PROPN,
- "Case": "Nom",
- "Gender": "Fem",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npfsnng": {
- POS: PROPN,
- "Case": "Nom",
- "Gender": "Fem",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npfsnns": {
- POS: PROPN,
- "Case": "Nom",
- "Gender": "Fem",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "Npm--nf": {POS: PROPN, "Gender": "Masc", "NameType": "Giv"},
- "Npmpgng": {
- POS: PROPN,
- "Case": "Gen",
- "Gender": "Masc",
- "NameType": "Geo",
- "Number": "Plur",
- },
- "Npmplng": {
- POS: PROPN,
- "Case": "Loc",
- "Gender": "Masc",
- "NameType": "Geo",
- "Number": "Plur",
- },
- "Npms-nf": {POS: PROPN, "Gender": "Masc", "NameType": "Giv", "Number": "Sing"},
- "Npmsan-": {POS: PROPN, "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
- "Npmsanf": {
- POS: PROPN,
- "Case": "Acc",
- "Gender": "Masc",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npmsang": {
- POS: PROPN,
- "Case": "Acc",
- "Gender": "Masc",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npmsans": {
- POS: PROPN,
- "Case": "Acc",
- "Gender": "Masc",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "Npmsdnf": {
- POS: PROPN,
- "Case": "Dat",
- "Gender": "Masc",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npmsdng": {
- POS: PROPN,
- "Case": "Dat",
- "Gender": "Masc",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npmsdns": {
- POS: PROPN,
- "Case": "Dat",
- "Gender": "Masc",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "Npmsgn-": {POS: PROPN, "Case": "Gen", "Gender": "Masc", "Number": "Sing"},
- "Npmsgnf": {
- POS: PROPN,
- "Case": "Gen",
- "Gender": "Masc",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npmsgng": {
- POS: PROPN,
- "Case": "Gen",
- "Gender": "Masc",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npmsgns": {
- POS: PROPN,
- "Case": "Gen",
- "Gender": "Masc",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "Npmsing": {
- POS: PROPN,
- "Case": "Ins",
- "Gender": "Masc",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npmsins": {
- POS: PROPN,
- "Case": "Ins",
- "Gender": "Masc",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "Npmslng": {
- POS: PROPN,
- "Case": "Loc",
- "Gender": "Masc",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npmsngf": {
- POS: PROPN,
- "Case": "Nom",
- "Gender": "Masc",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npmsnn-": {POS: PROPN, "Case": "Nom", "Gender": "Masc", "Number": "Sing"},
- "Npmsnnf": {
- POS: PROPN,
- "Case": "Nom",
- "Gender": "Masc",
- "NameType": "Giv",
- "Number": "Sing",
- },
- "Npmsnng": {
- POS: PROPN,
- "Case": "Nom",
- "Gender": "Masc",
- "NameType": "Geo",
- "Number": "Sing",
- },
- "Npmsnns": {
- POS: PROPN,
- "Case": "Nom",
- "Gender": "Masc",
- "NameType": "Sur",
- "Number": "Sing",
- },
- "Pg--an": {POS: PRON, "Case": "Acc"},
- "Pg--dn": {POS: PRON, "Case": "Dat"},
- "Pg--gn": {POS: PRON, "Case": "Gen"},
- "Pg--i-": {POS: PRON, "Case": "Ins"},
- "Pg--in": {POS: PRON, "Case": "Ins"},
- "Pg--nn": {POS: PRON, "Case": "Nom"},
- "Pg-dnn": {POS: PRON, "Case": "Nom", "Number": "Dual"},
- "Pg-pa-": {POS: PRON, "Case": "Acc", "Number": "Plur"},
- "Pg-pan": {POS: PRON, "Case": "Acc", "Number": "Plur"},
- "Pg-pdn": {POS: PRON, "Case": "Dat", "Number": "Plur"},
- "Pg-pgn": {POS: PRON, "Case": "Gen", "Number": "Plur"},
- "Pg-pin": {POS: PRON, "Case": "Ins", "Number": "Plur"},
- "Pg-pln": {POS: PRON, "Case": "Loc", "Number": "Plur"},
- "Pg-pnn": {POS: PRON, "Case": "Nom", "Number": "Plur"},
- "Pg-san": {POS: PRON, "Case": "Acc", "Number": "Sing"},
- "Pg-sd-": {POS: PRON, "Case": "Dat", "Number": "Sing"},
- "Pg-sdn": {POS: PRON, "Case": "Dat", "Number": "Sing"},
- "Pg-sgn": {POS: PRON, "Case": "Gen", "Number": "Sing"},
- "Pg-sin": {POS: PRON, "Case": "Ins", "Number": "Sing"},
- "Pg-sln": {POS: PRON, "Case": "Loc", "Number": "Sing"},
- "Pg-snn": {POS: PRON, "Case": "Nom", "Number": "Sing"},
- "Pgf-an": {POS: PRON, "Case": "Acc", "Gender": "Fem"},
- "Pgf-dn": {POS: PRON, "Case": "Dat", "Gender": "Fem"},
- "Pgf-nn": {POS: PRON, "Case": "Nom", "Gender": "Fem"},
- "Pgfpan": {POS: PRON, "Case": "Acc", "Gender": "Fem", "Number": "Plur"},
- "Pgfpdn": {POS: PRON, "Case": "Dat", "Gender": "Fem", "Number": "Plur"},
- "Pgfpgn": {POS: PRON, "Case": "Gen", "Gender": "Fem", "Number": "Plur"},
- "Pgfpin": {POS: PRON, "Case": "Ins", "Gender": "Fem", "Number": "Plur"},
- "Pgfpln": {POS: PRON, "Case": "Loc", "Gender": "Fem", "Number": "Plur"},
- "Pgfpnn": {POS: PRON, "Case": "Nom", "Gender": "Fem", "Number": "Plur"},
- "Pgfsan": {POS: PRON, "Case": "Acc", "Gender": "Fem", "Number": "Sing"},
- "Pgfsdn": {POS: PRON, "Case": "Dat", "Gender": "Fem", "Number": "Sing"},
- "Pgfsgn": {POS: PRON, "Case": "Gen", "Gender": "Fem", "Number": "Sing"},
- "Pgfsin": {POS: PRON, "Case": "Ins", "Gender": "Fem", "Number": "Sing"},
- "Pgfsln": {POS: PRON, "Case": "Loc", "Gender": "Fem", "Number": "Sing"},
- "Pgfsnn": {POS: PRON, "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
- "Pgfsny": {POS: PRON, "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
- "Pgfsny-": {POS: PRON, "Case": "Nom", "Gender": "Fem", "Number": "Sing"},
- "Pgm-a-": {POS: PRON, "Case": "Acc", "Gender": "Masc"},
- "Pgm-an": {POS: PRON, "Case": "Acc", "Gender": "Masc"},
- "Pgm-dn": {POS: PRON, "Case": "Dat", "Gender": "Masc"},
- "Pgm-gn": {POS: PRON, "Case": "Gen", "Gender": "Masc"},
- "Pgm-nn": {POS: PRON, "Case": "Nom", "Gender": "Masc"},
- "Pgmdan": {POS: PRON, "Case": "Acc", "Gender": "Masc", "Number": "Dual"},
- "Pgmdgn": {POS: PRON, "Case": "Gen", "Gender": "Masc", "Number": "Dual"},
- "Pgmdnn": {POS: PRON, "Case": "Nom", "Gender": "Masc", "Number": "Dual"},
- "Pgmpan": {POS: PRON, "Case": "Acc", "Gender": "Masc", "Number": "Plur"},
- "Pgmpan-": {POS: PRON, "Case": "Acc", "Gender": "Masc", "Number": "Plur"},
- "Pgmpdn": {POS: PRON, "Case": "Dat", "Gender": "Masc", "Number": "Plur"},
- "Pgmpgn": {POS: PRON, "Case": "Gen", "Gender": "Masc", "Number": "Plur"},
- "Pgmpin": {POS: PRON, "Case": "Ins", "Gender": "Masc", "Number": "Plur"},
- "Pgmpln": {POS: PRON, "Case": "Loc", "Gender": "Masc", "Number": "Plur"},
- "Pgmpnn": {POS: PRON, "Case": "Nom", "Gender": "Masc", "Number": "Plur"},
- "Pgmsan": {POS: PRON, "Case": "Acc", "Gender": "Masc", "Number": "Sing"},
- "Pgmsdn": {POS: PRON, "Case": "Dat", "Gender": "Masc", "Number": "Sing"},
- "Pgmsgn": {POS: PRON, "Case": "Gen", "Gender": "Masc", "Number": "Sing"},
- "Pgmsin": {POS: PRON, "Case": "Ins", "Gender": "Masc", "Number": "Sing"},
- "Pgmsln": {POS: PRON, "Case": "Loc", "Gender": "Masc", "Number": "Sing"},
- "Pgmsnn": {POS: PRON, "Case": "Nom", "Gender": "Masc", "Number": "Sing"},
- "Pgn--n": {POS: PRON, "Gender": "Neut"},
- "Pgnn--n": {POS: PRON, "Gender": "Neut"},
- "Pgsmdn": {POS: PRON, "Case": "Dat"},
- "Qg": {POS: PART},
- "Rgc": {POS: ADV, "Degree": "Cmp"},
- "Rgp": {POS: ADV, "Degree": "Pos"},
- "Rgs": {POS: ADV, "Degree": "Sup"},
- "Sag": {POS: ADP, "AdpType": "Prep", "Case": "Gen"},
- "Sga": {POS: ADP, "AdpType": "Prep", "Case": "Acc"},
- "Sgg": {POS: ADP, "AdpType": "Prep", "Case": "Gen"},
- "Sgi": {POS: ADP, "AdpType": "Prep", "Case": "Ins"},
- "Vgaa----n--n--": {
- POS: VERB,
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Ger",
- },
- "Vgaa----n--y--": {
- POS: VERB,
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Ger",
- },
- "Vgaa----y--n--": {
- POS: VERB,
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Ger",
- },
- "Vgaa----y--y--": {
- POS: VERB,
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Ger",
- },
- "Vgap----n--n--": {
- POS: VERB,
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Ger",
- },
- "Vgap----n--y": {
- POS: VERB,
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Ger",
- },
- "Vgap----n--y--": {
- POS: VERB,
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Ger",
- },
- "Vgap----y--n--": {
- POS: VERB,
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Ger",
- },
- "Vgap----y--y--": {
- POS: VERB,
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Ger",
- },
- "Vgas----n--y--": {
- POS: VERB,
- "Aspect": "Perf",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Ger",
- },
- "Vgb-----n--n--": {POS: ADV, "Polarity": "Pos", "VerbForm": "Conv"},
- "Vgh--pf-n--n--": {
- POS: VERB,
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "VerbForm": "Conv",
- },
- "Vgh--pf-y--n--": {
- POS: VERB,
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Neg",
- "VerbForm": "Conv",
- },
- "Vgh--pm-n--n--": {
- POS: VERB,
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "VerbForm": "Conv",
- },
- "Vgh--pm-n--y--": {
- POS: VERB,
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Conv",
- },
- "Vgh--pm-y--n--": {
- POS: VERB,
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "VerbForm": "Conv",
- },
- "Vgh--sf-n--n--": {
- POS: VERB,
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "VerbForm": "Conv",
- },
- "Vgh--sf-n--y--": {
- POS: VERB,
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Conv",
- },
- "Vgh--sf-y--n--": {
- POS: VERB,
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Neg",
- "VerbForm": "Conv",
- },
- "Vgh--sm-n--n--": {
- POS: VERB,
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "VerbForm": "Conv",
- },
- "Vgh--sm-n--y--": {
- POS: VERB,
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Conv",
- },
- "Vgh--sm-y--n--": {
- POS: VERB,
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "VerbForm": "Conv",
- },
- "Vgi-----n--n--": {POS: VERB, "Polarity": "Pos", "VerbForm": "Inf"},
- "Vgi-----n--y--": {
- POS: VERB,
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Inf",
- },
- "Vgi-----y--n--": {POS: VERB, "Polarity": "Neg", "VerbForm": "Inf"},
- "Vgi-----y--y--": {
- POS: VERB,
- "Polarity": "Neg",
- "Reflex": "Yes",
- "VerbForm": "Inf",
- },
- "Vgm-1p--n--nm-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-1p--n--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-1p--n--ym-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-1p--y--nm-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Neg",
- "VerbForm": "Fin",
- },
- "Vgm-1p--y--ys-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-1s--n--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-1s--n--ys-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-1s--y--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "VerbForm": "Fin",
- },
- "Vgm-1s--y--ys-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-2p--n--nm-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-2p--n--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-2p--n--ym-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-2p--y--nm-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Neg",
- "VerbForm": "Fin",
- },
- "Vgm-2p--y--ym-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-2s--n--nm-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-2s--n--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-2s--n--ym-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-2s--y--nm-": {
- POS: VERB,
- "Mood": "Imp",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Neg",
- "VerbForm": "Fin",
- },
- "Vgm-2s--y--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Neg",
- "VerbForm": "Fin",
- },
- "Vgm-3---n--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Person": "three",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-3---n--ys-": {
- POS: VERB,
- "Mood": "Cnd",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-3---y--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Person": "three",
- "Polarity": "Neg",
- "VerbForm": "Fin",
- },
- "Vgm-3---y--ys-": {
- POS: VERB,
- "Mood": "Cnd",
- "Person": "three",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-3p--n--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-3p--n--ys-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-3p--y--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "VerbForm": "Fin",
- },
- "Vgm-3s--n--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "VerbForm": "Fin",
- },
- "Vgm-3s--n--ys-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgm-3s--y--ns-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "VerbForm": "Fin",
- },
- "Vgm-3s--y--ys-": {
- POS: VERB,
- "Mood": "Cnd",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "VerbForm": "Fin",
- },
- "Vgma1p--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma1p--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma1p--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma1p--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma1s--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma1s--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma1s--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma1s--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma2p--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma2p--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma2p--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma2s--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma2s--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3---n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3---n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3---y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3--y--ni-": {
- POS: VERB,
- "Case": "Nom",
- "Person": "three",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3p--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3p--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3p--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3p--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3s--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3s--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3s--n--yi--": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3s--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgma3s--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmf1p--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf1p--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf1p--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf1s--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf1s--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf1s--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf2p--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf2p--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf2s--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf2s--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf2s--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Neg",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf2s--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf3---n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf3---y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf3p--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf3p--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf3p--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf3s--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf3s--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf3s--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmf3s--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Fut",
- "VerbForm": "Fin",
- },
- "Vgmp1p--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp1p--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp1p--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp1p--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "one",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp1s--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp1s--n--ni--": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp1s--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp1s--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp1s--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp2p--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp2p--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp2p--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp2p--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "two",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp2s--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp2s--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp2s--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3---n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3---n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3---y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3---y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3p--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3p--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3p--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3p--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3s--n--ni": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3s--n--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3s--n--ni--": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3s--n--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3s--y--ni-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmp3s--y--yi-": {
- POS: VERB,
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Fin",
- },
- "Vgmq1s--n--ni-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmq1s--n--yi-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmq1s--y--ni-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "one",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmq2s--n--ni-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "two",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmq3---n--ni-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmq3p--n--ni-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmq3p--n--yi-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Plur",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmq3s--n--ni-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmq3s--n--yi-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgmq3s--y--ni-": {
- POS: VERB,
- "Aspect": "Hab",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgms3s--n--ni-": {
- POS: VERB,
- "Aspect": "Perf",
- "Mood": "Ind",
- "Number": "Sing",
- "Person": "three",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Fin",
- },
- "Vgp---nnnn-n-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Pos",
- "VerbForm": "Part",
- },
- "Vgp---nnyn-n-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Neg",
- "VerbForm": "Part",
- },
- "Vgp--pfnnnnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "VerbForm": "Part",
- },
- "Vgp--sfnnnnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "VerbForm": "Part",
- },
- "Vgp--smnnnvn-p": {
- POS: VERB,
- "Case": "Voc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "VerbForm": "Part",
- },
- "Vgp--smnynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "VerbForm": "Part",
- },
- "Vgpa--nann-n-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa--nann-y-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa--nayn-n-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pfannan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pfannay-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pfanngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pfannin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pfannnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pfannny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pmannan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pmanndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pmanngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pmannin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pmannnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pmannny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pmanygn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pmaynny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-pmpnnnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpa-sfannan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfannay-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfanndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfanngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfanngy-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfannin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfannnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfannny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfannny-p-": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfanynn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-sfaynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smannan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smannay-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smanngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smanngy-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smannin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smanniy-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smannln-p": {
- POS: VERB,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smannnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smannny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smanygn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smanyin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smanynn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpa-smaynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpf-smannln-p": {
- POS: VERB,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpf-smpnnan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpf-smpnndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Fut",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp--fpnn-n-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Fem",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp--npnn-n-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp--npnn-y-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp--npyn-n-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp--npyn-y-p": {
- POS: VERB,
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfannan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pfanndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pfanngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pfanngy-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pfannin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pfannnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pfannny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pfpnnan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpnndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpnngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpnnin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpnnln-p": {
- POS: VERB,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpnnnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpnnny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpnygn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpnyin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpynan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpyngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpynin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pfpynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmannan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmannay-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmanndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmanngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmanngy-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmannin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmannln-p": {
- POS: VERB,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmannnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmannny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmanyan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmayndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmaynin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmaynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-pmpnnan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpnndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpnngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpnnin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpnniy-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpnnnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpnygn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpnyin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpynan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpyngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-pmpyygn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfannan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfannay-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfanndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfanndn-p-": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfanngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfanngy-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfannin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfannln-p": {
- POS: VERB,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfannnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfanyny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfaynin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfaynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-sfpnnan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpnndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpnngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpnnin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpnnln-p": {
- POS: VERB,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpnnnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpnyan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpnygn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpnyin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpnynn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpyngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-sfpynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-smannan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smanndy-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smanngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smannin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smannln-p": {
- POS: VERB,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smannly-p": {
- POS: VERB,
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smannnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smaynin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smaynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smaynny-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "Reflex": "Yes",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Act",
- },
- "Vgpp-smpnnan-p": {
- POS: VERB,
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-smpnndn-p": {
- POS: VERB,
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-smpnngn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-smpnnin-p": {
- POS: VERB,
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-smpnnnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-smpnygn-p": {
- POS: VERB,
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-smpnynn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgpp-smpynnn-p": {
- POS: VERB,
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Pres",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps--mpnngn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps--npnn-n-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps--npnn-y-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps--npyn-n-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Degree": "Pos",
- "Gender": "Neut",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pfpnnan-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pfpnndn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pfpnngn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pfpnnin-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pfpnnln-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pfpnnnn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnnan-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnnay-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnndn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnngn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnnin-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnnln-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnnnn-": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnnnn-n": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnnnn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnygn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpnynn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpynin-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmpynnn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-pmsnnnn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Plur",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- },
- "Vgps-sfpnnan-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-sfpnndn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-sfpnngn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-sfpnnin-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-sfpnnln-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-sfpnnnn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-sfpynan-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-sfpyngn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-sfpynnn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Fem",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpnnan-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpnnay-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpnndn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Dat",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpnngn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpnnin-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Ins",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpnnln-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Loc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpnnnn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpnnny-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Reflex": "Yes",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpnynn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpynan-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Acc",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpyngn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Gen",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-smpynnn-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Case": "Nom",
- "Degree": "Pos",
- "Gender": "Masc",
- "Number": "Sing",
- "Polarity": "Neg",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "Vgps-snpnn-n-p": {
- POS: VERB,
- "Aspect": "Perf",
- "Degree": "Pos",
- "Gender": "Neut",
- "Number": "Sing",
- "Polarity": "Pos",
- "Tense": "Past",
- "VerbForm": "Part",
- "Voice": "Pass",
- },
- "X-": {POS: X},
- "Xf": {POS: X, "Foreign": "Yes"},
- "Xh": {POS: SYM},
- "Ya": {POS: X, "Abbr": "Yes"},
- "Ys": {POS: X, "Abbr": "Yes"},
- "Z": {POS: PUNCT},
-}
diff --git a/spacy/lang/lt/tokenizer_exceptions.py b/spacy/lang/lt/tokenizer_exceptions.py
index 4287b26dd..118fb2190 100644
--- a/spacy/lang/lt/tokenizer_exceptions.py
+++ b/spacy/lang/lt/tokenizer_exceptions.py
@@ -1,270 +1,15 @@
-# coding: utf8
-from __future__ import unicode_literals
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH
+from ...util import update_exc
+
_exc = {}
-for orth in [
- "n-tosios",
- "?!",
- # "G.",
- # "J. E.",
- # "J. Em.",
- # "J.E.",
- # "J.Em.",
- # "K.",
- # "N.",
- # "V.",
- # "Vt.",
- # "a.",
- # "a.k.",
- # "a.s.",
- # "adv.",
- # "akad.",
- # "aklg.",
- # "akt.",
- # "al.",
- # "ang.",
- # "angl.",
- # "aps.",
- # "apskr.",
- # "apyg.",
- # "arbat.",
- # "asist.",
- # "asm.",
- # "asm.k.",
- # "asmv.",
- # "atk.",
- # "atsak.",
- # "atsisk.",
- # "atsisk.sąsk.",
- # "atv.",
- # "aut.",
- # "avd.",
- # "b.k.",
- # "baud.",
- # "biol.",
- # "bkl.",
- # "bot.",
- # "bt.",
- # "buv.",
- # "ch.",
- # "chem.",
- # "corp.",
- # "d.",
- # "dab.",
- # "dail.",
- # "dek.",
- # "deš.",
- # "dir.",
- # "dirig.",
- # "doc.",
- # "dol.",
- # "dr.",
- # "drp.",
- # "dvit.",
- # "dėst.",
- # "dš.",
- # "dž.",
- # "e.b.",
- # "e.bankas",
- # "e.p.",
- # "e.parašas",
- # "e.paštas",
- # "e.v.",
- # "e.valdžia",
- # "egz.",
- # "eil.",
- # "ekon.",
- # "el.",
- # "el.bankas",
- # "el.p.",
- # "el.parašas",
- # "el.paštas",
- # "el.valdžia",
- # "etc.",
- # "ež.",
- # "fak.",
- # "faks.",
- # "feat.",
- # "filol.",
- # "filos.",
- # "g.",
- # "gen.",
- # "geol.",
- # "gerb.",
- # "gim.",
- # "gr.",
- # "gv.",
- # "gyd.",
- # "gyv.",
- # "habil.",
- # "inc.",
- # "insp.",
- # "inž.",
- # "ir pan.",
- # "ir t. t.",
- # "isp.",
- # "istor.",
- # "it.",
- # "just.",
- # "k.",
- # "k. a.",
- # "k.a.",
- # "kab.",
- # "kand.",
- # "kart.",
- # "kat.",
- # "ketv.",
- # "kh.",
- # "kl.",
- # "kln.",
- # "km.",
- # "kn.",
- # "koresp.",
- # "kpt.",
- # "kr.",
- # "kt.",
- # "kub.",
- # "kun.",
- # "kv.",
- # "kyš.",
- # "l. e. p.",
- # "l.e.p.",
- # "lenk.",
- # "liet.",
- # "lot.",
- # "lt.",
- # "ltd.",
- # "ltn.",
- # "m.",
- # "m.e..",
- # "m.m.",
- # "mat.",
- # "med.",
- # "mgnt.",
- # "mgr.",
- # "min.",
- # "mjr.",
- # "ml.",
- # "mln.",
- # "mlrd.",
- # "mob.",
- # "mok.",
- # "moksl.",
- # "mokyt.",
- # "mot.",
- # "mr.",
- # "mst.",
- # "mstl.",
- # "mėn.",
- # "nkt.",
- # "no.",
- # "nr.",
- # "ntk.",
- # "nuotr.",
- # "op.",
- # "org.",
- # "orig.",
- # "p.",
- # "p.d.",
- # "p.m.e.",
- # "p.s.",
- # "pab.",
- # "pan.",
- # "past.",
- # "pav.",
- # "pavad.",
- # "per.",
- # "perd.",
- # "pirm.",
- # "pl.",
- # "plg.",
- # "plk.",
- # "pr.",
- # "pr.Kr.",
- # "pranc.",
- # "proc.",
- # "prof.",
- # "prom.",
- # "prot.",
- # "psl.",
- # "pss.",
- # "pvz.",
- # "pšt.",
- # "r.",
- # "raj.",
- # "red.",
- # "rez.",
- # "rež.",
- # "rus.",
- # "rš.",
- # "s.",
- # "sav.",
- # "saviv.",
- # "sek.",
- # "sekr.",
- # "sen.",
- # "sh.",
- # "sk.",
- # "skg.",
- # "skv.",
- # "skyr.",
- # "sp.",
- # "spec.",
- # "sr.",
- # "st.",
- # "str.",
- # "stud.",
- # "sąs.",
- # "t.",
- # "t. p.",
- # "t. y.",
- # "t.p.",
- # "t.t.",
- # "t.y.",
- # "techn.",
- # "tel.",
- # "teol.",
- # "th.",
- # "tir.",
- # "trit.",
- # "trln.",
- # "tšk.",
- # "tūks.",
- # "tūkst.",
- # "up.",
- # "upl.",
- # "v.s.",
- # "vad.",
- # "val.",
- # "valg.",
- # "ved.",
- # "vert.",
- # "vet.",
- # "vid.",
- # "virš.",
- # "vlsč.",
- # "vnt.",
- # "vok.",
- # "vs.",
- # "vtv.",
- # "vv.",
- # "vyr.",
- # "vyresn.",
- # "zool.",
- # "Įn",
- # "įl.",
- # "š.m.",
- # "šnek.",
- # "šv.",
- # "švč.",
- # "ž.ū.",
- # "žin.",
- # "žml.",
- # "žr.",
-]:
+for orth in ["n-tosios", "?!"]:
_exc[orth] = [{ORTH: orth}]
-TOKENIZER_EXCEPTIONS = _exc
+mod_base_exceptions = {
+ exc: val for exc, val in BASE_EXCEPTIONS.items() if not exc.endswith(".")
+}
+del mod_base_exceptions["8)"]
+TOKENIZER_EXCEPTIONS = update_exc(mod_base_exceptions, _exc)
diff --git a/spacy/lang/lv/__init__.py b/spacy/lang/lv/__init__.py
index bb8c0763b..142bc706e 100644
--- a/spacy/lang/lv/__init__.py
+++ b/spacy/lang/lv/__init__.py
@@ -1,14 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from ...language import Language
-from ...attrs import LANG
class LatvianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "lv"
stop_words = STOP_WORDS
diff --git a/spacy/lang/lv/stop_words.py b/spacy/lang/lv/stop_words.py
index 075ad6347..2685c2430 100644
--- a/spacy/lang/lv/stop_words.py
+++ b/spacy/lang/lv/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Source: https://github.com/stopwords-iso/stopwords-lv
STOP_WORDS = set(
diff --git a/spacy/lang/ml/__init__.py b/spacy/lang/ml/__init__.py
index d052ded1b..cfad52261 100644
--- a/spacy/lang/ml/__init__.py
+++ b/spacy/lang/ml/__init__.py
@@ -1,12 +1,10 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
-
+from .lex_attrs import LEX_ATTRS
from ...language import Language
class MalayalamDefaults(Language.Defaults):
+ lex_attr_getters = LEX_ATTRS
stop_words = STOP_WORDS
diff --git a/spacy/lang/ml/examples.py b/spacy/lang/ml/examples.py
index a2a0ed10e..9794eab29 100644
--- a/spacy/lang/ml/examples.py
+++ b/spacy/lang/ml/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/ml/lex_attrs.py b/spacy/lang/ml/lex_attrs.py
index 468ad88f8..9ac19b6a7 100644
--- a/spacy/lang/ml/lex_attrs.py
+++ b/spacy/lang/ml/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/ml/stop_words.py b/spacy/lang/ml/stop_words.py
index 8bd6a7e02..441e93586 100644
--- a/spacy/lang/ml/stop_words.py
+++ b/spacy/lang/ml/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
അത്
diff --git a/spacy/lang/mr/__init__.py b/spacy/lang/mr/__init__.py
index fd95f9354..af0c49878 100644
--- a/spacy/lang/mr/__init__.py
+++ b/spacy/lang/mr/__init__.py
@@ -1,14 +1,8 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from ...language import Language
-from ...attrs import LANG
class MarathiDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "mr"
stop_words = STOP_WORDS
diff --git a/spacy/lang/mr/stop_words.py b/spacy/lang/mr/stop_words.py
index 0b0cd035d..9b0cee951 100644
--- a/spacy/lang/mr/stop_words.py
+++ b/spacy/lang/mr/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# Source: https://github.com/stopwords-iso/stopwords-mr/blob/master/stopwords-mr.txt, https://github.com/6/stopwords-json/edit/master/dist/mr.json
STOP_WORDS = set(
"""
diff --git a/spacy/lang/nb/__init__.py b/spacy/lang/nb/__init__.py
index e6c58b7de..d2bb92072 100644
--- a/spacy/lang/nb/__init__.py
+++ b/spacy/lang/nb/__init__.py
@@ -1,35 +1,18 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
from .punctuation import TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
-from .morph_rules import MORPH_RULES
from .syntax_iterators import SYNTAX_ITERATORS
-from .tag_map import TAG_MAP
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
class NorwegianDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "nb"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
- stop_words = STOP_WORDS
- morph_rules = MORPH_RULES
- tag_map = TAG_MAP
syntax_iterators = SYNTAX_ITERATORS
+ stop_words = STOP_WORDS
class Norwegian(Language):
diff --git a/spacy/lang/nb/examples.py b/spacy/lang/nb/examples.py
index c15426ded..b1a63ad74 100644
--- a/spacy/lang/nb/examples.py
+++ b/spacy/lang/nb/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/nb/morph_rules.py b/spacy/lang/nb/morph_rules.py
deleted file mode 100644
index e20814535..000000000
--- a/spacy/lang/nb/morph_rules.py
+++ /dev/null
@@ -1,668 +0,0 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import LEMMA, PRON_LEMMA
-
-# This dict includes all the PRON and DET tag combinations found in the
-# dataset developed by Schibsted, Nasjonalbiblioteket and LTG (to be published
-# autumn 2018) and the rarely used polite form.
-
-MORPH_RULES = {
- "PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=1|PronType=Prs": {
- "jeg": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Case": "Nom",
- }
- },
- "PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=2|PronType=Prs": {
- "du": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Sing",
- "Case": "Nom",
- },
- # polite form, not sure about the tag
- "De": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Sing",
- "Case": "Nom",
- "Polite": "Form",
- },
- },
- "PRON__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
- "hun": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Fem",
- "Case": "Nom",
- }
- },
- "PRON__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
- "han": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Masc",
- "Case": "Nom",
- }
- },
- "PRON__Gender=Neut|Number=Sing|Person=3|PronType=Prs": {
- "det": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Neut",
- },
- "alt": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Neut",
- },
- "intet": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Neut",
- },
- "noe": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Number": "Sing",
- "Person": "Three",
- "Gender": "Neut",
- },
- },
- "PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=1|PronType=Prs": {
- "vi": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Case": "Nom",
- }
- },
- "PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=2|PronType=Prs": {
- "dere": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Plur",
- "Case": "Nom",
- }
- },
- "PRON__Case=Nom|Number=Plur|Person=3|PronType=Prs": {
- "de": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Plur",
- "Case": "Nom",
- }
- },
- "PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=1|PronType=Prs": {
- "meg": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Sing",
- "Case": "Acc",
- }
- },
- "PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=2|PronType=Prs": {
- "deg": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Sing",
- "Case": "Acc",
- },
- # polite form, not sure about the tag
- "Dem": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Sing",
- "Case": "Acc",
- "Polite": "Form",
- },
- },
- "PRON__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
- "henne": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Fem",
- "Case": "Acc",
- }
- },
- "PRON__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
- "ham": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Masc",
- "Case": "Acc",
- },
- "han": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Sing",
- "Gender": "Masc",
- "Case": "Acc",
- },
- },
- "PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=1|PronType=Prs": {
- "oss": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "One",
- "Number": "Plur",
- "Case": "Acc",
- }
- },
- "PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=2|PronType=Prs": {
- "dere": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Two",
- "Number": "Plur",
- "Case": "Acc",
- }
- },
- "PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs": {
- "dem": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Person": "Three",
- "Number": "Plur",
- "Case": "Acc",
- }
- },
- "PRON__Case=Acc|Reflex=Yes": {
- "seg": {
- LEMMA: PRON_LEMMA,
- "Person": "Three",
- "Number": ("Sing", "Plur"),
- "Reflex": "Yes",
- }
- },
- "PRON__Animacy=Anim|Case=Nom|Number=Sing|PronType=Prs": {
- "man": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Case": "Nom"}
- },
- "DET__Gender=Masc|Number=Sing|Poss=Yes": {
- "min": {
- LEMMA: "min",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Masc",
- },
- "din": {
- LEMMA: "din",
- "Person": "Two",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Masc",
- },
- "hennes": {
- LEMMA: "hennes",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Masc",
- },
- "hans": {
- LEMMA: "hans",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Masc",
- },
- "sin": {
- LEMMA: "sin",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Masc",
- "Reflex": "Yes",
- },
- "vår": {
- LEMMA: "vår",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Masc",
- },
- "deres": {
- LEMMA: "deres",
- "Person": ("Two", "Three"),
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Masc",
- },
- # polite form, not sure about the tag
- "Deres": {
- LEMMA: "Deres",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Masc",
- "Polite": "Form",
- },
- },
- "DET__Gender=Fem|Number=Sing|Poss=Yes": {
- "mi": {
- LEMMA: "min",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Fem",
- },
- "di": {
- LEMMA: "din",
- "Person": "Two",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Fem",
- },
- "hennes": {
- LEMMA: "hennes",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Fem",
- },
- "hans": {
- LEMMA: "hans",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Fem",
- },
- "si": {
- LEMMA: "sin",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Fem",
- "Reflex": "Yes",
- },
- "vår": {
- LEMMA: "vår",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Fem",
- },
- "deres": {
- LEMMA: "deres",
- "Person": ("Two", "Three"),
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Fem",
- },
- # polite form, not sure about the tag
- "Deres": {
- LEMMA: "Deres",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Fem",
- "Polite": "Form",
- },
- },
- "DET__Gender=Neut|Number=Sing|Poss=Yes": {
- "mitt": {
- LEMMA: "min",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- },
- "ditt": {
- LEMMA: "din",
- "Person": "Two",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- },
- "hennes": {
- LEMMA: "hennes",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- },
- "hans": {
- LEMMA: "hans",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- },
- "sitt": {
- LEMMA: "sin",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- "Reflex": "Yes",
- },
- "vårt": {
- LEMMA: "vår",
- "Person": "One",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- },
- "deres": {
- LEMMA: "deres",
- "Person": ("Two", "Three"),
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- },
- # polite form, not sure about the tag
- "Deres": {
- LEMMA: "Deres",
- "Person": "Three",
- "Number": "Sing",
- "Poss": "Yes",
- "Gender": "Neut",
- "Polite": "Form",
- },
- },
- "DET__Number=Plur|Poss=Yes": {
- "mine": {LEMMA: "min", "Person": "One", "Number": "Plur", "Poss": "Yes"},
- "dine": {LEMMA: "din", "Person": "Two", "Number": "Plur", "Poss": "Yes"},
- "hennes": {LEMMA: "hennes", "Person": "Three", "Number": "Plur", "Poss": "Yes"},
- "hans": {LEMMA: "hans", "Person": "Three", "Number": "Plur", "Poss": "Yes"},
- "sine": {
- LEMMA: "sin",
- "Person": "Three",
- "Number": "Plur",
- "Poss": "Yes",
- "Reflex": "Yes",
- },
- "våre": {LEMMA: "vår", "Person": "One", "Number": "Plur", "Poss": "Yes"},
- "deres": {
- LEMMA: "deres",
- "Person": ("Two", "Three"),
- "Number": "Plur",
- "Poss": "Yes",
- },
- },
- "PRON__Animacy=Anim|Number=Plur|PronType=Rcp": {
- "hverandre": {LEMMA: PRON_LEMMA, "PronType": "Rcp", "Number": "Plur"}
- },
- "DET__Number=Plur|Poss=Yes|PronType=Rcp": {
- "hverandres": {
- LEMMA: "hverandres",
- "PronType": "Rcp",
- "Number": "Plur",
- "Poss": "Yes",
- }
- },
- "PRON___": {"som": {LEMMA: PRON_LEMMA}, "ikkenoe": {LEMMA: PRON_LEMMA}},
- "PRON__PronType=Int": {"hva": {LEMMA: PRON_LEMMA, "PronType": "Int"}},
- "PRON__Animacy=Anim|PronType=Int": {"hvem": {LEMMA: PRON_LEMMA, "PronType": "Int"}},
- "PRON__Animacy=Anim|Poss=Yes|PronType=Int": {
- "hvis": {LEMMA: PRON_LEMMA, "PronType": "Int", "Poss": "Yes"}
- },
- "PRON__Number=Plur|Person=3|PronType=Prs": {
- "noen": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Number": "Plur",
- "Person": "Three",
- },
- "ingen": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Number": "Plur",
- "Person": "Three",
- },
- "alle": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Number": "Plur",
- "Person": "Three",
- },
- },
- "PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs": {
- "noen": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Number": "Sing",
- "Person": "Three",
- "Gender": ("Fem", "Masc"),
- },
- "den": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Number": "Sing",
- "Person": "Three",
- "Gender": ("Fem", "Masc"),
- },
- "ingen": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Number": "Sing",
- "Person": "Three",
- "Gender": ("Fem", "Masc"),
- "Polarity": "Neg",
- },
- },
- "PRON__Number=Sing": {"ingenting": {LEMMA: PRON_LEMMA, "Number": "Sing"}},
- "PRON__Animacy=Anim|Number=Sing|PronType=Prs": {
- "en": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing"}
- },
- "PRON__Animacy=Anim|Case=Gen,Nom|Number=Sing|PronType=Prs": {
- "ens": {
- LEMMA: PRON_LEMMA,
- "PronType": "Prs",
- "Number": "Sing",
- "Case": ("Gen", "Nom"),
- }
- },
- "PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs": {
- "ens": {LEMMA: PRON_LEMMA, "PronType": "Prs", "Number": "Sing", "Case": "Gen"}
- },
- "DET__Case=Gen|Gender=Masc|Number=Sing": {
- "ens": {LEMMA: "en", "Number": "Sing", "Case": "Gen"}
- },
- "DET__Gender=Masc|Number=Sing": {
- "enhver": {LEMMA: "enhver", "Number": "Sing", "Gender": "Masc"},
- "all": {LEMMA: "all", "Number": "Sing", "Gender": "Masc"},
- "hver": {LEMMA: "hver", "Number": "Sing", "Gender": "Masc"},
- "noen": {LEMMA: "noen", "Gender": "Masc", "Number": "Sing"},
- "noe": {LEMMA: "noen", "Gender": "Masc", "Number": "Sing"},
- "en": {LEMMA: "en", "Number": "Sing", "Gender": "Neut"},
- "ingen": {LEMMA: "ingen", "Gender": "Masc", "Number": "Sing"},
- },
- "DET__Gender=Fem|Number=Sing": {
- "enhver": {LEMMA: "enhver", "Number": "Sing", "Gender": "Fem"},
- "all": {LEMMA: "all", "Number": "Sing", "Gender": "Fem"},
- "hver": {LEMMA: "hver", "Number": "Sing", "Gender": "Fem"},
- "noen": {LEMMA: "noen", "Gender": "Fem", "Number": "Sing"},
- "noe": {LEMMA: "noen", "Gender": "Fem", "Number": "Sing"},
- "ei": {LEMMA: "en", "Number": "Sing", "Gender": "Fem"},
- },
- "DET__Gender=Neut|Number=Sing": {
- "ethvert": {LEMMA: "enhver", "Number": "Sing", "Gender": "Neut"},
- "alt": {LEMMA: "all", "Number": "Sing", "Gender": "Neut"},
- "hvert": {LEMMA: "hver", "Number": "Sing", "Gender": "Neut"},
- "noe": {LEMMA: "noen", "Number": "Sing", "Gender": "Neut"},
- "intet": {LEMMA: "ingen", "Gender": "Neut", "Number": "Sing"},
- "et": {LEMMA: "en", "Number": "Sing", "Gender": "Neut"},
- },
- "DET__Gender=Neut|Number=Sing|PronType=Int": {
- "hvilket": {
- LEMMA: "hvilken",
- "PronType": "Int",
- "Number": "Sing",
- "Gender": "Neut",
- }
- },
- "DET__Gender=Fem|Number=Sing|PronType=Int": {
- "hvilken": {
- LEMMA: "hvilken",
- "PronType": "Int",
- "Number": "Sing",
- "Gender": "Fem",
- }
- },
- "DET__Gender=Masc|Number=Sing|PronType=Int": {
- "hvilken": {
- LEMMA: "hvilken",
- "PronType": "Int",
- "Number": "Sing",
- "Gender": "Masc",
- }
- },
- "DET__Number=Plur|PronType=Int": {
- "hvilke": {LEMMA: "hvilken", "PronType": "Int", "Number": "Plur"}
- },
- "DET__Number=Plur": {
- "alle": {LEMMA: "all", "Number": "Plur"},
- "noen": {LEMMA: "noen", "Number": "Plur"},
- "egne": {LEMMA: "egen", "Number": "Plur"},
- "ingen": {LEMMA: "ingen", "Number": "Plur"},
- },
- "DET__Gender=Masc|Number=Sing|PronType=Dem": {
- "den": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"},
- "slik": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"},
- "denne": {
- LEMMA: "denne",
- "PronType": "Dem",
- "Number": "Sing",
- "Gender": "Masc",
- },
- },
- "DET__Gender=Fem|Number=Sing|PronType=Dem": {
- "den": {LEMMA: "den", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"},
- "slik": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"},
- "denne": {LEMMA: "denne", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"},
- },
- "DET__Gender=Neut|Number=Sing|PronType=Dem": {
- "det": {LEMMA: "det", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"},
- "slikt": {LEMMA: "slik", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"},
- "dette": {
- LEMMA: "dette",
- "PronType": "Dem",
- "Number": "Sing",
- "Gender": "Neut",
- },
- },
- "DET__Number=Plur|PronType=Dem": {
- "disse": {LEMMA: "disse", "PronType": "Dem", "Number": "Plur"},
- "andre": {LEMMA: "annen", "PronType": "Dem", "Number": "Plur"},
- "de": {LEMMA: "de", "PronType": "Dem", "Number": "Plur"},
- "slike": {LEMMA: "slik", "PronType": "Dem", "Number": "Plur"},
- },
- "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
- "annen": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Masc"}
- },
- "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem": {
- "annen": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Fem"}
- },
- "DET__Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem": {
- "annet": {LEMMA: "annen", "PronType": "Dem", "Number": "Sing", "Gender": "Neut"}
- },
- "DET__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
- "annens": {
- LEMMA: "annnen",
- "PronType": "Dem",
- "Number": "Sing",
- "Gender": "Masc",
- "Case": "Gen",
- }
- },
- "DET__Case=Gen|Number=Plur|PronType=Dem": {
- "andres": {LEMMA: "annen", "PronType": "Dem", "Number": "Plur", "Case": "Gen"}
- },
- "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Dem": {
- "dens": {
- LEMMA: "den",
- "PronType": "Dem",
- "Number": "Sing",
- "Gender": "Fem",
- "Case": "Gen",
- }
- },
- "DET__Case=Gen|Gender=Masc|Number=Sing|PronType=Dem": {
- "hvis": {
- LEMMA: "hvis",
- "PronType": "Dem",
- "Number": "Sing",
- "Gender": "Masc",
- "Case": "Gen",
- },
- "dens": {
- LEMMA: "den",
- "PronType": "Dem",
- "Number": "Sing",
- "Gender": "Masc",
- "Case": "Gen",
- },
- },
- "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem": {
- "dets": {
- LEMMA: "det",
- "PronType": "Dem",
- "Number": "Sing",
- "Gender": "Neut",
- "Case": "Gen",
- }
- },
- "DET__Case=Gen|Number=Plur": {
- "alles": {LEMMA: "all", "Number": "Plur", "Case": "Gen"}
- },
- "DET__Definite=Def|Number=Sing|PronType=Dem": {
- "andre": {LEMMA: "annen", "Number": "Sing", "PronType": "Dem"}
- },
- "DET__Definite=Def|PronType=Dem": {
- "samme": {LEMMA: "samme", "PronType": "Dem"},
- "forrige": {LEMMA: "forrige", "PronType": "Dem"},
- "neste": {LEMMA: "neste", "PronType": "Dem"},
- },
- "DET__Definite=Def": {"selve": {LEMMA: "selve"}, "selveste": {LEMMA: "selveste"}},
- "DET___": {"selv": {LEMMA: "selv"}, "endel": {LEMMA: "endel"}},
- "DET__Definite=Ind|Gender=Fem|Number=Sing": {
- "egen": {LEMMA: "egen", "Gender": "Fem", "Number": "Sing"}
- },
- "DET__Definite=Ind|Gender=Masc|Number=Sing": {
- "egen": {LEMMA: "egen", "Gender": "Masc", "Number": "Sing"}
- },
- "DET__Definite=Ind|Gender=Neut|Number=Sing": {
- "eget": {LEMMA: "egen", "Gender": "Neut", "Number": "Sing"}
- },
- # same wordform and pos (verb), have to specify the exact features in order to not mix them up
- "VERB__Mood=Ind|Tense=Pres|VerbForm=Fin": {
- "så": {LEMMA: "så", "VerbForm": "Fin", "Tense": "Pres", "Mood": "Ind"}
- },
- "VERB__Mood=Ind|Tense=Past|VerbForm=Fin": {
- "så": {LEMMA: "se", "VerbForm": "Fin", "Tense": "Past", "Mood": "Ind"}
- },
-}
-
-# copied from the English morph_rules.py
-for tag, rules in MORPH_RULES.items():
- for key, attrs in dict(rules).items():
- rules[key.title()] = attrs
diff --git a/spacy/lang/nb/punctuation.py b/spacy/lang/nb/punctuation.py
index 4c10b5a68..9b800029c 100644
--- a/spacy/lang/nb/punctuation.py
+++ b/spacy/lang/nb/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
from ..char_classes import CURRENCY, PUNCT, UNITS, LIST_CURRENCY
diff --git a/spacy/lang/nb/stop_words.py b/spacy/lang/nb/stop_words.py
index caa2012e7..fd65dd788 100644
--- a/spacy/lang/nb/stop_words.py
+++ b/spacy/lang/nb/stop_words.py
@@ -1,7 +1,3 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
alle allerede alt and andre annen annet at av
diff --git a/spacy/lang/nb/syntax_iterators.py b/spacy/lang/nb/syntax_iterators.py
index d6c12e69f..d297203e3 100644
--- a/spacy/lang/nb/syntax_iterators.py
+++ b/spacy/lang/nb/syntax_iterators.py
@@ -1,29 +1,18 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Union, Iterator
from ...symbols import NOUN, PROPN, PRON
from ...errors import Errors
+from ...tokens import Doc, Span
-def noun_chunks(doclike):
- """
- Detect base noun phrases from a dependency parse. Works on both Doc and Span.
- """
- labels = [
- "nsubj",
- "nsubj:pass",
- "obj",
- "iobj",
- "ROOT",
- "appos",
- "nmod",
- "nmod:poss",
- ]
+def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Span]:
+ """Detect base noun phrases from a dependency parse. Works on Doc and Span."""
+ # fmt: off
+ labels = ["nsubj", "nsubj:pass", "obj", "iobj", "ROOT", "appos", "nmod", "nmod:poss"]
+ # fmt: on
doc = doclike.doc # Ensure works on both Doc and Span.
-
if not doc.is_parsed:
raise ValueError(Errors.E029)
-
np_deps = [doc.vocab.strings[label] for label in labels]
conj = doc.vocab.strings.add("conj")
np_label = doc.vocab.strings.add("NP")
diff --git a/spacy/lang/nb/tag_map.py b/spacy/lang/nb/tag_map.py
deleted file mode 100644
index ca0ece265..000000000
--- a/spacy/lang/nb/tag_map.py
+++ /dev/null
@@ -1,761 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, ADJ, CONJ, CCONJ, SCONJ, SYM, NUM, DET, ADV, ADP, X
-from ...symbols import VERB, NOUN, PROPN, PART, INTJ, PRON, AUX
-
-
-# Tags are a combination of POS and morphological features from a
-# https://github.com/ltgoslo/norne developed by Schibsted, Nasjonalbiblioteket and LTG. The
-# data format is .conllu and follows the Universal Dependencies annotation.
-# (There are some annotation differences compared to this dataset:
-# https://github.com/UniversalDependencies/UD_Norwegian-Bokmaal
-# mainly in the way determiners and pronouns are tagged).
-TAG_MAP = {
- "ADJ__Case=Gen|Definite=Def|Degree=Pos|Number=Sing": {
- "morph": "Case=Gen|Definite=Def|Degree=Pos|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Case=Gen|Definite=Def|Number=Sing": {
- "morph": "Case=Gen|Definite=Def|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Case=Gen|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing": {
- "morph": "Case=Gen|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Case=Gen|Definite=Ind|Degree=Pos|Number=Sing": {
- "morph": "Case=Gen|Definite=Ind|Degree=Pos|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Case=Gen|Degree=Cmp": {"morph": "Case=Gen|Degree=Cmp", POS: ADJ},
- "ADJ__Case=Gen|Degree=Pos|Number=Plur": {
- "morph": "Case=Gen|Degree=Pos|Number=Plur",
- POS: ADJ,
- },
- "ADJ__Definite=Def|Degree=Pos|Gender=Masc|Number=Sing": {
- "morph": "Definite=Def|Degree=Pos|Gender=Masc|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Definite=Def|Degree=Pos|Number=Sing": {
- "morph": "Definite=Def|Degree=Pos|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Definite=Def|Degree=Sup": {"morph": "Definite=Def|Degree=Sup", POS: ADJ},
- "ADJ__Definite=Def|Number=Sing": {"morph": "Definite=Def|Number=Sing", POS: ADJ},
- "ADJ__Definite=Ind|Degree=Pos": {"morph": "Definite=Ind|Degree=Pos", POS: ADJ},
- "ADJ__Definite=Ind|Degree=Pos|Gender=Masc|Number=Sing": {
- "morph": "Definite=Ind|Degree=Pos|Gender=Masc|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing": {
- "morph": "Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Definite=Ind|Degree=Pos|Number=Sing": {
- "morph": "Definite=Ind|Degree=Pos|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Definite=Ind|Degree=Sup": {"morph": "Definite=Ind|Degree=Sup", POS: ADJ},
- "ADJ__Definite=Ind|Gender=Masc|Number=Sing": {
- "morph": "Definite=Ind|Gender=Masc|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Definite=Ind|Gender=Neut|Number=Sing": {
- "morph": "Definite=Ind|Gender=Neut|Number=Sing",
- POS: ADJ,
- },
- "ADJ__Definite=Ind|Number=Sing": {"morph": "Definite=Ind|Number=Sing", POS: ADJ},
- "ADJ__Degree=Cmp": {"morph": "Degree=Cmp", POS: ADJ},
- "ADJ__Degree=Pos": {"morph": "Degree=Pos", POS: ADJ},
- "ADJ__Degree=Pos|Number=Plur": {"morph": "Degree=Pos|Number=Plur", POS: ADJ},
- "ADJ__Degree=Sup": {"morph": "Degree=Sup", POS: ADJ},
- "ADJ__Number=Plur": {"morph": "Number=Plur", POS: ADJ},
- "ADJ__Number=Plur|VerbForm=Part": {"morph": "Number=Plur|VerbForm=Part", POS: ADJ},
- "ADJ__Number=Sing": {"morph": "Number=Sing", POS: ADJ},
- "ADJ___": {"morph": "_", POS: ADJ},
- "ADP___": {"morph": "_", POS: ADP},
- "ADV___": {"morph": "_", POS: ADV},
- "ADV__Gender=Masc": {"morph": "Gender=Masc", POS: ADV},
- "AUX__Mood=Imp|VerbForm=Fin": {"morph": "Mood=Imp|VerbForm=Fin", POS: AUX},
- "AUX__Mood=Ind|Tense=Past|VerbForm=Fin": {
- "morph": "Mood=Ind|Tense=Past|VerbForm=Fin",
- POS: AUX,
- },
- "AUX__Mood=Ind|Tense=Pres|VerbForm=Fin": {
- "morph": "Mood=Ind|Tense=Pres|VerbForm=Fin",
- POS: AUX,
- },
- "AUX__Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass": {
- "morph": "Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass",
- POS: AUX,
- },
- "AUX__VerbForm=Inf": {"morph": "VerbForm=Inf", POS: AUX},
- "AUX__VerbForm=Inf|Voice=Pass": {"morph": "VerbForm=Inf|Voice=Pass", POS: AUX},
- "AUX__VerbForm=Part": {"morph": "VerbForm=Part", POS: AUX},
- "CONJ___": {"morph": "_", POS: CONJ},
- "DET__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
- "morph": "Case=Gen|Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Dem": {
- "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Case=Gen|Gender=Masc|Number=Sing": {
- "morph": "Case=Gen|Gender=Masc|Number=Sing",
- POS: DET,
- },
- "DET__Case=Gen|Gender=Masc|Number=Sing|PronType=Dem": {
- "morph": "Case=Gen|Gender=Masc|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem": {
- "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Case=Gen|Number=Plur": {"morph": "Case=Gen|Number=Plur", POS: DET},
- "DET__Case=Gen|Number=Plur|PronType=Dem": {
- "morph": "Case=Gen|Number=Plur|PronType=Dem",
- POS: DET,
- },
- "DET__Definite=Def": {"morph": "Definite=Def", POS: DET},
- "DET__Definite=Def|Number=Sing|PronType=Dem": {
- "morph": "Definite=Def|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Definite=Def|PronType=Dem": {"morph": "Definite=Def|PronType=Dem", POS: DET},
- "DET__Definite=Ind|Gender=Fem|Number=Sing": {
- "morph": "Definite=Ind|Gender=Fem|Number=Sing",
- POS: DET,
- },
- "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem": {
- "morph": "Definite=Ind|Gender=Fem|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Definite=Ind|Gender=Masc|Number=Sing": {
- "morph": "Definite=Ind|Gender=Masc|Number=Sing",
- POS: DET,
- },
- "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem": {
- "morph": "Definite=Ind|Gender=Masc|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Definite=Ind|Gender=Neut|Number=Sing": {
- "morph": "Definite=Ind|Gender=Neut|Number=Sing",
- POS: DET,
- },
- "DET__Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem": {
- "morph": "Definite=Ind|Gender=Neut|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Degree=Pos|Number=Plur": {"morph": "Degree=Pos|Number=Plur", POS: DET},
- "DET__Gender=Fem|Number=Sing": {"morph": "Gender=Fem|Number=Sing", POS: DET},
- "DET__Gender=Fem|Number=Sing|Poss=Yes": {
- "morph": "Gender=Fem|Number=Sing|Poss=Yes",
- POS: DET,
- },
- "DET__Gender=Fem|Number=Sing|PronType=Dem": {
- "morph": "Gender=Fem|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Gender=Fem|Number=Sing|PronType=Int": {
- "morph": "Gender=Fem|Number=Sing|PronType=Int",
- POS: DET,
- },
- "DET__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", POS: DET},
- "DET__Gender=Masc|Number=Sing|Poss=Yes": {
- "morph": "Gender=Masc|Number=Sing|Poss=Yes",
- POS: DET,
- },
- "DET__Gender=Masc|Number=Sing|PronType=Dem": {
- "morph": "Gender=Masc|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Gender=Masc|Number=Sing|PronType=Int": {
- "morph": "Gender=Masc|Number=Sing|PronType=Int",
- POS: DET,
- },
- "DET__Gender=Neut|Number=Sing": {"morph": "Gender=Neut|Number=Sing", POS: DET},
- "DET__Gender=Neut|Number=Sing|Poss=Yes": {
- "morph": "Gender=Neut|Number=Sing|Poss=Yes",
- POS: DET,
- },
- "DET__Gender=Neut|Number=Sing|PronType=Dem": {
- "morph": "Gender=Neut|Number=Sing|PronType=Dem",
- POS: DET,
- },
- "DET__Gender=Neut|Number=Sing|PronType=Int": {
- "morph": "Gender=Neut|Number=Sing|PronType=Int",
- POS: DET,
- },
- "DET__Number=Plur": {"morph": "Number=Plur", POS: DET},
- "DET__Number=Plur|Poss=Yes": {"morph": "Number=Plur|Poss=Yes", POS: DET},
- "DET__Number=Plur|Poss=Yes|PronType=Rcp": {
- "morph": "Number=Plur|Poss=Yes|PronType=Rcp",
- POS: DET,
- },
- "DET__Number=Plur|PronType=Dem": {"morph": "Number=Plur|PronType=Dem", POS: DET},
- "DET__Number=Plur|PronType=Int": {"morph": "Number=Plur|PronType=Int", POS: DET},
- "DET___": {"morph": "_", POS: DET},
- "INTJ___": {"morph": "_", POS: INTJ},
- "NOUN__Case=Gen": {"morph": "Case=Gen", POS: NOUN},
- "NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Plur": {
- "morph": "Case=Gen|Definite=Def|Gender=Fem|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Def|Gender=Fem|Number=Sing": {
- "morph": "Case=Gen|Definite=Def|Gender=Fem|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Plur": {
- "morph": "Case=Gen|Definite=Def|Gender=Masc|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Def|Gender=Masc|Number=Sing": {
- "morph": "Case=Gen|Definite=Def|Gender=Masc|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Def|Gender=Neut|Number=Plur": {
- "morph": "Case=Gen|Definite=Def|Gender=Neut|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Def|Gender=Neut|Number=Sing": {
- "morph": "Case=Gen|Definite=Def|Gender=Neut|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Plur": {
- "morph": "Case=Gen|Definite=Ind|Gender=Fem|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Ind|Gender=Fem|Number=Sing": {
- "morph": "Case=Gen|Definite=Ind|Gender=Fem|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Ind|Gender=Masc|Number=Plur": {
- "morph": "Case=Gen|Definite=Ind|Gender=Masc|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Ind|Gender=Masc|Number=Sing": {
- "morph": "Case=Gen|Definite=Ind|Gender=Masc|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Ind|Gender=Neut|Number=Plur": {
- "morph": "Case=Gen|Definite=Ind|Gender=Neut|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Definite=Ind|Gender=Neut|Number=Sing": {
- "morph": "Case=Gen|Definite=Ind|Gender=Neut|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Case=Gen|Gender=Fem": {"morph": "Case=Gen|Gender=Fem", POS: NOUN},
- "NOUN__Definite=Def,Ind|Gender=Masc|Number=Plur,Sing": {
- "morph": "Definite=Def",
- POS: NOUN,
- },
- "NOUN__Definite=Def,Ind|Gender=Masc|Number=Sing": {
- "morph": "Definite=Def",
- POS: NOUN,
- },
- "NOUN__Definite=Def,Ind|Gender=Neut|Number=Plur,Sing": {
- "morph": "Definite=Def",
- POS: NOUN,
- },
- "NOUN__Definite=Def|Gender=Fem|Number=Plur": {
- "morph": "Definite=Def|Gender=Fem|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Definite=Def|Gender=Fem|Number=Sing": {
- "morph": "Definite=Def|Gender=Fem|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Definite=Def|Gender=Masc|Number=Plur": {
- "morph": "Definite=Def|Gender=Masc|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Definite=Def|Gender=Masc|Number=Sing": {
- "morph": "Definite=Def|Gender=Masc|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Definite=Def|Gender=Neut|Number=Plur": {
- "morph": "Definite=Def|Gender=Neut|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Definite=Def|Gender=Neut|Number=Sing": {
- "morph": "Definite=Def|Gender=Neut|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Definite=Def|Number=Plur": {"morph": "Definite=Def|Number=Plur", POS: NOUN},
- "NOUN__Definite=Ind|Gender=Fem|Number=Plur": {
- "morph": "Definite=Ind|Gender=Fem|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Definite=Ind|Gender=Fem|Number=Sing": {
- "morph": "Definite=Ind|Gender=Fem|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Definite=Ind|Gender=Masc": {"morph": "Definite=Ind|Gender=Masc", POS: NOUN},
- "NOUN__Definite=Ind|Gender=Masc|Number=Plur": {
- "morph": "Definite=Ind|Gender=Masc|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Definite=Ind|Gender=Masc|Number=Sing": {
- "morph": "Definite=Ind|Gender=Masc|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Definite=Ind|Gender=Neut|Number=Plur": {
- "morph": "Definite=Ind|Gender=Neut|Number=Plur",
- POS: NOUN,
- },
- "NOUN__Definite=Ind|Gender=Neut|Number=Sing": {
- "morph": "Definite=Ind|Gender=Neut|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Definite=Ind|Number=Plur": {"morph": "Definite=Ind|Number=Plur", POS: NOUN},
- "NOUN__Definite=Ind|Number=Sing": {"morph": "Definite=Ind|Number=Sing", POS: NOUN},
- "NOUN__Gender=Fem": {"morph": "Gender=Fem", POS: NOUN},
- "NOUN__Gender=Masc": {"morph": "Gender=Masc", POS: NOUN},
- "NOUN__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", POS: NOUN},
- "NOUN__Gender=Neut": {"morph": "Gender=Neut", POS: NOUN},
- "NOUN__Number=Plur": {"morph": "Number=Plur", POS: NOUN},
- "NOUN___": {"morph": "_", POS: NOUN},
- "NUM__Case=Gen|Number=Plur": {"morph": "Case=Gen|Number=Plur", POS: NUM},
- "NUM__Definite=Def": {"morph": "Definite=Def", POS: NUM},
- "NUM__Definite=Def|Number=Sing": {"morph": "Definite=Def|Number=Sing", POS: NUM},
- "NUM__Gender=Fem|Number=Sing": {"morph": "Gender=Fem|Number=Sing", POS: NUM},
- "NUM__Gender=Masc|Number=Sing": {"morph": "Gender=Masc|Number=Sing", POS: NUM},
- "NUM__Gender=Neut|Number=Sing": {"morph": "Gender=Neut|Number=Sing", POS: NUM},
- "NUM__Number=Plur": {"morph": "Number=Plur", POS: NUM},
- "NUM__Number=Sing": {"morph": "Number=Sing", POS: NUM},
- "NUM___": {"morph": "_", POS: NUM},
- "PART___": {"morph": "_", POS: PART},
- "PRON__Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Acc|Gender=Masc|Number=Sing|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=1|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Acc|Number=Plur|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Acc|Number=Plur|Person=2|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Acc|Number=Plur|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=1|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Acc|Number=Sing|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Acc|Number=Sing|Person=2|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Acc|Number=Sing|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Gen,Nom|Number=Sing|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Gen",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Gen|Number=Sing|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Nom|Gender=Masc|Number=Sing|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=1|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Nom|Number=Plur|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Nom|Number=Plur|Person=2|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Nom|Number=Plur|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=1|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Nom|Number=Sing|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Nom|Number=Sing|Person=2|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Nom|Number=Sing|Person=",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Case=Nom|Number=Sing|PronType=Prs": {
- "morph": "Animacy=Anim|Case=Nom|Number=Sing|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Number=Plur|PronType=Rcp": {
- "morph": "Animacy=Anim|Number=Plur|PronType=Rcp",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Number=Sing|PronType=Prs": {
- "morph": "Animacy=Anim|Number=Sing|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Anim|Poss=Yes|PronType=Int": {
- "morph": "Animacy=Anim|Poss=Yes|PronType=Int",
- POS: PRON,
- },
- "PRON__Animacy=Anim|PronType=Int": {
- "morph": "Animacy=Anim|PronType=Int",
- POS: PRON,
- },
- "PRON__Case=Acc|Number=Plur|Person=3|PronType=Prs": {
- "morph": "Case=Acc|Number=Plur|Person=",
- POS: PRON,
- },
- "PRON__Case=Acc|Reflex=Yes": {"morph": "Case=Acc|Reflex=Yes", POS: PRON},
- "PRON__Case=Nom|Number=Plur|Person=3|PronType=Prs": {
- "morph": "Case=Nom|Number=Plur|Person=",
- POS: PRON,
- },
- "PRON__Case=Gen|Number=Plur|Person=3|PronType=Prs": {
- "morph": "Case=Gen|Number=Plur|Person=3|PronType=Prs",
- POS: PRON,
- },
- "PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Gender=Fem",
- POS: PRON,
- },
- "PRON__Gender=Neut|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Gender=Neut|Number=Sing|Person=",
- POS: PRON,
- },
- "PRON__Number=Plur|Person=3|PronType=Prs": {
- "morph": "Number=Plur|Person=",
- POS: PRON,
- },
- "PRON__Number=Sing": {"morph": "Number=Sing", POS: PRON},
- "PRON__PronType=Int": {"morph": "PronType=Int", POS: PRON},
- "PRON___": {"morph": "_", POS: PRON},
- "PROPN__Case=Gen": {"morph": "Case=Gen", POS: PROPN},
- "PROPN__Case=Gen|Gender=Fem": {"morph": "Case=Gen|Gender=Fem", POS: PROPN},
- "PROPN__Case=Gen|Gender=Masc": {"morph": "Case=Gen|Gender=Masc", POS: PROPN},
- "PROPN__Case=Gen|Gender=Neut": {"morph": "Case=Gen|Gender=Neut", POS: PROPN},
- "PROPN__Gender=Fem": {"morph": "Gender=Fem", POS: PROPN},
- "PROPN__Gender=Masc": {"morph": "Gender=Masc", POS: PROPN},
- "PROPN__Gender=Neut": {"morph": "Gender=Neut", POS: PROPN},
- "PROPN___": {"morph": "_", POS: PROPN},
- "PUNCT___": {"morph": "_", POS: PUNCT},
- "SCONJ___": {"morph": "_", POS: SCONJ},
- "SYM___": {"morph": "_", POS: SYM},
- "VERB__Definite=Ind|Number=Sing": {"morph": "Definite=Ind|Number=Sing", POS: VERB},
- "VERB__Mood=Imp|VerbForm=Fin": {"morph": "Mood=Imp|VerbForm=Fin", POS: VERB},
- "VERB__Mood=Ind|Tense=Past|VerbForm=Fin": {
- "morph": "Mood=Ind|Tense=Past|VerbForm=Fin",
- POS: VERB,
- },
- "VERB__Mood=Ind|Tense=Past|VerbForm=Fin|Voice=Pass": {
- "morph": "Mood=Ind|Tense=Past|VerbForm=Fin|Voice=Pass",
- POS: VERB,
- },
- "VERB__Mood=Ind|Tense=Pres|VerbForm=Fin": {
- "morph": "Mood=Ind|Tense=Pres|VerbForm=Fin",
- POS: VERB,
- },
- "VERB__Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass": {
- "morph": "Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Pass",
- POS: VERB,
- },
- "VERB__VerbForm=Inf": {"morph": "VerbForm=Inf", POS: VERB},
- "VERB__VerbForm=Inf|Voice=Pass": {"morph": "VerbForm=Inf|Voice=Pass", POS: VERB},
- "VERB__VerbForm=Part": {"morph": "VerbForm=Part", POS: VERB},
- "VERB___": {"morph": "_", POS: VERB},
- "X___": {"morph": "_", POS: X},
- "CCONJ___": {"morph": "_", POS: CCONJ},
- "ADJ__Abbr=Yes": {"morph": "Abbr=Yes", POS: ADJ},
- "ADJ__Abbr=Yes|Degree=Pos": {"morph": "Abbr=Yes|Degree=Pos", POS: ADJ},
- "ADJ__Case=Gen|Definite=Def|Number=Sing|VerbForm=Part": {
- "morph": "Case=Gen|Definite=Def|Number=Sing|VerbForm=Part",
- POS: ADJ,
- },
- "ADJ__Definite=Def|Number=Sing|VerbForm=Part": {
- "morph": "Definite=Def|Number=Sing|VerbForm=Part",
- POS: ADJ,
- },
- "ADJ__Definite=Ind|Gender=Masc|Number=Sing|VerbForm=Part": {
- "morph": "Definite=Ind|Gender=Masc|Number=Sing|VerbForm=Part",
- POS: ADJ,
- },
- "ADJ__Definite=Ind|Gender=Neut|Number=Sing|VerbForm=Part": {
- "morph": "Definite=Ind|Gender=Neut|Number=Sing|VerbForm=Part",
- POS: ADJ,
- },
- "ADJ__Definite=Ind|Number=Sing|VerbForm=Part": {
- "morph": "Definite=Ind|Number=Sing|VerbForm=Part",
- POS: ADJ,
- },
- "ADJ__Number=Sing|VerbForm=Part": {"morph": "Number=Sing|VerbForm=Part", POS: ADJ},
- "ADJ__VerbForm=Part": {"morph": "VerbForm=Part", POS: ADJ},
- "ADP__Abbr=Yes": {"morph": "Abbr=Yes", POS: ADP},
- "ADV__Abbr=Yes": {"morph": "Abbr=Yes", POS: ADV},
- "DET__Case=Gen|Gender=Masc|Number=Sing|PronType=Art": {
- "morph": "Case=Gen|Gender=Masc|Number=Sing|PronType=Art",
- POS: DET,
- },
- "DET__Case=Gen|Number=Plur|PronType=Tot": {
- "morph": "Case=Gen|Number=Plur|PronType=Tot",
- POS: DET,
- },
- "DET__Definite=Def|PronType=Prs": {"morph": "Definite=Def|PronType=Prs", POS: DET},
- "DET__Definite=Ind|Gender=Fem|Number=Sing|PronType=Prs": {
- "morph": "Definite=Ind|Gender=Fem|Number=Sing|PronType=Prs",
- POS: DET,
- },
- "DET__Definite=Ind|Gender=Masc|Number=Sing|PronType=Prs": {
- "morph": "Definite=Ind|Gender=Masc|Number=Sing|PronType=Prs",
- POS: DET,
- },
- "DET__Definite=Ind|Gender=Neut|Number=Sing|PronType=Prs": {
- "morph": "Definite=Ind|Gender=Neut|Number=Sing|PronType=Prs",
- POS: DET,
- },
- "DET__Gender=Fem|Number=Sing|PronType=Art": {
- "morph": "Gender=Fem|Number=Sing|PronType=Art",
- POS: DET,
- },
- "DET__Gender=Fem|Number=Sing|PronType=Ind": {
- "morph": "Gender=Fem|Number=Sing|PronType=Ind",
- POS: DET,
- },
- "DET__Gender=Fem|Number=Sing|PronType=Prs": {
- "morph": "Gender=Fem|Number=Sing|PronType=Prs",
- POS: DET,
- },
- "DET__Gender=Fem|Number=Sing|PronType=Tot": {
- "morph": "Gender=Fem|Number=Sing|PronType=Tot",
- POS: DET,
- },
- "DET__Gender=Masc|Number=Sing|Polarity=Neg|PronType=Neg": {
- "morph": "Gender=Masc|Number=Sing|Polarity=Neg|PronType=Neg",
- POS: DET,
- },
- "DET__Gender=Masc|Number=Sing|PronType=Art": {
- "morph": "Gender=Masc|Number=Sing|PronType=Art",
- POS: DET,
- },
- "DET__Gender=Masc|Number=Sing|PronType=Ind": {
- "morph": "Gender=Masc|Number=Sing|PronType=Ind",
- POS: DET,
- },
- "DET__Gender=Masc|Number=Sing|PronType=Tot": {
- "morph": "Gender=Masc|Number=Sing|PronType=Tot",
- POS: DET,
- },
- "DET__Gender=Neut|Number=Sing|Polarity=Neg|PronType=Neg": {
- "morph": "Gender=Neut|Number=Sing|Polarity=Neg|PronType=Neg",
- POS: DET,
- },
- "DET__Gender=Neut|Number=Sing|PronType=Art": {
- "morph": "Gender=Neut|Number=Sing|PronType=Art",
- POS: DET,
- },
- "DET__Gender=Neut|Number=Sing|PronType=Dem,Ind": {
- "morph": "Gender=Neut|Number=Sing|PronType=Dem,Ind",
- POS: DET,
- },
- "DET__Gender=Neut|Number=Sing|PronType=Ind": {
- "morph": "Gender=Neut|Number=Sing|PronType=Ind",
- POS: DET,
- },
- "DET__Gender=Neut|Number=Sing|PronType=Tot": {
- "morph": "Gender=Neut|Number=Sing|PronType=Tot",
- POS: DET,
- },
- "DET__Number=Plur|Polarity=Neg|PronType=Neg": {
- "morph": "Number=Plur|Polarity=Neg|PronType=Neg",
- POS: DET,
- },
- "DET__Number=Plur|PronType=Art": {"morph": "Number=Plur|PronType=Art", POS: DET},
- "DET__Number=Plur|PronType=Ind": {"morph": "Number=Plur|PronType=Ind", POS: DET},
- "DET__Number=Plur|PronType=Prs": {"morph": "Number=Plur|PronType=Prs", POS: DET},
- "DET__Number=Plur|PronType=Tot": {"morph": "Number=Plur|PronType=Tot", POS: DET},
- "DET__PronType=Ind": {"morph": "PronType=Ind", POS: DET},
- "DET__PronType=Prs": {"morph": "PronType=Prs", POS: DET},
- "NOUN__Abbr=Yes": {"morph": "Abbr=Yes", POS: NOUN},
- "NOUN__Abbr=Yes|Case=Gen": {"morph": "Abbr=Yes|Case=Gen", POS: NOUN},
- "NOUN__Abbr=Yes|Definite=Def,Ind|Gender=Masc|Number=Plur,Sing": {
- "morph": "Abbr=Yes|Definite=Def,Ind|Gender=Masc|Number=Plur,Sing",
- POS: NOUN,
- },
- "NOUN__Abbr=Yes|Definite=Def,Ind|Gender=Masc|Number=Sing": {
- "morph": "Abbr=Yes|Definite=Def,Ind|Gender=Masc|Number=Sing",
- POS: NOUN,
- },
- "NOUN__Abbr=Yes|Definite=Def,Ind|Gender=Neut|Number=Plur,Sing": {
- "morph": "Abbr=Yes|Definite=Def,Ind|Gender=Neut|Number=Plur,Sing",
- POS: NOUN,
- },
- "NOUN__Abbr=Yes|Gender=Masc": {"morph": "Abbr=Yes|Gender=Masc", POS: NOUN},
- "NUM__Case=Gen|Number=Plur|NumType=Card": {
- "morph": "Case=Gen|Number=Plur|NumType=Card",
- POS: NUM,
- },
- "NUM__Definite=Def|Number=Sing|NumType=Card": {
- "morph": "Definite=Def|Number=Sing|NumType=Card",
- POS: NUM,
- },
- "NUM__Definite=Def|NumType=Card": {"morph": "Definite=Def|NumType=Card", POS: NUM},
- "NUM__Gender=Fem|Number=Sing|NumType=Card": {
- "morph": "Gender=Fem|Number=Sing|NumType=Card",
- POS: NUM,
- },
- "NUM__Gender=Masc|Number=Sing|NumType=Card": {
- "morph": "Gender=Masc|Number=Sing|NumType=Card",
- POS: NUM,
- },
- "NUM__Gender=Neut|Number=Sing|NumType=Card": {
- "morph": "Gender=Neut|Number=Sing|NumType=Card",
- POS: NUM,
- },
- "NUM__Number=Plur|NumType=Card": {"morph": "Number=Plur|NumType=Card", POS: NUM},
- "NUM__Number=Sing|NumType=Card": {"morph": "Number=Sing|NumType=Card", POS: NUM},
- "NUM__NumType=Card": {"morph": "NumType=Card", POS: NUM},
- "PART__Polarity=Neg": {"morph": "Polarity=Neg", POS: PART},
- "PRON__Animacy=Hum|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Acc|Gender=Fem|Number=Sing|Person=3|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Acc|Number=Plur|Person=1|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Acc|Number=Plur|Person=1|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Acc|Number=Plur|Person=2|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Acc|Number=Plur|Person=2|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Acc|Number=Sing|Person=1|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Acc|Number=Sing|Person=1|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Acc|Number=Sing|Person=2|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Acc|Number=Sing|Person=2|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Gen,Nom|Number=Sing|PronType=Art,Prs": {
- "morph": "Animacy=Hum|Case=Gen,Nom|Number=Sing|PronType=Art,Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Gen|Number=Sing|PronType=Art,Prs": {
- "morph": "Animacy=Hum|Case=Gen|Number=Sing|PronType=Art,Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=3|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Nom|Number=Plur|Person=1|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Nom|Number=Plur|Person=1|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Nom|Number=Plur|Person=2|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Nom|Number=Plur|Person=2|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Nom|Number=Sing|Person=1|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Nom|Number=Sing|Person=1|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Nom|Number=Sing|Person=2|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Nom|Number=Sing|Person=2|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Case=Nom|Number=Sing|PronType=Prs": {
- "morph": "Animacy=Hum|Case=Nom|Number=Sing|PronType=Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Number=Plur|PronType=Rcp": {
- "morph": "Animacy=Hum|Number=Plur|PronType=Rcp",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Number=Sing|PronType=Art,Prs": {
- "morph": "Animacy=Hum|Number=Sing|PronType=Art,Prs",
- POS: PRON,
- },
- "PRON__Animacy=Hum|Poss=Yes|PronType=Int": {
- "morph": "Animacy=Hum|Poss=Yes|PronType=Int",
- POS: PRON,
- },
- "PRON__Animacy=Hum|PronType=Int": {"morph": "Animacy=Hum|PronType=Int", POS: PRON},
- "PRON__Case=Acc|PronType=Prs|Reflex=Yes": {
- "morph": "Case=Acc|PronType=Prs|Reflex=Yes",
- POS: PRON,
- },
- "PRON__Gender=Fem,Masc|Number=Sing|Person=3|Polarity=Neg|PronType=Neg,Prs": {
- "morph": "Gender=Fem,Masc|Number=Sing|Person=3|Polarity=Neg|PronType=Neg,Prs",
- POS: PRON,
- },
- "PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Ind,Prs": {
- "morph": "Gender=Fem,Masc|Number=Sing|Person=3|PronType=Ind,Prs",
- POS: PRON,
- },
- "PRON__Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs,Tot": {
- "morph": "Gender=Fem,Masc|Number=Sing|Person=3|PronType=Prs,Tot",
- POS: PRON,
- },
- "PRON__Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs": {
- "morph": "Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs",
- POS: PRON,
- },
- "PRON__Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs": {
- "morph": "Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs",
- POS: PRON,
- },
- "PRON__Gender=Neut|Number=Sing|Person=3|PronType=Ind,Prs": {
- "morph": "Gender=Neut|Number=Sing|Person=3|PronType=Ind,Prs",
- POS: PRON,
- },
- "PRON__Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs": {
- "morph": "Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs",
- POS: PRON,
- },
- "PRON__Number=Plur|Person=3|Polarity=Neg|PronType=Neg,Prs": {
- "morph": "Number=Plur|Person=3|Polarity=Neg|PronType=Neg,Prs",
- POS: PRON,
- },
- "PRON__Number=Plur|Person=3|PronType=Ind,Prs": {
- "morph": "Number=Plur|Person=3|PronType=Ind,Prs",
- POS: PRON,
- },
- "PRON__Number=Plur|Person=3|PronType=Prs,Tot": {
- "morph": "Number=Plur|Person=3|PronType=Prs,Tot",
- POS: PRON,
- },
- "PRON__Number=Plur|Poss=Yes|PronType=Prs": {
- "morph": "Number=Plur|Poss=Yes|PronType=Prs",
- POS: PRON,
- },
- "PRON__Number=Plur|Poss=Yes|PronType=Rcp": {
- "morph": "Number=Plur|Poss=Yes|PronType=Rcp",
- POS: PRON,
- },
- "PRON__Number=Sing|Polarity=Neg|PronType=Neg": {
- "morph": "Number=Sing|Polarity=Neg|PronType=Neg",
- POS: PRON,
- },
- "PRON__PronType=Prs": {"morph": "PronType=Prs", POS: PRON},
- "PRON__PronType=Rel": {"morph": "PronType=Rel", POS: PRON},
- "PROPN__Abbr=Yes": {"morph": "Abbr=Yes", POS: PROPN},
- "PROPN__Abbr=Yes|Case=Gen": {"morph": "Abbr=Yes|Case=Gen", POS: PROPN},
- "VERB__Abbr=Yes|Mood=Ind|Tense=Pres|VerbForm=Fin": {
- "morph": "Abbr=Yes|Mood=Ind|Tense=Pres|VerbForm=Fin",
- POS: VERB,
- },
- "VERB__Definite=Ind|Number=Sing|VerbForm=Part": {
- "morph": "Definite=Ind|Number=Sing|VerbForm=Part",
- POS: VERB,
- },
-}
diff --git a/spacy/lang/nb/tokenizer_exceptions.py b/spacy/lang/nb/tokenizer_exceptions.py
index 3f4aa79f6..0be436ae4 100644
--- a/spacy/lang/nb/tokenizer_exceptions.py
+++ b/spacy/lang/nb/tokenizer_exceptions.py
@@ -1,24 +1,23 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import ORTH, LEMMA
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH, NORM
+from ...util import update_exc
_exc = {}
for exc_data in [
- {ORTH: "jan.", LEMMA: "januar"},
- {ORTH: "feb.", LEMMA: "februar"},
- {ORTH: "mar.", LEMMA: "mars"},
- {ORTH: "apr.", LEMMA: "april"},
- {ORTH: "jun.", LEMMA: "juni"},
- {ORTH: "jul.", LEMMA: "juli"},
- {ORTH: "aug.", LEMMA: "august"},
- {ORTH: "sep.", LEMMA: "september"},
- {ORTH: "okt.", LEMMA: "oktober"},
- {ORTH: "nov.", LEMMA: "november"},
- {ORTH: "des.", LEMMA: "desember"},
+ {ORTH: "jan.", NORM: "januar"},
+ {ORTH: "feb.", NORM: "februar"},
+ {ORTH: "mar.", NORM: "mars"},
+ {ORTH: "apr.", NORM: "april"},
+ {ORTH: "jun.", NORM: "juni"},
+ {ORTH: "jul.", NORM: "juli"},
+ {ORTH: "aug.", NORM: "august"},
+ {ORTH: "sep.", NORM: "september"},
+ {ORTH: "okt.", NORM: "oktober"},
+ {ORTH: "nov.", NORM: "november"},
+ {ORTH: "des.", NORM: "desember"},
]:
_exc[exc_data[ORTH]] = [exc_data]
@@ -221,4 +220,4 @@ for orth in [
_exc[orth] = [{ORTH: orth}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/ne/__init__.py b/spacy/lang/ne/__init__.py
index 21556277d..68632e9ad 100644
--- a/spacy/lang/ne/__init__.py
+++ b/spacy/lang/ne/__init__.py
@@ -1,18 +1,11 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-
from ...language import Language
-from ...attrs import LANG
class NepaliDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "ne" # Nepali language ISO code
stop_words = STOP_WORDS
+ lex_attr_getters = LEX_ATTRS
class Nepali(Language):
diff --git a/spacy/lang/ne/examples.py b/spacy/lang/ne/examples.py
index b3c4f9e73..a29b77c2f 100644
--- a/spacy/lang/ne/examples.py
+++ b/spacy/lang/ne/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/ne/lex_attrs.py b/spacy/lang/ne/lex_attrs.py
index 652307577..7cb01c515 100644
--- a/spacy/lang/ne/lex_attrs.py
+++ b/spacy/lang/ne/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..norm_exceptions import BASE_NORMS
from ...attrs import NORM, LIKE_NUM
diff --git a/spacy/lang/nl/__init__.py b/spacy/lang/nl/__init__.py
index 407d23f73..1526e41f5 100644
--- a/spacy/lang/nl/__init__.py
+++ b/spacy/lang/nl/__init__.py
@@ -1,40 +1,24 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Optional
+
+from thinc.api import Model
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-from .tag_map import TAG_MAP
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
from .punctuation import TOKENIZER_SUFFIXES
from .lemmatizer import DutchLemmatizer
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
-from ...language import Language
from ...lookups import Lookups
-from ...attrs import LANG, NORM
-from ...util import update_exc, add_lookups
+from ...language import Language
class DutchDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "nl"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
- tag_map = TAG_MAP
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
-
- @classmethod
- def create_lemmatizer(cls, nlp=None, lookups=None):
- if lookups is None:
- lookups = Lookups()
- return DutchLemmatizer(lookups)
+ lex_attr_getters = LEX_ATTRS
+ stop_words = STOP_WORDS
class Dutch(Language):
@@ -42,4 +26,22 @@ class Dutch(Language):
Defaults = DutchDefaults
+@Dutch.factory(
+ "lemmatizer",
+ assigns=["token.lemma"],
+ default_config={"model": None, "mode": "rule", "lookups": None},
+ scores=["lemma_acc"],
+ default_score_weights={"lemma_acc": 1.0},
+)
+def make_lemmatizer(
+ nlp: Language,
+ model: Optional[Model],
+ name: str,
+ mode: str,
+ lookups: Optional[Lookups],
+):
+ lookups = DutchLemmatizer.load_lookups(nlp.lang, mode, lookups)
+ return DutchLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+
+
__all__ = ["Dutch"]
diff --git a/spacy/lang/nl/examples.py b/spacy/lang/nl/examples.py
index a459760f4..8c8c50c60 100644
--- a/spacy/lang/nl/examples.py
+++ b/spacy/lang/nl/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/nl/lemmatizer.py b/spacy/lang/nl/lemmatizer.py
index 9a92bee44..42b97a862 100644
--- a/spacy/lang/nl/lemmatizer.py
+++ b/spacy/lang/nl/lemmatizer.py
@@ -1,43 +1,34 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import List, Dict
-from ...lemmatizer import Lemmatizer
-from ...symbols import NOUN, VERB, ADJ, NUM, DET, PRON, ADP, AUX, ADV
+from ...pipeline import Lemmatizer
+from ...tokens import Token
class DutchLemmatizer(Lemmatizer):
- # Note: CGN does not distinguish AUX verbs, so we treat AUX as VERB.
- univ_pos_name_variants = {
- NOUN: "noun",
- "NOUN": "noun",
- "noun": "noun",
- VERB: "verb",
- "VERB": "verb",
- "verb": "verb",
- AUX: "verb",
- "AUX": "verb",
- "aux": "verb",
- ADJ: "adj",
- "ADJ": "adj",
- "adj": "adj",
- ADV: "adv",
- "ADV": "adv",
- "adv": "adv",
- PRON: "pron",
- "PRON": "pron",
- "pron": "pron",
- DET: "det",
- "DET": "det",
- "det": "det",
- ADP: "adp",
- "ADP": "adp",
- "adp": "adp",
- NUM: "num",
- "NUM": "num",
- "num": "num",
- }
+ @classmethod
+ def get_lookups_config(cls, mode: str) -> Dict:
+ if mode == "rule":
+ return {
+ "required_tables": [
+ "lemma_lookup",
+ "lemma_rules",
+ "lemma_exc",
+ "lemma_index",
+ ],
+ }
+ else:
+ return super().get_lookups_config(mode)
- def __call__(self, string, univ_pos, morphology=None):
+ def lookup_lemmatize(self, token: Token) -> List[str]:
+ """Overrides parent method so that a lowercased version of the string
+ is used to search the lookup table. This is necessary because our
+ lookup table consists entirely of lowercase keys."""
+ lookup_table = self.lookups.get_table("lemma_lookup", {})
+ string = token.text.lower()
+ return [lookup_table.get(string, string)]
+
+ # Note: CGN does not distinguish AUX verbs, so we treat AUX as VERB.
+ def rule_lemmatize(self, token: Token) -> List[str]:
# Difference 1: self.rules is assumed to be non-None, so no
# 'is None' check required.
# String lowercased from the get-go. All lemmatization results in
@@ -45,68 +36,61 @@ class DutchLemmatizer(Lemmatizer):
# any problems, and it keeps the exceptions indexes small. If this
# creates problems for proper nouns, we can introduce a check for
# univ_pos == "PROPN".
- string = string.lower()
- try:
- univ_pos = self.univ_pos_name_variants[univ_pos]
- except KeyError:
- # Because PROPN not in self.univ_pos_name_variants, proper names
- # are not lemmatized. They are lowercased, however.
- return [string]
- # if string in self.lemma_index.get(univ_pos)
+ cache_key = (token.lower, token.pos)
+ if cache_key in self.cache:
+ return self.cache[cache_key]
+ string = token.text
+ univ_pos = token.pos_.lower()
+ if univ_pos in ("", "eol", "space"):
+ forms = [string.lower()]
+ self.cache[cache_key] = forms
+ return forms
+
index_table = self.lookups.get_table("lemma_index", {})
+ exc_table = self.lookups.get_table("lemma_exc", {})
+ rules_table = self.lookups.get_table("lemma_rules", {})
+ index = index_table.get(univ_pos, {})
+ exceptions = exc_table.get(univ_pos, {})
+ rules = rules_table.get(univ_pos, {})
+
+ string = string.lower()
+ if univ_pos not in (
+ "noun",
+ "verb",
+ "aux",
+ "adj",
+ "adv",
+ "pron",
+ "det",
+ "adp",
+ "num",
+ ):
+ forms = [string]
+ self.cache[cache_key] = forms
+ return forms
lemma_index = index_table.get(univ_pos, {})
# string is already lemma
if string in lemma_index:
- return [string]
+ forms = [string]
+ self.cache[cache_key] = forms
+ return forms
exc_table = self.lookups.get_table("lemma_exc", {})
exceptions = exc_table.get(univ_pos, {})
# string is irregular token contained in exceptions index.
try:
- lemma = exceptions[string]
- return [lemma[0]]
+ forms = [exceptions[string][0]]
+ self.cache[cache_key] = forms
+ return forms
except KeyError:
pass
# string corresponds to key in lookup table
lookup_table = self.lookups.get_table("lemma_lookup", {})
looked_up_lemma = lookup_table.get(string)
if looked_up_lemma and looked_up_lemma in lemma_index:
- return [looked_up_lemma]
+ forms = [looked_up_lemma]
+ self.cache[cache_key] = forms
+ return forms
rules_table = self.lookups.get_table("lemma_rules", {})
- forms, is_known = self.lemmatize(
- string, lemma_index, exceptions, rules_table.get(univ_pos, [])
- )
- # Back-off through remaining return value candidates.
- if forms:
- if is_known:
- return forms
- else:
- for form in forms:
- if form in exceptions:
- return [form]
- if looked_up_lemma:
- return [looked_up_lemma]
- else:
- return forms
- elif looked_up_lemma:
- return [looked_up_lemma]
- else:
- return [string]
-
- # Overrides parent method so that a lowercased version of the string is
- # used to search the lookup table. This is necessary because our lookup
- # table consists entirely of lowercase keys.
- def lookup(self, string, orth=None):
- lookup_table = self.lookups.get_table("lemma_lookup", {})
- string = string.lower()
- if orth is not None:
- return lookup_table.get(orth, string)
- else:
- return lookup_table.get(string, string)
-
- # Reimplemented to focus more on application of suffix rules and to return
- # as early as possible.
- def lemmatize(self, string, index, exceptions, rules):
- # returns (forms, is_known: bool)
oov_forms = []
for old, new in rules:
if string.endswith(old):
@@ -114,7 +98,31 @@ class DutchLemmatizer(Lemmatizer):
if not form:
pass
elif form in index:
- return [form], True # True = Is known (is lemma)
+ forms = [form]
+ self.cache[cache_key] = forms
+ return forms
else:
oov_forms.append(form)
- return list(set(oov_forms)), False
+ forms = list(set(oov_forms))
+ # Back-off through remaining return value candidates.
+ if forms:
+ for form in forms:
+ if form in exceptions:
+ forms = [form]
+ self.cache[cache_key] = forms
+ return forms
+ if looked_up_lemma:
+ forms = [looked_up_lemma]
+ self.cache[cache_key] = forms
+ return forms
+ else:
+ self.cache[cache_key] = forms
+ return forms
+ elif looked_up_lemma:
+ forms = [looked_up_lemma]
+ self.cache[cache_key] = forms
+ return forms
+ else:
+ forms = [string]
+ self.cache[cache_key] = forms
+ return forms
diff --git a/spacy/lang/nl/lex_attrs.py b/spacy/lang/nl/lex_attrs.py
index 69343b589..f1acaefeb 100644
--- a/spacy/lang/nl/lex_attrs.py
+++ b/spacy/lang/nl/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/nl/punctuation.py b/spacy/lang/nl/punctuation.py
index e7207038b..d9dd2a6e3 100644
--- a/spacy/lang/nl/punctuation.py
+++ b/spacy/lang/nl/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_UNITS, merge_chars
from ..char_classes import LIST_PUNCT, LIST_QUOTES, CURRENCY, PUNCT
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
diff --git a/spacy/lang/nl/stop_words.py b/spacy/lang/nl/stop_words.py
index 44551f2d4..a2c6198e7 100644
--- a/spacy/lang/nl/stop_words.py
+++ b/spacy/lang/nl/stop_words.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
# The original stop words list (added in f46ffe3) was taken from
# http://www.damienvanholten.com/downloads/dutch-stop-words.txt
# and consisted of about 100 tokens.
diff --git a/spacy/lang/nl/tag_map.py b/spacy/lang/nl/tag_map.py
deleted file mode 100644
index 4fde5d39f..000000000
--- a/spacy/lang/nl/tag_map.py
+++ /dev/null
@@ -1,1028 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, ADJ, NUM, DET, ADV, ADP, X, VERB
-from ...symbols import NOUN, PROPN, SPACE, PRON, CONJ
-
-
-TAG_MAP = {
- "ADJ__Number=Sing": {POS: ADJ},
- "ADJ___": {POS: ADJ},
- "ADP__AdpType=Prep": {POS: ADP},
- "ADP__AdpType=Preppron|Gender=Fem|Number=Sing": {POS: ADP},
- "ADP__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: ADP},
- "ADP__AdpType=Preppron|Gender=Masc|Number=Sing": {POS: ADP},
- "ADV__Number=Sing": {POS: ADV},
- "ADV__PunctType=Comm": {POS: ADV},
- "ADV___": {POS: ADV},
- "Adj_Adj_N_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_Adj_N__Degree=Pos|Number=Plur|Variant=Short": {POS: ADJ},
- "Adj_Adj_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_Adj__Case=Nom|Degree=Pos": {POS: ADJ},
- "Adj_Adj__Degree=Pos": {POS: ADJ},
- "Adj_Adj__Degree=Pos|Variant=Short": {POS: ADJ},
- "Adj_Adv__Degree=Pos|Variant=Short": {POS: ADJ},
- "Adj_Adv|adv|stell|onverv_deelv__Degree=Pos|Variant=Short": {POS: ADJ},
- "Adj_Art__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_Art__Degree=Pos|Number=Sing|Variant=Short": {POS: ADJ},
- "Adj_Conj_V__Degree=Pos|Mood=Sub|VerbForm=Fin": {POS: ADJ},
- "Adj_Int|attr|stell|vervneut__Case=Nom|Degree=Pos": {POS: ADJ},
- "Adj_Misc_Misc__Degree=Pos": {POS: ADJ},
- "Adj_N_Conj_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N_N_N_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N_N_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N_Num__Definite=Def|Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N_Prep_Art_Adj_N__Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ},
- "Adj_N_Prep_N_Conj_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N_Prep_N_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N_Prep_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N_Punc__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N__Degree=Pos|Number=Plur": {POS: ADJ},
- "Adj_N__Degree=Pos|Number=Sing": {POS: ADJ},
- "Adj_N__Degree=Pos|Number=Sing|Variant=Short": {POS: ADJ},
- "Adj_Num__Definite=Def|Degree=Pos": {POS: ADJ},
- "Adj_Num__Definite=Def|Degree=Pos|Variant=Short": {POS: ADJ},
- "Adj_Prep|adv|stell|vervneut_voor__Degree=Pos|Variant=Short": {POS: ADJ},
- "Adj_Prep|adv|vergr|onverv_voor__Degree=Cmp|Variant=Short": {POS: ADJ},
- "Adj_V_Conj_V__Degree=Pos|VerbForm=Inf": {POS: ADJ},
- "Adj_V_N__Degree=Pos|Number=Sing|Tense=Past|VerbForm=Part": {POS: ADJ},
- "Adj_V|adv|stell|onverv_intrans|inf__Degree=Pos|Variant=Short|VerbForm=Inf": {
- POS: ADJ
- },
- "Adj_V|adv|stell|onverv_trans|imp__Degree=Pos|Mood=Imp|Variant=Short|VerbForm=Fin": {
- POS: ADJ
- },
- "Adj|adv|stell|onverv__Degree=Pos|Variant=Short": {POS: ADJ},
- "Adj|adv|stell|vervneut__Case=Nom|Degree=Pos|Variant=Short": {POS: ADJ},
- "Adj|adv|vergr|onverv__Degree=Cmp|Variant=Short": {POS: ADJ},
- "Adj|adv|vergr|vervneut__Case=Nom|Degree=Cmp|Variant=Short": {POS: ADJ},
- "Adj|attr|overtr|onverv__Degree=Sup": {POS: ADJ},
- "Adj|attr|overtr|vervneut__Case=Nom|Degree=Sup": {POS: ADJ},
- "Adj|attr|stell|onverv__Degree=Pos": {POS: ADJ},
- "Adj|attr|stell|vervgen__Case=Gen|Degree=Pos": {POS: ADJ},
- "Adj|attr|stell|vervneut__Case=Nom|Degree=Pos": {POS: ADJ},
- "Adj|attr|vergr|onverv__Degree=Cmp": {POS: ADJ},
- "Adj|attr|vergr|vervgen__Case=Gen|Degree=Cmp": {POS: ADJ},
- "Adj|attr|vergr|vervneut__Case=Nom|Degree=Cmp": {POS: ADJ},
- "Adj|zelfst|overtr|vervneut__Case=Nom|Degree=Sup": {POS: ADJ},
- "Adj|zelfst|stell|onverv__Degree=Pos": {POS: ADJ},
- "Adj|zelfst|stell|vervmv__Degree=Pos|Number=Plur": {POS: ADJ},
- "Adj|zelfst|stell|vervneut__Case=Nom|Degree=Pos": {POS: ADJ},
- "Adj|zelfst|vergr|vervneut__Case=Nom|Degree=Cmp": {POS: ADJ},
- "Adv_Adj_Conj__Degree=Pos": {POS: ADV},
- "Adv_Adj__Degree=Cmp": {POS: ADV},
- "Adv_Adj__Degree=Pos": {POS: ADV},
- "Adv_Adv_Conj_Adv__PronType=Dem": {POS: ADV},
- "Adv_Adv__AdpType=Prep": {POS: ADV},
- "Adv_Adv__Degree=Pos": {POS: ADV},
- "Adv_Adv__Degree=Pos|PronType=Dem": {POS: ADV},
- "Adv_Adv|pron|vrag_deeladv___": {POS: ADV},
- "Adv_Art__Degree=Pos|Number=Sing": {POS: ADV},
- "Adv_Art__Number=Sing": {POS: ADV},
- "Adv_Conj_Adv__AdpType=Preppron|Gender=Masc|Number=Sing": {POS: ADV},
- "Adv_Conj_Adv__Degree=Pos": {POS: ADV},
- "Adv_Conj_Adv|gew|aanw_neven_gew|aanw__PronType=Dem": {POS: ADV},
- "Adv_Conj_Adv|gew|onbep_neven_gew|onbep__PronType=Ind": {POS: ADV},
- "Adv_Conj_N__Degree=Pos|Number=Sing": {POS: ADV},
- "Adv_Conj__Degree=Pos": {POS: ADV},
- "Adv_N__Degree=Pos|Number=Sing": {POS: ADV},
- "Adv_Num__Degree=Cmp|PronType=Ind": {POS: ADV},
- "Adv_N|gew|aanw_soort|ev|neut__Number=Sing": {POS: ADV},
- "Adv_Prep_N__Case=Dat|Degree=Pos|Number=Sing": {POS: ADV},
- "Adv_Prep_Pron__AdpType=Preppron|Gender=Masc|Number=Sing": {POS: ADV},
- "Adv_Prep__Degree=Pos": {POS: ADV},
- "Adv_Prep|gew|aanw_voor__AdpType=Prep": {POS: ADV},
- "Adv_Prep|gew|aanw_voor___": {POS: ADV},
- "Adv_Pron__Degree=Pos": {POS: ADV},
- "Adv|deeladv__PartType=Vbp": {POS: ADV},
- "Adv|deelv__PartType=Vbp": {POS: ADV},
- "Adv|gew|aanw__PronType=Dem": {POS: ADV},
- "Adv|gew|betr__PronType=Rel": {POS: ADV},
- "Adv|gew|er__AdvType=Ex": {POS: ADV},
- "Adv|gew|geenfunc|overtr|onverv__Degree=Sup": {POS: ADV},
- "Adv|gew|geenfunc|stell|onverv__Degree=Pos": {POS: ADV},
- "Adv|gew|geenfunc|vergr|onverv__Degree=Cmp": {POS: ADV},
- "Adv|gew|onbep__PronType=Ind": {POS: ADV},
- "Adv|gew|vrag__PronType=Int": {POS: ADV},
- "Adv|pron|aanw__PronType=Dem": {POS: ADV},
- "Adv|pron|betr__PronType=Rel": {POS: ADV},
- "Adv|pron|er__AdvType=Ex": {POS: ADV},
- "Adv|pron|onbep__PronType=Ind": {POS: ADV},
- "Adv|pron|vrag__PronType=Int": {POS: ADV},
- "Art_Adj_N__AdpType=Prep": {POS: DET},
- "Art_Adj_N__Definite=Def|Degree=Sup|Gender=Neut|Number=Sing": {POS: DET},
- "Art_Adj__Case=Nom|Definite=Def|Degree=Cmp|Gender=Neut": {POS: DET},
- "Art_Adj__Case=Nom|Definite=Def|Degree=Sup|Gender=Neut": {POS: DET},
- "Art_Adj__Definite=Def|Degree=Cmp|Gender=Neut": {POS: DET},
- "Art_Adj__Definite=Def|Degree=Sup|Gender=Neut": {POS: DET},
- "Art_Adv__Definite=Def|Degree=Sup|Gender=Neut": {POS: DET},
- "Art_Conj_Pron__Number=Sing|PronType=Ind": {POS: DET},
- "Art_N_Conj_Art_N__Definite=Def|Gender=Neut|Number=Sing": {POS: DET},
- "Art_N_Conj_Art_V__AdpType=Prep": {POS: DET},
- "Art_N_Conj_Pron_N__Definite=Def|Gender=Neut|Number=Plur|Person=3": {POS: DET},
- "Art_N_Conj__Number=Sing|PronType=Ind": {POS: DET},
- "Art_N_N__AdpType=Prep": {POS: DET},
- "Art_N_Prep_Adj__Degree=Pos|Number=Sing|PronType=Ind": {POS: DET},
- "Art_N_Prep_Art_N__Number=Sing|PronType=Ind": {POS: DET},
- "Art_N_Prep_N__AdpType=Prep": {POS: DET},
- "Art_N_Prep_N__Definite=Def|Gender=Neut|Number=Sing": {POS: DET},
- "Art_N_Prep_N__Number=Sing|PronType=Ind": {POS: DET},
- "Art_N_Prep_Pron_N__AdpType=Prep": {POS: DET},
- "Art_N__AdpType=Prep": {POS: DET},
- "Art_N__Case=Gen|Definite=Def|Number=Sing": {POS: DET},
- "Art_N__Number=Sing|PronType=Ind": {POS: DET},
- "Art_Num_Art_Adj__AdpType=Prep": {POS: DET},
- "Art_Num_N__AdpType=Prep": {POS: DET},
- "Art_Num__Definite=Def|Degree=Sup|Gender=Neut|PronType=Ind": {POS: DET},
- "Art_Num__Definite=Def|Gender=Neut": {POS: DET},
- "Art_Num__Degree=Pos|Number=Sing|PronType=Ind": {POS: DET},
- "Art_N|bep|onzijd|neut_eigen|ev|neut__Definite=Def|Gender=Neut|Number=Sing": {
- POS: DET
- },
- "Art_N|bep|onzijd|neut_soort|ev|neut__Definite=Def|Gender=Neut|Number=Sing": {
- POS: DET
- },
- "Art_Pron_N__Case=Gen|Number=Plur|PronType=Ind": {POS: DET},
- "Art_Pron__Number=Sing|PronType=Ind": {POS: DET},
- "Art_V_N__AdpType=Prep": {POS: DET},
- "Art|bep|onzijd|neut__Definite=Def|Gender=Neut|PronType=Art": {POS: DET},
- "Art|bep|zijdofmv|gen__Case=Gen|Definite=Def|PronType=Art": {POS: DET},
- "Art|bep|zijdofmv|neut__Definite=Def|PronType=Art": {POS: DET},
- "Art|bep|zijdofonzijd|gen__Case=Gen|Definite=Def|Number=Sing|PronType=Art": {
- POS: DET
- },
- "Art|bep|zijd|dat__Case=Dat|Definite=Def|Gender=Com|PronType=Art": {POS: DET},
- "Art|onbep|zijdofonzijd|neut__Definite=Ind|Number=Sing|PronType=Art": {POS: DET},
- "CCONJ___": {POS: CONJ},
- "Conj_Adj|neven_adv|vergr|onverv__Degree=Cmp": {POS: CONJ},
- "Conj_Adj|neven_attr|stell|onverv__Degree=Pos": {POS: CONJ},
- "Conj_Adv_Adv__Degree=Pos": {POS: CONJ},
- "Conj_Adv__AdpType=Prep": {POS: CONJ},
- "Conj_Adv__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ},
- "Conj_Adv__Degree=Pos": {POS: CONJ},
- "Conj_Adv|neven_gew|aanw__PronType=Dem": {POS: CONJ},
- "Conj_Art_N__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ},
- "Conj_Art_N__Gender=Neut|Number=Sing": {POS: CONJ},
- "Conj_Conj|neven_onder|metfin___": {POS: CONJ},
- "Conj_Int|neven___": {POS: CONJ},
- "Conj_Int|onder|metfin___": {POS: CONJ},
- "Conj_N_Adv__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ},
- "Conj_N_Prep__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ},
- "Conj_N|onder|metfin_soort|ev|neut__AdpType=Preppron|Gender=Masc|Number=Plur": {
- POS: CONJ
- },
- "Conj_Pron_Adv__Degree=Pos|Number=Sing|Person=3": {POS: CONJ},
- "Conj_Pron_V__AdpType=Preppron|Gender=Masc|Number=Plur": {POS: CONJ},
- "Conj_Pron|neven_aanw|neut|zelfst__AdpType=Prep": {POS: CONJ},
- "Conj_Punc_Conj|neven_schuinstreep_neven__AdpType=Prep": {POS: CONJ},
- "Conj_V|onder|metfin_intrans|ott|3|ev__AdpType=Preppron|Gender=Masc|Number=Plur": {
- POS: CONJ
- },
- "Conj|neven___": {POS: CONJ},
- "Conj|onder|metfin___": {POS: CONJ},
- "Conj|onder|metinf___": {POS: CONJ},
- "DET__Degree=Cmp|NumType=Card|PronType=Ind": {POS: DET},
- "DET__Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {
- POS: DET
- },
- "DET__Gender=Fem|Number=Sing|PronType=Art": {POS: DET},
- "DET__Gender=Masc|Number=Plur|PronType=Art": {POS: DET},
- "DET__Gender=Masc|Number=Sing|PronType=Tot": {POS: DET},
- "Int_Adv|gew|aanw___": {POS: X},
- "Int_Int__NumType=Card": {POS: X},
- "Int_Int___": {POS: X},
- "Int_N_N_Misc_N___": {POS: X},
- "Int_N_Punc_Int_N__Number=Sing": {POS: X},
- "Int_Punc_Int|komma__PunctType=Comm": {POS: X},
- "Int___": {POS: X},
- "Misc_Misc_Misc_Misc_Misc_Misc_Misc_Misc_Misc___": {POS: X},
- "Misc_Misc_Misc_Misc_Misc_Misc_Misc___": {POS: X},
- "Misc_Misc_Misc_Misc_Misc_Misc_Punc_Misc_Misc_Misc___": {POS: X},
- "Misc_Misc_Misc_Misc_Misc_Misc___": {POS: X},
- "Misc_Misc_Misc_Misc_Misc_N_Misc_Misc_Misc_Misc_Misc_Misc___": {POS: X},
- "Misc_Misc_Misc_Misc|vreemd_vreemd_vreemd_vreemd__AdpType=Preppron|Gender=Masc|Number=Sing": {
- POS: X
- },
- "Misc_Misc_Misc_Misc|vreemd_vreemd_vreemd_vreemd___": {POS: X},
- "Misc_Misc_Misc_N__Number=Sing": {POS: X},
- "Misc_Misc_Misc|vreemd_vreemd_vreemd___": {POS: X},
- "Misc_Misc_N_N__Number=Sing": {POS: X},
- "Misc_Misc_N|vreemd_vreemd_soort|mv|neut__Number=Plur": {POS: X},
- "Misc_Misc_Punc_N_N__Number=Sing": {POS: X},
- "Misc_Misc|vreemd_vreemd__AdpType=Prep": {POS: X},
- "Misc_Misc|vreemd_vreemd__NumType=Card": {POS: X},
- "Misc_Misc|vreemd_vreemd___": {POS: X},
- "Misc_N_Misc_Misc__Number=Sing": {POS: X},
- "Misc_N_N__Number=Sing": {POS: X},
- "Misc_N|vreemd_eigen|ev|neut__Number=Sing": {POS: X},
- "Misc_N|vreemd_soort|ev|neut__Number=Sing": {POS: X},
- "Misc|vreemd__Foreign=Yes": {POS: X},
- "NUM__Case=Nom|Definite=Def|Degree=Pos|NumType=Card": {POS: NUM},
- "NUM__Definite=Def|Degree=Pos|NumType=Card": {POS: NUM},
- "NUM__Definite=Def|Degree=Pos|Number=Sing|NumType=Card": {POS: NUM},
- "NUM__Definite=Def|NumType=Card": {POS: NUM},
- "NUM__Definite=Def|Number=Plur|NumType=Card": {POS: NUM},
- "NUM__Definite=Def|Number=Sing|NumType=Card": {POS: NUM},
- "NUM__NumForm=Digit|NumType=Card": {POS: NUM},
- "NUM__NumType=Card": {POS: NUM},
- "N_Adj_N_Num__Definite=Def|Degree=Pos|Number=Sing": {POS: NOUN},
- "N_Adj_N__Degree=Pos|Number=Plur": {POS: NOUN},
- "N_Adj_N___": {POS: NOUN},
- "N_Adj__AdpType=Prep": {POS: NOUN},
- "N_Adj__Case=Nom|Degree=Pos|Number=Plur": {POS: NOUN},
- "N_Adj__Case=Nom|Degree=Pos|Number=Sing": {POS: NOUN},
- "N_Adj__Degree=Pos|Number=Plur": {POS: NOUN},
- "N_Adj__Degree=Pos|Number=Sing": {POS: NOUN},
- "N_Adj___": {POS: NOUN},
- "N_Adv_Punc_V_Pron_V__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Inf": {
- POS: NOUN
- },
- "N_Adv__Degree=Pos|Number=Sing": {POS: NOUN},
- "N_Adv___": {POS: NOUN},
- "N_Adv|soort|ev|neut_deelv__Number=Sing": {POS: NOUN},
- "N_Art_Adj_Prep_N___": {POS: NOUN},
- "N_Art_N__Case=Gen|Number=Sing": {POS: NOUN},
- "N_Art_N__Number=Plur": {POS: NOUN},
- "N_Art_N__Number=Sing": {POS: NOUN},
- "N_Art_N___": {POS: NOUN},
- "N_Conj_Adv__Degree=Pos|Number=Sing": {POS: NOUN},
- "N_Conj_Art_N___": {POS: NOUN},
- "N_Conj_N_N__Number=Sing": {POS: NOUN},
- "N_Conj_N_N___": {POS: NOUN},
- "N_Conj_N__Number=Plur": {POS: NOUN},
- "N_Conj_N__Number=Sing": {POS: NOUN},
- "N_Conj_N___": {POS: NOUN},
- "N_Conj|soort|ev|neut_neven__Number=Sing": {POS: NOUN},
- "N_Int_N|eigen|ev|neut_eigen|ev|neut___": {POS: NOUN},
- "N_Misc_Misc_Misc_Misc___": {POS: NOUN},
- "N_Misc_Misc_N___": {POS: NOUN},
- "N_Misc_Misc|eigen|ev|neut_vreemd_vreemd___": {POS: NOUN},
- "N_Misc_Misc|soort|mv|neut_vreemd_vreemd__Number=Plur": {POS: NOUN},
- "N_Misc_N_N_N_N___": {POS: NOUN},
- "N_Misc_N_N___": {POS: NOUN},
- "N_Misc_N___": {POS: NOUN},
- "N_Misc_Num___": {POS: NOUN},
- "N_Misc|eigen|ev|neut_vreemd___": {POS: NOUN},
- "N_Misc|soort|ev|neut_vreemd__Number=Sing": {POS: NOUN},
- "N_N_Adj_Art_N_N__Gender=Masc|Number=Plur|PronType=Art": {POS: NOUN},
- "N_N_Adj_N___": {POS: NOUN},
- "N_N_Adj__Degree=Pos|Number=Sing": {POS: NOUN},
- "N_N_Adj___": {POS: NOUN},
- "N_N_Art_Adv___": {POS: NOUN},
- "N_N_Art_N___": {POS: NOUN},
- "N_N_Conj_N_N_N_N_N___": {POS: NOUN},
- "N_N_Conj_N_N___": {POS: NOUN},
- "N_N_Conj_N__Number=Sing": {POS: NOUN},
- "N_N_Conj_N___": {POS: NOUN},
- "N_N_Conj___": {POS: NOUN},
- "N_N_Int_N_N___": {POS: NOUN},
- "N_N_Misc___": {POS: NOUN},
- "N_N_N_Adj_N___": {POS: NOUN},
- "N_N_N_Adv___": {POS: NOUN},
- "N_N_N_Int__AdpType=Prep": {POS: NOUN},
- "N_N_N_Misc___": {POS: NOUN},
- "N_N_N_N_Conj_N___": {POS: NOUN},
- "N_N_N_N_Misc___": {POS: NOUN},
- "N_N_N_N_N_N_Int__AdpType=Prep": {POS: NOUN},
- "N_N_N_N_N_N_N__AdpType=Prep": {POS: NOUN},
- "N_N_N_N_N_N_N__Gender=Fem|Number=Sing|PronType=Art": {POS: NOUN},
- "N_N_N_N_N_N_N___": {POS: NOUN},
- "N_N_N_N_N_N_Prep_N___": {POS: NOUN},
- "N_N_N_N_N_N__AdpType=Prep": {POS: NOUN},
- "N_N_N_N_N_N___": {POS: NOUN},
- "N_N_N_N_N_Prep_N___": {POS: NOUN},
- "N_N_N_N_N__AdpType=Prep": {POS: NOUN},
- "N_N_N_N_N__Number=Sing": {POS: NOUN},
- "N_N_N_N_N___": {POS: NOUN},
- "N_N_N_N_Prep_N___": {POS: NOUN},
- "N_N_N_N_Punc_N_Punc___": {POS: NOUN},
- "N_N_N_N_V___": {POS: NOUN},
- "N_N_N_N__Gender=Fem|Number=Plur|PronType=Art": {POS: NOUN},
- "N_N_N_N__Gender=Fem|Number=Sing|PronType=Art": {POS: NOUN},
- "N_N_N_N__NumType=Card": {POS: NOUN},
- "N_N_N_N__Number=Plur": {POS: NOUN},
- "N_N_N_N__Number=Sing": {POS: NOUN},
- "N_N_N_N___": {POS: NOUN},
- "N_N_N_Prep_Art_Adj_N___": {POS: NOUN},
- "N_N_N_Prep_N_N___": {POS: NOUN},
- "N_N_N_Prep_N___": {POS: NOUN},
- "N_N_N_Punc_N___": {POS: NOUN},
- "N_N_N_Punc___": {POS: NOUN},
- "N_N_N__AdpType=Prep": {POS: NOUN},
- "N_N_N__Gender=Fem|Number=Sing|PronType=Art": {POS: NOUN},
- "N_N_N__Gender=Masc|Number=Plur|PronType=Art": {POS: NOUN},
- "N_N_N__Number=Plur": {POS: NOUN},
- "N_N_N__Number=Sing": {POS: NOUN},
- "N_N_N___": {POS: NOUN},
- "N_N_Num_N___": {POS: NOUN},
- "N_N_Num__Definite=Def|Number=Sing": {POS: NOUN},
- "N_N_Num___": {POS: NOUN},
- "N_N_Prep_Art_Adj_N__Degree=Pos|Gender=Neut|Number=Sing": {POS: NOUN},
- "N_N_Prep_Art_N_Prep_Art_N___": {POS: NOUN},
- "N_N_Prep_Art_N___": {POS: NOUN},
- "N_N_Prep_N_N__AdpType=Prep": {POS: NOUN},
- "N_N_Prep_N_Prep_Adj_N___": {POS: NOUN},
- "N_N_Prep_N__AdpType=Prep": {POS: NOUN},
- "N_N_Prep_N__Number=Sing": {POS: NOUN},
- "N_N_Prep_N___": {POS: NOUN},
- "N_N_Punc_N_Punc___": {POS: NOUN},
- "N_Num_N_N__Definite=Def|Number=Sing": {POS: NOUN},
- "N_Num_N_Num___": {POS: NOUN},
- "N_Num_N___": {POS: NOUN},
- "N_Num_Num__Definite=Def|Number=Sing": {POS: NOUN},
- "N_Num__Definite=Def|Number=Plur": {POS: NOUN},
- "N_Num__Definite=Def|Number=Sing": {POS: NOUN},
- "N_Num___": {POS: NOUN},
- "N_N|eigen|ev|gen_eigen|ev|gen___": {POS: NOUN},
- "N_N|eigen|ev|gen_eigen|ev|neut___": {POS: NOUN},
- "N_N|eigen|ev|gen_soort|ev|neut___": {POS: NOUN},
- "N_N|eigen|ev|gen_soort|mv|neut___": {POS: NOUN},
- "N_N|eigen|ev|neut_eigen|ev|gen___": {POS: NOUN},
- "N_N|eigen|ev|neut_eigen|ev|neut__AdpType=Prep": {POS: NOUN},
- "N_N|eigen|ev|neut_eigen|ev|neut__AdpType=Preppron|Gender=Fem|Number=Plur": {
- POS: NOUN
- },
- "N_N|eigen|ev|neut_eigen|ev|neut__AdpType=Preppron|Gender=Masc|Number=Sing": {
- POS: NOUN
- },
- "N_N|eigen|ev|neut_eigen|ev|neut__Gender=Fem|Number=Plur|PronType=Art": {POS: NOUN},
- "N_N|eigen|ev|neut_eigen|ev|neut__Gender=Fem|Number=Sing|PronType=Art": {POS: NOUN},
- "N_N|eigen|ev|neut_eigen|ev|neut__Gender=Masc|Number=Plur|PronType=Art": {
- POS: NOUN
- },
- "N_N|eigen|ev|neut_eigen|ev|neut__Gender=Masc|Number=Sing|PronType=Art": {
- POS: NOUN
- },
- "N_N|eigen|ev|neut_eigen|ev|neut__NumType=Card": {POS: NOUN},
- "N_N|eigen|ev|neut_eigen|ev|neut__Number=Sing": {POS: NOUN},
- "N_N|eigen|ev|neut_eigen|ev|neut___": {POS: NOUN},
- "N_N|eigen|ev|neut_eigen|mv|neut___": {POS: NOUN},
- "N_N|eigen|ev|neut_soort|ev|neut__AdpType=Prep": {POS: NOUN},
- "N_N|eigen|ev|neut_soort|ev|neut___": {POS: NOUN},
- "N_N|eigen|ev|neut_soort|mv|neut___": {POS: NOUN},
- "N_N|eigen|mv|neut_eigen|mv|neut___": {POS: NOUN},
- "N_N|soort|ev|neut_eigen|ev|neut__Number=Sing": {POS: NOUN},
- "N_N|soort|ev|neut_soort|ev|neut__Gender=Masc|Number=Plur|PronType=Art": {
- POS: NOUN
- },
- "N_N|soort|ev|neut_soort|ev|neut__NumForm=Digit|NumType=Card": {POS: NOUN},
- "N_N|soort|ev|neut_soort|ev|neut__Number=Sing": {POS: NOUN},
- "N_N|soort|ev|neut_soort|mv|neut__Number=Plur": {POS: NOUN},
- "N_N|soort|mv|neut_eigen|ev|neut__Number=Sing": {POS: NOUN},
- "N_N|soort|mv|neut_soort|ev|neut__Number=Sing": {POS: NOUN},
- "N_N|soort|mv|neut_soort|mv|neut__Number=Plur": {POS: NOUN},
- "N_Prep_Adj_Adj_N__Degree=Pos|Number=Plur": {POS: NOUN},
- "N_Prep_Adj_N___": {POS: NOUN},
- "N_Prep_Art_N_Art_N__Number=Plur": {POS: NOUN},
- "N_Prep_Art_N_N__Number=Sing": {POS: NOUN},
- "N_Prep_Art_N_Prep_Art_N__Gender=Neut|Number=Sing": {POS: NOUN},
- "N_Prep_Art_N__Number=Plur": {POS: NOUN},
- "N_Prep_Art_N__Number=Sing": {POS: NOUN},
- "N_Prep_Art_N___": {POS: NOUN},
- "N_Prep_N_Art_Adj___": {POS: NOUN},
- "N_Prep_N_N__Number=Sing": {POS: NOUN},
- "N_Prep_N_N___": {POS: NOUN},
- "N_Prep_N_Prep_Art_N___": {POS: NOUN},
- "N_Prep_N_Prep_N_Conj_N_Prep_Art_N_N__Number=Sing": {POS: NOUN},
- "N_Prep_N_Punc_N_Conj_N__Number=Sing": {POS: NOUN},
- "N_Prep_N__Number=Plur": {POS: NOUN},
- "N_Prep_N__Number=Sing": {POS: NOUN},
- "N_Prep_N___": {POS: NOUN},
- "N_Prep_Num__Definite=Def|Number=Sing": {POS: NOUN},
- "N_Prep_Pron_N___": {POS: NOUN},
- "N_Prep|soort|ev|neut_voor__Number=Sing": {POS: NOUN},
- "N_Pron___": {POS: NOUN},
- "N_Punc_Adj_N___": {POS: NOUN},
- "N_Punc_Adj_Pron_Punc__Degree=Pos|Number=Sing|Person=2": {POS: NOUN},
- "N_Punc_Adv_V_Pron_N__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {
- POS: NOUN
- },
- "N_Punc_Misc_Punc_N___": {POS: NOUN},
- "N_Punc_N_N_N_N__Number=Sing": {POS: NOUN},
- "N_Punc_N_Punc_N__Number=Sing": {POS: NOUN},
- "N_Punc_N_Punc__Number=Sing": {POS: NOUN},
- "N_Punc_N__Number=Sing": {POS: NOUN},
- "N_Punc_Punc_N_N_Punc_Punc_N___": {POS: NOUN},
- "N_V_N_N___": {POS: NOUN},
- "N_V_N___": {POS: NOUN},
- "N_V__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {POS: NOUN},
- "N_V__Number=Sing|Tense=Past|VerbForm=Part": {POS: NOUN},
- "N_V___": {POS: NOUN},
- "N_V|eigen|ev|neut_trans|imp___": {POS: NOUN},
- "N_V|soort|ev|neut_hulpofkopp|conj__Mood=Sub|Number=Sing|VerbForm=Fin": {POS: NOUN},
- "N_V|soort|ev|neut_intrans|conj__Mood=Sub|Number=Sing|VerbForm=Fin": {POS: NOUN},
- "Num_Adj_Adj_N___": {POS: NUM},
- "Num_Adj_N___": {POS: NUM},
- "Num_Adj__Definite=Def|Degree=Pos|NumType=Card": {POS: NUM},
- "Num_Adj__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num_Adj___": {POS: NUM},
- "Num_Conj_Adj__Case=Nom|Definite=Def|Degree=Pos|NumType=Card": {POS: NUM},
- "Num_Conj_Art_Adj__Definite=Def|Degree=Pos|Number=Sing|NumType=Card": {POS: NUM},
- "Num_Conj_Num_N__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num_Conj_Num__Degree=Cmp|NumType=Card|PronType=Ind": {POS: NUM},
- "Num_N_N__Definite=Def|Number=Sing|NumType=Card": {POS: NUM},
- "Num_N_Num_Num_N__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num_N_Num__Definite=Def|Number=Sing|NumType=Card": {POS: NUM},
- "Num_N_Num__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num_N__Definite=Def|Number=Plur|NumType=Card": {POS: NUM},
- "Num_N__Definite=Def|Number=Sing|NumType=Card": {POS: NUM},
- "Num_N__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num_N___": {POS: NUM},
- "Num_Num_N__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num_Num__Definite=Def|NumType=Card": {POS: NUM},
- "Num_Num__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num_Prep_Num__Definite=Def|NumType=Card": {POS: NUM},
- "Num_Punc_Num_N_N__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num_Punc_Num__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num_Punc__NumForm=Digit|NumType=Card": {POS: NUM},
- "Num__Case=Nom|Degree=Cmp|NumType=Card|PronType=Ind": {POS: NUM},
- "Num__Case=Nom|Degree=Pos|NumType=Card|PronType=Ind": {POS: NUM},
- "Num__Case=Nom|Degree=Sup|NumType=Card|PronType=Ind": {POS: NUM},
- "Num__Degree=Cmp|NumType=Card|PronType=Ind": {POS: NUM},
- "Num__Degree=Pos|NumType=Card|PronType=Ind": {POS: NUM},
- "Num__Degree=Pos|Number=Plur|NumType=Card|PronType=Ind": {POS: NUM},
- "Num__Degree=Sup|NumType=Card|PronType=Ind": {POS: NUM},
- "Num__Degree=Sup|Number=Plur|NumType=Card|PronType=Ind": {POS: NUM},
- "Num|hoofd|bep|attr|onverv__Definite=Def|NumType=Card": {POS: NUM},
- "Num|hoofd|bep|zelfst|onverv__Definite=Def|NumType=Card": {POS: NUM},
- "Num|hoofd|bep|zelfst|vervmv__Definite=Def|Number=Plur|NumType=Card": {POS: NUM},
- "Num|hoofd|onbep|attr|stell|onverv__Degree=Pos|NumType=Card|PronType=Ind": {
- POS: NUM
- },
- "Num|hoofd|onbep|attr|vergr|onverv__Degree=Cmp|NumType=Card|PronType=Ind": {
- POS: NUM
- },
- "Num|rang|bep|attr|onverv__Definite=Def|NumType=Ord": {POS: NUM},
- "Num|rang|bep|zelfst|onverv__Definite=Def|NumType=Ord": {POS: NUM},
- "N|eigen|ev|gen__Case=Gen|Number=Sing": {POS: NOUN},
- "N|eigen|ev|neut__Number=Sing": {POS: NOUN},
- "N|eigen|mv|neut__Number=Plur": {POS: NOUN},
- "N|soort|ev|dat__Case=Dat|Number=Sing": {POS: NOUN},
- "N|soort|ev|gen__Case=Gen|Number=Sing": {POS: NOUN},
- "N|soort|ev|neut__Number=Sing": {POS: NOUN},
- "N|soort|mv|neut__Number=Plur": {POS: NOUN},
- "PROPN___": {POS: PROPN},
- "PUNCT___": {POS: PUNCT},
- "Prep_Adj_Conj_Prep_N__Degree=Pos|Number=Sing": {POS: ADP},
- "Prep_Adj_N__Degree=Pos|Number=Plur": {POS: ADP},
- "Prep_Adj|voor_adv|vergr|vervneut__Case=Nom|Degree=Cmp": {POS: ADP},
- "Prep_Adj|voor_attr|stell|onverv__Degree=Pos": {POS: ADP},
- "Prep_Adj|voor_attr|stell|vervneut__Case=Nom|Degree=Pos": {POS: ADP},
- "Prep_Adv__AdpType=Prep": {POS: ADP},
- "Prep_Adv__Case=Nom|Degree=Pos": {POS: ADP},
- "Prep_Adv__Case=Nom|Degree=Sup": {POS: ADP},
- "Prep_Adv__Degree=Pos": {POS: ADP},
- "Prep_Adv|voor_gew|aanw__AdpType=Prep": {POS: ADP},
- "Prep_Adv|voor_gew|aanw__Gender=Masc|Number=Sing|PronType=Tot": {POS: ADP},
- "Prep_Adv|voor_gew|aanw__PronType=Dem": {POS: ADP},
- "Prep_Adv|voor_pron|vrag__PronType=Int": {POS: ADP},
- "Prep_Art_Adj_N__Degree=Pos|Number=Sing": {POS: ADP},
- "Prep_Art_Adj__AdpType=Prep": {POS: ADP},
- "Prep_Art_Adj__Case=Nom|Degree=Pos": {POS: ADP},
- "Prep_Art_Adj__Degree=Cmp|Gender=Neut": {POS: ADP},
- "Prep_Art_Misc_Misc___": {POS: ADP},
- "Prep_Art_N_Adv__Number=Sing": {POS: ADP},
- "Prep_Art_N_Adv__Number=Sing|PronType=Int": {POS: ADP},
- "Prep_Art_N_Art_N__AdpType=Prep": {POS: ADP},
- "Prep_Art_N_Prep_Art_N__AdpType=Prep": {POS: ADP},
- "Prep_Art_N_Prep__AdpType=Prep": {POS: ADP},
- "Prep_Art_N_Prep__Gender=Neut|Number=Sing": {POS: ADP},
- "Prep_Art_N_Prep__Number=Sing": {POS: ADP},
- "Prep_Art_N_V__Number=Plur|Tense=Past|VerbForm=Part": {POS: ADP},
- "Prep_Art_N__AdpType=Prep": {POS: ADP},
- "Prep_Art_N__Gender=Com|Number=Sing": {POS: ADP},
- "Prep_Art_N__Gender=Neut|Number=Sing": {POS: ADP},
- "Prep_Art_N__Number=Plur": {POS: ADP},
- "Prep_Art_N__Number=Sing": {POS: ADP},
- "Prep_Art_V__AdpType=Prep": {POS: ADP},
- "Prep_Art_V__Gender=Neut|VerbForm=Inf": {POS: ADP},
- "Prep_Art|voor_bep|onzijd|neut__Gender=Neut": {POS: ADP},
- "Prep_Art|voor_onbep|zijdofonzijd|neut__Number=Sing": {POS: ADP},
- "Prep_Conj_Prep|voor_neven_voor__Gender=Masc|Number=Sing|PronType=Tot": {POS: ADP},
- "Prep_Misc|voor_vreemd___": {POS: ADP},
- "Prep_N_Adv|voor_soort|ev|neut_deeladv__Number=Sing": {POS: ADP},
- "Prep_N_Adv|voor_soort|ev|neut_pron|aanw__AdpType=Prep": {POS: ADP},
- "Prep_N_Adv|voor_soort|ev|neut_pron|aanw__Number=Sing|PronType=Dem": {POS: ADP},
- "Prep_N_Adv|voor_soort|ev|neut_pron|vrag__Number=Sing|PronType=Int": {POS: ADP},
- "Prep_N_Adv|voor_soort|mv|neut_deelv__Gender=Masc|Number=Sing|PronType=Tot": {
- POS: ADP
- },
- "Prep_N_Conj_N__Number=Sing": {POS: ADP},
- "Prep_N_Conj__AdpType=Prep": {POS: ADP},
- "Prep_N_Prep_N__Number=Sing": {POS: ADP},
- "Prep_N_Prep|voor_soort|ev|dat_voor__Number=Sing": {POS: ADP},
- "Prep_N_Prep|voor_soort|ev|neut_voor__AdpType=Prep": {POS: ADP},
- "Prep_N_Prep|voor_soort|ev|neut_voor__Number=Sing": {POS: ADP},
- "Prep_N_Prep|voor_soort|mv|neut_voor__Number=Plur": {POS: ADP},
- "Prep_N_V__Case=Nom|Number=Sing|Tense=Past|VerbForm=Part": {POS: ADP},
- "Prep_Num_N__Definite=Def|Number=Sing": {POS: ADP},
- "Prep_Num__Case=Nom|Degree=Sup|PronType=Ind": {POS: ADP},
- "Prep_Num__Degree=Cmp|PronType=Ind": {POS: ADP},
- "Prep_N|voor_eigen|ev|neut__Number=Sing": {POS: ADP},
- "Prep_N|voor_soort|ev|dat__AdpType=Prep": {POS: ADP},
- "Prep_N|voor_soort|ev|dat__Case=Dat|Number=Sing": {POS: ADP},
- "Prep_N|voor_soort|ev|neut__AdpType=Prep": {POS: ADP},
- "Prep_N|voor_soort|ev|neut__Gender=Masc|Number=Sing|PronType=Tot": {POS: ADP},
- "Prep_N|voor_soort|ev|neut__Number=Sing": {POS: ADP},
- "Prep_N|voor_soort|mv|neut__AdpType=Prep": {POS: ADP},
- "Prep_N|voor_soort|mv|neut__Number=Plur": {POS: ADP},
- "Prep_Prep_Adj|voor_voor_adv|stell|onverv__Gender=Masc|Number=Sing|PronType=Tot": {
- POS: ADP
- },
- "Prep_Prep_Adv__Degree=Pos": {POS: ADP},
- "Prep_Pron_Adj__Degree=Cmp|Number=Sing|Person=3": {POS: ADP},
- "Prep_Pron_N_Adv__Number=Plur": {POS: ADP},
- "Prep_Pron_N__AdpType=Prep": {POS: ADP},
- "Prep_Pron_N__Case=Dat|Number=Sing": {POS: ADP},
- "Prep_Pron|voor_aanw|neut|zelfst___": {POS: ADP},
- "Prep_Pron|voor_onbep|neut|attr___": {POS: ADP},
- "Prep_Pron|voor_onbep|neut|zelfst___": {POS: ADP},
- "Prep_Pron|voor_rec|neut__AdpType=Prep": {POS: ADP},
- "Prep_Pron|voor_rec|neut___": {POS: ADP},
- "Prep_Pron|voor_ref|3|evofmv__Number=Plur,Sing|Person=3": {POS: ADP},
- "Prep_Punc_N_Conj_N__AdpType=Prep": {POS: ADP},
- "Prep_V_N__Number=Sing|Tense=Pres|VerbForm=Part": {POS: ADP},
- "Prep_V_Pron_Pron_Adv__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Dem|Tense=Pres|VerbForm=Fin": {
- POS: ADP
- },
- "Prep_V|voor_intrans|inf__VerbForm=Inf": {POS: ADP},
- "Prep_V|voorinf_trans|inf__VerbForm=Inf": {POS: ADP},
- "Prep|achter__AdpType=Post": {POS: ADP},
- "Prep|comb__AdpType=Circ": {POS: ADP},
- "Prep|voor__AdpType=Prep": {POS: ADP},
- "Prep|voorinf__AdpType=Prep|PartType=Inf": {POS: ADP},
- "Pron_Adj_N_Punc_Art_Adj_N_Prep_Art_Adj_N__NumType=Card": {POS: PRON},
- "Pron_Adj__Case=Nom|Degree=Sup|Number=Sing|Person=2|Poss=Yes|PronType=Prs": {
- POS: PRON
- },
- "Pron_Adj__Degree=Cmp|PronType=Ind": {POS: PRON},
- "Pron_Adv|vrag|neut|attr_deelv__PronType=Int": {POS: PRON},
- "Pron_Art_N_N__Number=Plur|PronType=Ind": {POS: PRON},
- "Pron_Art__Number=Sing|PronType=Int": {POS: PRON},
- "Pron_N_Adv__Number=Sing|PronType=Ind": {POS: PRON},
- "Pron_N_V_Adv_Num_Punc__Aspect=Imp|Definite=Def|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {
- POS: PRON
- },
- "Pron_N_V_Conj_N__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {
- POS: PRON
- },
- "Pron_N__Case=Gen|Number=Sing|PronType=Ind": {POS: PRON},
- "Pron_N__Number=Sing|PronType=Ind": {POS: PRON},
- "Pron_N|aanw|gen|attr_soort|mv|neut__Case=Gen|Number=Plur|PronType=Dem": {
- POS: PRON
- },
- "Pron_N|onbep|neut|attr_soort|ev|neut__Number=Sing|PronType=Ind": {POS: PRON},
- "Pron_Prep_Art__Number=Sing|PronType=Int": {POS: PRON},
- "Pron_Prep_Art__Number=Sing|PronType=Rel": {POS: PRON},
- "Pron_Prep_N__Number=Plur|PronType=Int": {POS: PRON},
- "Pron_Prep|betr|neut|zelfst_voor__PronType=Rel": {POS: PRON},
- "Pron_Prep|onbep|neut|zelfst_voor__PronType=Ind": {POS: PRON},
- "Pron_Prep|vrag|neut|attr_voor__PronType=Int": {POS: PRON},
- "Pron_Pron_V__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Rel|Tense=Pres|VerbForm=Fin": {
- POS: PRON
- },
- "Pron_Pron__Person=3|PronType=Prs|Reflex=Yes": {POS: PRON},
- "Pron_V_V__Aspect=Imp|Mood=Ind|Person=3|PronType=Dem|Tense=Pres|VerbForm=Inf": {
- POS: PRON
- },
- "Pron_V__Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs|VerbForm=Inf": {
- POS: PRON
- },
- "Pron_V__Number=Plur|Person=1|Poss=Yes|PronType=Prs|VerbForm=Inf": {POS: PRON},
- "Pron|aanw|dat|attr__Case=Dat|PronType=Dem": {POS: PRON},
- "Pron|aanw|gen|attr__Case=Gen|PronType=Dem": {POS: PRON},
- "Pron|aanw|neut|attr__PronType=Dem": {POS: PRON},
- "Pron|aanw|neut|attr|weigen__PronType=Dem": {POS: PRON},
- "Pron|aanw|neut|attr|wzelf__PronType=Dem": {POS: PRON},
- "Pron|aanw|neut|zelfst__PronType=Dem": {POS: PRON},
- "Pron|betr|gen|zelfst__Case=Gen|PronType=Rel": {POS: PRON},
- "Pron|betr|neut|attr__PronType=Rel": {POS: PRON},
- "Pron|betr|neut|zelfst__PronType=Rel": {POS: PRON},
- "Pron|bez|1|ev|neut|attr__Number=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: PRON},
- "Pron|bez|1|mv|neut|attr__Number=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: PRON},
- "Pron|bez|2|ev|neut|attr__Number=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: PRON},
- "Pron|bez|2|mv|neut|attr__Number=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: PRON},
- "Pron|bez|3|ev|gen|attr__Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs": {
- POS: PRON
- },
- "Pron|bez|3|ev|neut|attr__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {POS: PRON},
- "Pron|bez|3|ev|neut|zelfst__Number=Sing|Person=3|Poss=Yes|PronType=Prs": {
- POS: PRON
- },
- "Pron|bez|3|mv|neut|attr__Number=Plur|Person=3|Poss=Yes|PronType=Prs": {POS: PRON},
- "Pron|onbep|gen|attr__Case=Gen|PronType=Ind": {POS: PRON},
- "Pron|onbep|gen|zelfst__Case=Gen|PronType=Ind": {POS: PRON},
- "Pron|onbep|neut|attr__PronType=Ind": {POS: PRON},
- "Pron|onbep|neut|zelfst__PronType=Ind": {POS: PRON},
- "Pron|per|1|ev|datofacc__Case=Acc,Dat|Number=Sing|Person=1|PronType=Prs": {
- POS: PRON
- },
- "Pron|per|1|ev|nom__Case=Nom|Number=Sing|Person=1|PronType=Prs": {POS: PRON},
- "Pron|per|1|mv|datofacc__Case=Acc,Dat|Number=Plur|Person=1|PronType=Prs": {
- POS: PRON
- },
- "Pron|per|1|mv|nom__Case=Nom|Number=Plur|Person=1|PronType=Prs": {POS: PRON},
- "Pron|per|2|ev|datofacc__Case=Acc,Dat|Number=Sing|Person=2|PronType=Prs": {
- POS: PRON
- },
- "Pron|per|2|ev|nom__Case=Nom|Number=Sing|Person=2|PronType=Prs": {POS: PRON},
- "Pron|per|2|mv|datofacc__Case=Acc,Dat|Number=Plur|Person=2|PronType=Prs": {
- POS: PRON
- },
- "Pron|per|2|mv|nom__Case=Nom|Number=Plur|Person=2|PronType=Prs": {POS: PRON},
- "Pron|per|3|evofmv|datofacc__Case=Acc,Dat|Number=Plur,Sing|Person=3|PronType=Prs": {
- POS: PRON
- },
- "Pron|per|3|evofmv|nom__Case=Nom|Number=Plur,Sing|Person=3|PronType=Prs": {
- POS: PRON
- },
- "Pron|per|3|ev|datofacc__Case=Acc,Dat|Number=Sing|Person=3|PronType=Prs": {
- POS: PRON
- },
- "Pron|per|3|ev|nom__Case=Nom|Number=Sing|Person=3|PronType=Prs": {POS: PRON},
- "Pron|per|3|mv|datofacc__Case=Acc,Dat|Number=Plur|Person=3|PronType=Prs": {
- POS: PRON
- },
- "Pron|rec|gen__Case=Gen|PronType=Rcp": {POS: PRON},
- "Pron|rec|neut__PronType=Rcp": {POS: PRON},
- "Pron|ref|1|ev__Number=Sing|Person=1|PronType=Prs|Reflex=Yes": {POS: PRON},
- "Pron|ref|1|mv__Number=Plur|Person=1|PronType=Prs|Reflex=Yes": {POS: PRON},
- "Pron|ref|2|ev__Number=Sing|Person=2|PronType=Prs|Reflex=Yes": {POS: PRON},
- "Pron|ref|3|evofmv__Number=Plur,Sing|Person=3|PronType=Prs|Reflex=Yes": {POS: PRON},
- "Pron|vrag|neut|attr__PronType=Int": {POS: PRON},
- "Pron|vrag|neut|zelfst__PronType=Int": {POS: PRON},
- "Punc_Int_Punc_N_N_N_Punc_Pron_V_Pron_Adj_V_Punc___": {POS: PUNCT},
- "Punc_N_Punc_N___": {POS: PUNCT},
- "Punc_Num_Num___": {POS: PUNCT},
- "Punc_Num___": {POS: PUNCT},
- "Punc|aanhaaldubb__PunctType=Quot": {POS: PUNCT},
- "Punc|aanhaalenk__PunctType=Quot": {POS: PUNCT},
- "Punc|dubbpunt__PunctType=Colo": {POS: PUNCT},
- "Punc|haakopen__PunctSide=Ini|PunctType=Brck": {POS: PUNCT},
- "Punc|haaksluit__PunctSide=Fin|PunctType=Brck": {POS: PUNCT},
- "Punc|hellip__PunctType=Peri": {POS: PUNCT},
- "Punc|isgelijk___": {POS: PUNCT},
- "Punc|komma__PunctType=Comm": {POS: PUNCT},
- "Punc|liggstreep___": {POS: PUNCT},
- "Punc|maal___": {POS: PUNCT},
- "Punc|punt__PunctType=Peri": {POS: PUNCT},
- "Punc|puntkomma__PunctType=Semi": {POS: PUNCT},
- "Punc|schuinstreep___": {POS: PUNCT},
- "Punc|uitroep__PunctType=Excl": {POS: PUNCT},
- "Punc|vraag__PunctType=Qest": {POS: PUNCT},
- "V_Adv_Art_N_Prep_Pron_N__Degree=Pos|Number=Plur|Person=2|Subcat=Tran": {POS: VERB},
- "V_Adv__Degree=Pos|Subcat=Tran": {POS: VERB},
- "V_Art_N_Num_N__Aspect=Imp|Definite=Def|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
- POS: VERB
- },
- "V_Art_N__Number=Sing|Subcat=Tran": {POS: VERB},
- "V_Conj_N_N__Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
- "V_Conj_Pron__Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
- "V_N_Conj_Adj_N_Prep_Art_N__Degree=Pos|Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {
- POS: VERB
- },
- "V_N_N__Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: VERB},
- "V_N_N__Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
- "V_N_V__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Inf": {
- POS: VERB
- },
- "V_N__Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
- "V_N|trans|imp_eigen|ev|neut__Number=Sing|Subcat=Tran": {POS: VERB},
- "V_Prep|intrans|verldw|onverv_voor__Subcat=Intr|Tense=Past|VerbForm=Part": {
- POS: VERB
- },
- "V_Pron_Adv_Adv_Pron_V__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V_Pron_Adv__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V_Pron_V__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V_Pron__VerbType=Aux,Cop": {POS: VERB},
- "V_V|hulp|imp_intrans|inf__VerbForm=Inf|VerbType=Mod": {POS: VERB},
- "V|hulpofkopp|conj__Mood=Sub|VerbForm=Fin": {POS: VERB},
- "V|hulpofkopp|conj__Mood=Sub|VerbForm=Fin|VerbType=Aux,Cop": {POS: VERB},
- "V|hulpofkopp|imp__Mood=Imp|VerbForm=Fin": {POS: VERB},
- "V|hulpofkopp|imp__Mood=Imp|VerbForm=Fin|VerbType=Aux,Cop": {POS: VERB},
- "V|hulpofkopp|inf__VerbForm=Inf": {POS: VERB},
- "V|hulpofkopp|inf__VerbForm=Inf|VerbType=Aux,Cop": {POS: VERB},
- "V|hulpofkopp|inf|subst__VerbForm=Inf": {POS: VERB},
- "V|hulpofkopp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulpofkopp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {
- POS: VERB
- },
- "V|hulpofkopp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulpofkopp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {
- POS: VERB
- },
- "V|hulpofkopp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulpofkopp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {
- POS: VERB
- },
- "V|hulpofkopp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulpofkopp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Aux,Cop": {
- POS: VERB
- },
- "V|hulpofkopp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulpofkopp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|VerbType=Aux,Cop": {
- POS: VERB
- },
- "V|hulpofkopp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulpofkopp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|VerbType=Aux,Cop": {
- POS: VERB
- },
- "V|hulpofkopp|tegdw|vervneut__Case=Nom|Tense=Pres|VerbForm=Part": {POS: VERB},
- "V|hulpofkopp|tegdw|vervneut__Case=Nom|Tense=Pres|VerbForm=Part|VerbType=Aux,Cop": {
- POS: VERB
- },
- "V|hulpofkopp|verldw|onverv__Tense=Past|VerbForm=Part": {POS: VERB},
- "V|hulpofkopp|verldw|onverv__Tense=Past|VerbForm=Part|VerbType=Aux,Cop": {
- POS: VERB
- },
- "V|hulp|conj__Mood=Sub|VerbForm=Fin|VerbType=Mod": {POS: VERB},
- "V|hulp|inf__VerbForm=Inf": {POS: VERB},
- "V|hulp|inf__VerbForm=Inf|VerbType=Mod": {POS: VERB},
- "V|hulp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulp|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
- POS: VERB
- },
- "V|hulp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulp|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
- POS: VERB
- },
- "V|hulp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulp|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
- POS: VERB
- },
- "V|hulp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulp|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
- POS: VERB
- },
- "V|hulp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulp|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|VerbType=Mod": {
- POS: VERB
- },
- "V|hulp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|hulp|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Tense=Past|VerbForm=Fin|VerbType=Mod": {
- POS: VERB
- },
- "V|hulp|verldw|onverv__Tense=Past|VerbForm=Part": {POS: VERB},
- "V|hulp|verldw|onverv__Tense=Past|VerbForm=Part|VerbType=Mod": {POS: VERB},
- "V|intrans|conj__Mood=Sub|Subcat=Intr|VerbForm=Fin": {POS: VERB},
- "V|intrans|imp__Mood=Imp|Subcat=Intr|VerbForm=Fin": {POS: VERB},
- "V|intrans|inf__Subcat=Intr|VerbForm=Inf": {POS: VERB},
- "V|intrans|inf|subst__Subcat=Intr|VerbForm=Inf": {POS: VERB},
- "V|intrans|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Intr|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|intrans|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Subcat=Intr|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|intrans|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Intr|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|intrans|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Intr|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|intrans|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Intr|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|intrans|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Intr|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|intrans|tegdw|onverv__Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: VERB},
- "V|intrans|tegdw|vervmv__Number=Plur|Subcat=Intr|Tense=Pres|VerbForm=Part": {
- POS: VERB
- },
- "V|intrans|tegdw|vervneut__Case=Nom|Subcat=Intr|Tense=Pres|VerbForm=Part": {
- POS: VERB
- },
- "V|intrans|tegdw|vervvergr__Degree=Cmp|Subcat=Intr|Tense=Pres|VerbForm=Part": {
- POS: VERB
- },
- "V|intrans|verldw|onverv__Subcat=Intr|Tense=Past|VerbForm=Part": {POS: VERB},
- "V|intrans|verldw|vervmv__Number=Plur|Subcat=Intr|Tense=Past|VerbForm=Part": {
- POS: VERB
- },
- "V|intrans|verldw|vervneut__Case=Nom|Subcat=Intr|Tense=Past|VerbForm=Part": {
- POS: VERB
- },
- "V|refl|imp__Mood=Imp|Reflex=Yes|VerbForm=Fin": {POS: VERB},
- "V|refl|inf__Reflex=Yes|VerbForm=Inf": {POS: VERB},
- "V|refl|inf|subst__Reflex=Yes|VerbForm=Inf": {POS: VERB},
- "V|refl|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Reflex=Yes|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|refl|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Reflex=Yes|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|refl|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Reflex=Yes|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|refl|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Reflex=Yes|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|refl|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Reflex=Yes|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|refl|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Reflex=Yes|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|refl|tegdw|vervneut__Case=Nom|Reflex=Yes|Tense=Pres|VerbForm=Part": {POS: VERB},
- "V|refl|verldw|onverv__Reflex=Yes|Tense=Past|VerbForm=Part": {POS: VERB},
- "V|trans|conj__Mood=Sub|Subcat=Tran|VerbForm=Fin": {POS: VERB},
- "V|trans|imp__Mood=Imp|Subcat=Tran|VerbForm=Fin": {POS: VERB},
- "V|trans|inf__Subcat=Tran|VerbForm=Inf": {POS: VERB},
- "V|trans|inf|subst__Subcat=Tran|VerbForm=Inf": {POS: VERB},
- "V|trans|ott|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|trans|ott|1|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|trans|ott|2|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|trans|ott|3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: VERB
- },
- "V|trans|ovt|1of2of3|ev__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|trans|ovt|1of2of3|mv__Aspect=Imp|Mood=Ind|Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Fin": {
- POS: VERB
- },
- "V|trans|tegdw|onverv__Subcat=Tran|Tense=Pres|VerbForm=Part": {POS: VERB},
- "V|trans|tegdw|vervneut__Case=Nom|Subcat=Tran|Tense=Pres|VerbForm=Part": {
- POS: VERB
- },
- "V|trans|verldw|onverv__Subcat=Tran|Tense=Past|VerbForm=Part": {POS: VERB},
- "V|trans|verldw|vervmv__Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Part": {
- POS: VERB
- },
- "V|trans|verldw|vervneut__Case=Nom|Subcat=Tran|Tense=Past|VerbForm=Part": {
- POS: VERB
- },
- "V|trans|verldw|vervvergr__Degree=Cmp|Subcat=Tran|Tense=Past|VerbForm=Part": {
- POS: VERB
- },
- "X__Aspect=Imp|Definite=Def|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin|VerbType=Mod": {
- POS: X
- },
- "X__Aspect=Imp|Definite=Def|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {
- POS: X
- },
- "X__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: X
- },
- "X__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Tense=Past|VerbForm=Part": {
- POS: X
- },
- "X__Aspect=Imp|Degree=Pos|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Inf": {
- POS: X
- },
- "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: X},
- "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Dem|Tense=Pres|VerbForm=Fin": {
- POS: X
- },
- "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|PronType=Rel|Tense=Pres|VerbForm=Fin": {
- POS: X
- },
- "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: X
- },
- "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|PronType=Ind|Tense=Pres|VerbForm=Fin": {
- POS: X
- },
- "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Tran|Tense=Pres|VerbForm=Fin": {
- POS: X
- },
- "X__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: X},
- "X__Aspect=Imp|Mood=Ind|Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Inf": {POS: X},
- "X__Aspect=Imp|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin": {POS: X},
- "X__Aspect=Imp|Mood=Ind|Person=3|PronType=Dem|Tense=Pres|VerbForm=Inf": {POS: X},
- "X__Case=Dat|Degree=Pos|Number=Sing": {POS: X},
- "X__Case=Dat|Number=Sing": {POS: X},
- "X__Case=Gen|Definite=Def|Number=Sing": {POS: X},
- "X__Case=Gen|Number=Plur|PronType=Dem": {POS: X},
- "X__Case=Gen|Number=Plur|PronType=Ind": {POS: X},
- "X__Case=Gen|Number=Sing": {POS: X},
- "X__Case=Gen|Number=Sing|Person=3|Poss=Yes|PronType=Prs|VerbForm=Inf": {POS: X},
- "X__Case=Gen|Number=Sing|PronType=Ind": {POS: X},
- "X__Case=Nom|Definite=Def|Degree=Cmp|Gender=Neut": {POS: X},
- "X__Case=Nom|Definite=Def|Degree=Sup": {POS: X},
- "X__Case=Nom|Definite=Def|Degree=Sup|Gender=Neut": {POS: X},
- "X__Case=Nom|Degree=Cmp": {POS: X},
- "X__Case=Nom|Degree=Pos": {POS: X},
- "X__Case=Nom|Degree=Pos|Gender=Neut": {POS: X},
- "X__Case=Nom|Degree=Pos|Number=Plur": {POS: X},
- "X__Case=Nom|Degree=Pos|Number=Sing": {POS: X},
- "X__Case=Nom|Degree=Sup": {POS: X},
- "X__Case=Nom|Degree=Sup|Number=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: X},
- "X__Case=Nom|Degree=Sup|PronType=Ind": {POS: X},
- "X__Case=Nom|Number=Sing|Tense=Past|VerbForm=Part": {POS: X},
- "X__Definite=Def": {POS: X},
- "X__Definite=Def|Degree=Cmp|Gender=Neut": {POS: X},
- "X__Definite=Def|Degree=Pos": {POS: X},
- "X__Definite=Def|Degree=Pos|Number=Sing": {POS: X},
- "X__Definite=Def|Degree=Pos|Variant=Short": {POS: X},
- "X__Definite=Def|Degree=Sup|Gender=Neut": {POS: X},
- "X__Definite=Def|Degree=Sup|Gender=Neut|Number=Sing": {POS: X},
- "X__Definite=Def|Degree=Sup|Gender=Neut|PronType=Ind": {POS: X},
- "X__Definite=Def|Gender=Neut": {POS: X},
- "X__Definite=Def|Gender=Neut|Number=Plur|Person=3": {POS: X},
- "X__Definite=Def|Gender=Neut|Number=Sing": {POS: X},
- "X__Definite=Def|Number=Plur": {POS: X},
- "X__Definite=Def|Number=Sing": {POS: X},
- "X__Definite=Def|Number=Sing|Person=1": {POS: X},
- "X__Definite=Def|Number=Sing|Tense=Past|VerbForm=Part": {POS: X},
- "X__Definite=Def|Number=Sing|Tense=Pres|VerbForm=Part": {POS: X},
- "X__Degree=Cmp": {POS: X},
- "X__Degree=Cmp|Gender=Neut": {POS: X},
- "X__Degree=Cmp|Number=Sing|Person=3": {POS: X},
- "X__Degree=Cmp|PronType=Ind": {POS: X},
- "X__Degree=Cmp|Variant=Short": {POS: X},
- "X__Degree=Pos": {POS: X},
- "X__Degree=Pos|Gender=Neut|Number=Sing": {POS: X},
- "X__Degree=Pos|Mood=Imp|Variant=Short|VerbForm=Fin": {POS: X},
- "X__Degree=Pos|Mood=Sub|VerbForm=Fin": {POS: X},
- "X__Degree=Pos|Number=Plur": {POS: X},
- "X__Degree=Pos|Number=Plur|Person=2|Subcat=Tran": {POS: X},
- "X__Degree=Pos|Number=Plur|Variant=Short": {POS: X},
- "X__Degree=Pos|Number=Sing": {POS: X},
- "X__Degree=Pos|Number=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: X},
- "X__Degree=Pos|Number=Sing|Person=2": {POS: X},
- "X__Degree=Pos|Number=Sing|Person=3": {POS: X},
- "X__Degree=Pos|Number=Sing|PronType=Ind": {POS: X},
- "X__Degree=Pos|Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: X},
- "X__Degree=Pos|Number=Sing|Tense=Past|VerbForm=Part": {POS: X},
- "X__Degree=Pos|Number=Sing|Variant=Short": {POS: X},
- "X__Degree=Pos|PronType=Dem": {POS: X},
- "X__Degree=Pos|Subcat=Tran": {POS: X},
- "X__Degree=Pos|Variant=Short": {POS: X},
- "X__Degree=Pos|Variant=Short|VerbForm=Inf": {POS: X},
- "X__Degree=Pos|VerbForm=Inf": {POS: X},
- "X__Gender=Com|Number=Sing": {POS: X},
- "X__Gender=Neut": {POS: X},
- "X__Gender=Neut|Number=Sing": {POS: X},
- "X__Gender=Neut|VerbForm=Inf": {POS: X},
- "X__Mood=Sub|Number=Sing|VerbForm=Fin": {POS: X},
- "X__Mood=Sub|VerbForm=Fin": {POS: X},
- "X__Number=Plur": {POS: X},
- "X__Number=Plur,Sing|Person=3": {POS: X},
- "X__Number=Plur|Person=1|Poss=Yes|PronType=Prs|VerbForm=Inf": {POS: X},
- "X__Number=Plur|PronType=Ind": {POS: X},
- "X__Number=Plur|PronType=Int": {POS: X},
- "X__Number=Plur|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: X},
- "X__Number=Plur|Tense=Past|VerbForm=Part": {POS: X},
- "X__Number=Sing": {POS: X},
- "X__Number=Sing|Person=3": {POS: X},
- "X__Number=Sing|PronType=Dem": {POS: X},
- "X__Number=Sing|PronType=Ind": {POS: X},
- "X__Number=Sing|PronType=Int": {POS: X},
- "X__Number=Sing|PronType=Rel": {POS: X},
- "X__Number=Sing|Subcat=Intr|Tense=Pres|VerbForm=Part": {POS: X},
- "X__Number=Sing|Subcat=Tran": {POS: X},
- "X__Number=Sing|Subcat=Tran|Tense=Past|VerbForm=Part": {POS: X},
- "X__Number=Sing|Tense=Past|VerbForm=Part": {POS: X},
- "X__Number=Sing|Tense=Pres|VerbForm=Part": {POS: X},
- "X__Person=3|PronType=Prs|Reflex=Yes": {POS: X},
- "X__PronType=Dem": {POS: X},
- "X__PronType=Ind": {POS: X},
- "X__PronType=Int": {POS: X},
- "X__PronType=Rel": {POS: X},
- "X__Subcat=Intr|Tense=Past|VerbForm=Part": {POS: X},
- "X__Subcat=Tran|Tense=Past|VerbForm=Part": {POS: X},
- "X__VerbForm=Inf": {POS: X},
- "X__VerbForm=Inf|VerbType=Mod": {POS: X},
- "X__VerbType=Aux,Cop": {POS: X},
- "X___": {POS: X},
- "_SP": {POS: SPACE},
-}
diff --git a/spacy/lang/nl/tokenizer_exceptions.py b/spacy/lang/nl/tokenizer_exceptions.py
index c0915f127..489d10d71 100644
--- a/spacy/lang/nl/tokenizer_exceptions.py
+++ b/spacy/lang/nl/tokenizer_exceptions.py
@@ -1,7 +1,7 @@
-# coding: utf8
-from __future__ import unicode_literals
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH
+from ...util import update_exc
+
# Extensive list of both common and uncommon dutch abbreviations copied from
# github.com/diasks2/pragmatic_segmenter, a Ruby library for rule-based
@@ -1605,4 +1605,4 @@ for orth in abbrevs:
_exc[i] = [{ORTH: i}]
-TOKENIZER_EXCEPTIONS = _exc
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
diff --git a/spacy/lang/norm_exceptions.py b/spacy/lang/norm_exceptions.py
index 341967a78..f35f613b1 100644
--- a/spacy/lang/norm_exceptions.py
+++ b/spacy/lang/norm_exceptions.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
# These exceptions are used to add NORM values based on a token's ORTH value.
# Individual languages can also add their own exceptions and overwrite them -
# for example, British vs. American spelling in English.
diff --git a/spacy/lang/pl/__init__.py b/spacy/lang/pl/__init__.py
index 52b662a90..7ddad9893 100644
--- a/spacy/lang/pl/__init__.py
+++ b/spacy/lang/pl/__init__.py
@@ -1,43 +1,29 @@
-# coding: utf8
-from __future__ import unicode_literals
+from typing import Optional
+
+from thinc.api import Model
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES
from .punctuation import TOKENIZER_SUFFIXES
-from .tag_map import TAG_MAP
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
from .lemmatizer import PolishLemmatizer
-
from ..tokenizer_exceptions import BASE_EXCEPTIONS
-from ..norm_exceptions import BASE_NORMS
-from ...language import Language
-from ...attrs import LANG, NORM
-from ...util import add_lookups
from ...lookups import Lookups
+from ...language import Language
+
+
+TOKENIZER_EXCEPTIONS = {
+ exc: val for exc, val in BASE_EXCEPTIONS.items() if not exc.endswith(".")
+}
class PolishDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters.update(LEX_ATTRS)
- lex_attr_getters[LANG] = lambda text: "pl"
- lex_attr_getters[NORM] = add_lookups(
- Language.Defaults.lex_attr_getters[NORM], BASE_NORMS
- )
- mod_base_exceptions = {
- exc: val for exc, val in BASE_EXCEPTIONS.items() if not exc.endswith(".")
- }
- tokenizer_exceptions = mod_base_exceptions
- stop_words = STOP_WORDS
- tag_map = TAG_MAP
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
-
- @classmethod
- def create_lemmatizer(cls, nlp=None, lookups=None):
- if lookups is None:
- lookups = Lookups()
- return PolishLemmatizer(lookups)
+ lex_attr_getters = LEX_ATTRS
+ stop_words = STOP_WORDS
class Polish(Language):
@@ -45,4 +31,22 @@ class Polish(Language):
Defaults = PolishDefaults
+@Polish.factory(
+ "lemmatizer",
+ assigns=["token.lemma"],
+ default_config={"model": None, "mode": "pos_lookup", "lookups": None},
+ scores=["lemma_acc"],
+ default_score_weights={"lemma_acc": 1.0},
+)
+def make_lemmatizer(
+ nlp: Language,
+ model: Optional[Model],
+ name: str,
+ mode: str,
+ lookups: Optional[Lookups],
+):
+ lookups = PolishLemmatizer.load_lookups(nlp.lang, mode, lookups)
+ return PolishLemmatizer(nlp.vocab, model, name, mode=mode, lookups=lookups)
+
+
__all__ = ["Polish"]
diff --git a/spacy/lang/pl/examples.py b/spacy/lang/pl/examples.py
index 14b6c7030..b1ea5880f 100644
--- a/spacy/lang/pl/examples.py
+++ b/spacy/lang/pl/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/pl/lemmatizer.py b/spacy/lang/pl/lemmatizer.py
index 8b8d7fe27..406ef9e4a 100644
--- a/spacy/lang/pl/lemmatizer.py
+++ b/spacy/lang/pl/lemmatizer.py
@@ -1,8 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
+from typing import List, Dict
-from ...lemmatizer import Lemmatizer
-from ...parts_of_speech import NAMES
+from ...pipeline import Lemmatizer
+from ...tokens import Token
class PolishLemmatizer(Lemmatizer):
@@ -10,30 +9,47 @@ class PolishLemmatizer(Lemmatizer):
# dictionary (morfeusz.sgjp.pl/en) by Institute of Computer Science PAS.
# It utilizes some prefix based improvements for verb and adjectives
# lemmatization, as well as case-sensitive lemmatization for nouns.
- def __call__(self, string, univ_pos, morphology=None):
- if isinstance(univ_pos, int):
- univ_pos = NAMES.get(univ_pos, "X")
- univ_pos = univ_pos.upper()
+ @classmethod
+ def get_lookups_config(cls, mode: str) -> Dict:
+ if mode == "pos_lookup":
+ return {
+ "required_tables": [
+ "lemma_lookup_adj",
+ "lemma_lookup_adp",
+ "lemma_lookup_adv",
+ "lemma_lookup_aux",
+ "lemma_lookup_noun",
+ "lemma_lookup_num",
+ "lemma_lookup_part",
+ "lemma_lookup_pron",
+ "lemma_lookup_verb",
+ ]
+ }
+ else:
+ return super().get_lookups_config(mode)
+
+ def pos_lookup_lemmatize(self, token: Token) -> List[str]:
+ string = token.text
+ univ_pos = token.pos_
+ morphology = token.morph.to_dict()
lookup_pos = univ_pos.lower()
if univ_pos == "PROPN":
lookup_pos = "noun"
lookup_table = self.lookups.get_table("lemma_lookup_" + lookup_pos, {})
-
if univ_pos == "NOUN":
return self.lemmatize_noun(string, morphology, lookup_table)
-
if univ_pos != "PROPN":
string = string.lower()
-
if univ_pos == "ADJ":
return self.lemmatize_adj(string, morphology, lookup_table)
elif univ_pos == "VERB":
return self.lemmatize_verb(string, morphology, lookup_table)
-
return [lookup_table.get(string, string.lower())]
- def lemmatize_adj(self, string, morphology, lookup_table):
+ def lemmatize_adj(
+ self, string: str, morphology: dict, lookup_table: Dict[str, str]
+ ) -> List[str]:
# this method utilizes different procedures for adjectives
# with 'nie' and 'naj' prefixes
if string[:3] == "nie":
@@ -44,25 +60,26 @@ class PolishLemmatizer(Lemmatizer):
return [lookup_table[naj_search_string]]
if search_string in lookup_table:
return [lookup_table[search_string]]
-
if string[:3] == "naj":
naj_search_string = string[3:]
if naj_search_string in lookup_table:
return [lookup_table[naj_search_string]]
-
return [lookup_table.get(string, string)]
- def lemmatize_verb(self, string, morphology, lookup_table):
+ def lemmatize_verb(
+ self, string: str, morphology: dict, lookup_table: Dict[str, str]
+ ) -> List[str]:
# this method utilizes a different procedure for verbs
# with 'nie' prefix
if string[:3] == "nie":
search_string = string[3:]
if search_string in lookup_table:
return [lookup_table[search_string]]
-
return [lookup_table.get(string, string)]
- def lemmatize_noun(self, string, morphology, lookup_table):
+ def lemmatize_noun(
+ self, string: str, morphology: dict, lookup_table: Dict[str, str]
+ ) -> List[str]:
# this method is case-sensitive, in order to work
# for incorrectly tagged proper names
if string != string.lower():
@@ -71,11 +88,4 @@ class PolishLemmatizer(Lemmatizer):
elif string in lookup_table:
return [lookup_table[string]]
return [string.lower()]
-
return [lookup_table.get(string, string)]
-
- def lookup(self, string, orth=None):
- return string.lower()
-
- def lemmatize(self, string, index, exceptions, rules):
- raise NotImplementedError
diff --git a/spacy/lang/pl/lex_attrs.py b/spacy/lang/pl/lex_attrs.py
index f1379aa50..ce56e28a8 100644
--- a/spacy/lang/pl/lex_attrs.py
+++ b/spacy/lang/pl/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/pl/punctuation.py b/spacy/lang/pl/punctuation.py
index c87464b1b..31e56b9ae 100644
--- a/spacy/lang/pl/punctuation.py
+++ b/spacy/lang/pl/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..char_classes import LIST_ELLIPSES, LIST_PUNCT, LIST_HYPHENS
from ..char_classes import LIST_ICONS, LIST_QUOTES, CURRENCY, UNITS, PUNCT
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
diff --git a/spacy/lang/pl/stop_words.py b/spacy/lang/pl/stop_words.py
index 11df67328..075aec391 100644
--- a/spacy/lang/pl/stop_words.py
+++ b/spacy/lang/pl/stop_words.py
@@ -1,7 +1,3 @@
-# encoding: utf8
-
-from __future__ import unicode_literals
-
# sources: https://github.com/bieli/stopwords/blob/master/polish.stopwords.txt and https://github.com/stopwords-iso/stopwords-pl
STOP_WORDS = set(
diff --git a/spacy/lang/pl/tag_map.py b/spacy/lang/pl/tag_map.py
deleted file mode 100644
index ed7d6487e..000000000
--- a/spacy/lang/pl/tag_map.py
+++ /dev/null
@@ -1,1649 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import (
- POS,
- ADJ,
- ADP,
- ADV,
- AUX,
- CCONJ,
- DET,
- INTJ,
- NOUN,
- NUM,
- PART,
- PRON,
- PROPN,
- PUNCT,
- SCONJ,
- VERB,
- X,
-)
-
-# fmt: off
-TAG_MAP = {
- "adja": {POS: ADJ},
- "adjc": {POS: ADJ},
- "adjp": {POS: ADJ, "PrepCase": "pre"},
- "adj:pl:acc:m1.p1:com": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc", "Degree": "cmp"},
- "adj:pl:acc:m1.p1:pos": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc", "Degree": "pos"},
- "adj:pl:acc:m1.p1:sup": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc", "Degree": "sup"},
- "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Degree": "cmp"},
- "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Degree": "pos"},
- "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Degree": "sup"},
- "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Degree": "cmp"},
- "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Degree": "pos"},
- "adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Degree": "sup"},
- "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "gen", "Gender": "masc|fem|neut", "Degree": "cmp"},
- "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "gen", "Gender": "masc|fem|neut", "Degree": "pos"},
- "adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "gen", "Gender": "masc|fem|neut", "Degree": "sup"},
- "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Degree": "cmp"},
- "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Degree": "pos"},
- "adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Degree": "sup"},
- "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "loc", "Gender": "masc|fem|neut", "Degree": "cmp"},
- "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "loc", "Gender": "masc|fem|neut", "Degree": "pos"},
- "adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "loc", "Gender": "masc|fem|neut", "Degree": "sup"},
- "adj:pl:nom:m1.p1:pos": {POS: ADJ, "Number": "plur", "Case": "nom", "Gender": "masc", "Degree": "pos"},
- "adj:pl:nom:m2.m3.f.n1.n2.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "nom", "Gender": "masc|fem|neut", "Degree": "pos"},
- "adj:pl:nom.voc:m1.p1:com": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Degree": "cmp"},
- "adj:pl:nom.voc:m1.p1:pos": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Degree": "pos"},
- "adj:pl:nom.voc:m1.p1:sup": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Degree": "sup"},
- "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:com": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc|fem|neut", "Degree": "cmp"},
- "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc|fem|neut", "Degree": "pos"},
- "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:sup": {POS: ADJ, "Number": "plur", "Case": "nom|voc", "Gender": "masc|fem|neut", "Degree": "sup"},
- "adj:sg:acc:f:com": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "fem", "Degree": "cmp"},
- "adj:sg:acc:f:pos": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "fem", "Degree": "pos"},
- "adj:sg:acc:f:sup": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "fem", "Degree": "sup"},
- "adj:sg:acc:m1.m2:com": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Degree": "cmp"},
- "adj:sg:acc:m1.m2:pos": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Degree": "pos"},
- "adj:sg:acc:m1.m2:sup": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Degree": "sup"},
- "adj:sg:acc:m3:com": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Degree": "cmp"},
- "adj:sg:acc:m3:pos": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Degree": "pos"},
- "adj:sg:acc:m3:sup": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Degree": "sup"},
- "adj:sg:acc:n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "neut", "Degree": "cmp"},
- "adj:sg:acc:n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "neut", "Degree": "pos"},
- "adj:sg:acc:n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "acc", "Gender": "neut", "Degree": "sup"},
- "adj:sg:dat:f:com": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "fem", "Degree": "cmp"},
- "adj:sg:dat:f:pos": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "fem", "Degree": "pos"},
- "adj:sg:dat:f:sup": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "fem", "Degree": "sup"},
- "adj:sg:dat:m1.m2.m3.n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Degree": "cmp"},
- "adj:sg:dat:m1.m2.m3.n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Degree": "pos"},
- "adj:sg:dat:m1.m2.m3.n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Degree": "sup"},
- "adj:sg:gen:f:com": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "fem", "Degree": "cmp"},
- "adj:sg:gen:f:pos": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "fem", "Degree": "pos"},
- "adj:sg:gen:f:sup": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "fem", "Degree": "sup"},
- "adj:sg:gen:m1.m2.m3.n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Degree": "cmp"},
- "adj:sg:gen:m1.m2.m3.n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Degree": "pos"},
- "adj:sg:gen:m1.m2.m3.n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Degree": "sup"},
- "adj:sg:inst:f:com": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "fem", "Degree": "cmp"},
- "adj:sg:inst:f:pos": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "fem", "Degree": "pos"},
- "adj:sg:inst:f:sup": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "fem", "Degree": "sup"},
- "adj:sg:inst:m1.m2.m3.n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "masc|neut", "Degree": "cmp"},
- "adj:sg:inst:m1.m2.m3.n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "masc|neut", "Degree": "pos"},
- "adj:sg:inst:m1.m2.m3.n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "ins", "Gender": "masc|neut", "Degree": "sup"},
- "adj:sg:loc:f:com": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "fem", "Degree": "cmp"},
- "adj:sg:loc:f:pos": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "fem", "Degree": "pos"},
- "adj:sg:loc:f:sup": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "fem", "Degree": "sup"},
- "adj:sg:loc:m1.m2.m3.n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "masc|neut", "Degree": "cmp"},
- "adj:sg:loc:m1.m2.m3.n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "masc|neut", "Degree": "pos"},
- "adj:sg:loc:m1.m2.m3.n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "loc", "Gender": "masc|neut", "Degree": "sup"},
- "adj:sg:nom:f:pos": {POS: ADJ, "Number": "sing", "Case": "nom", "Gender": "fem", "Degree": "pos"},
- "adj:sg:nom:m1.m2.m3:pos": {POS: ADJ, "Number": "sing", "Case": "nom", "Gender": "Masc", "Degree": "pos"},
- "adj:sg:nom:n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "nom", "Gender": "neut", "Degree": "pos"},
- "adj:sg:nom.voc:f:com": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Degree": "cmp"},
- "adj:sg:nom.voc:f:pos": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Degree": "pos"},
- "adj:sg:nom.voc:f:sup": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Degree": "sup"},
- "adj:sg:nom.voc:m1.m2.m3:com": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Degree": "cmp"},
- "adj:sg:nom.voc:m1.m2.m3:pos": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Degree": "pos"},
- "adj:sg:nom.voc:m1.m2.m3:sup": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Degree": "sup"},
- "adj:sg:nom.voc:n1.n2:com": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "neut", "Degree": "cmp"},
- "adj:sg:nom.voc:n1.n2:pos": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "neut", "Degree": "pos"},
- "adj:sg:nom.voc:n1.n2:sup": {POS: ADJ, "Number": "sing", "Case": "nom|voc", "Gender": "neut", "Degree": "sup"},
- "adv": {POS: ADV},
- "adv:com": {POS: ADV, "Degree": "cmp"},
- "adv:pos": {POS: ADV, "Degree": "pos"},
- "adv:sup": {POS: ADV, "Degree": "sup"},
- "aglt:pl:pri:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": "one", "Aspect": "imp", },
- "aglt:pl:pri:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": "one", "Aspect": "imp", },
- "aglt:pl:sec:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": "two", "Aspect": "imp", },
- "aglt:pl:sec:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "plur", "Person": "two", "Aspect": "imp", },
- "aglt:sg:pri:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": "one", "Aspect": "imp", },
- "aglt:sg:pri:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": "one", "Aspect": "imp", },
- "aglt:sg:sec:imperf:nwok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": "two", "Aspect": "imp", },
- "aglt:sg:sec:imperf:wok": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": "two", "Aspect": "imp", },
- "bedzie:pl:pri:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": "one", "Aspect": "imp"},
- "bedzie:pl:sec:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": "two", "Aspect": "imp"},
- "bedzie:pl:ter:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "plur", "Person": "three", "Aspect": "imp"},
- "bedzie:sg:pri:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": "one", "Aspect": "imp"},
- "bedzie:sg:sec:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": "two", "Aspect": "imp"},
- "bedzie:sg:ter:imperf": {POS: AUX, "Aspect": "imp", "Mood": "ind", "VerbForm": "fin", "Tense": "fut", "Number": "sing", "Person": "three", "Aspect": "imp"},
- "burk": {POS: X},
- "comp": {POS: SCONJ},
- "conj": {POS: CCONJ},
- "depr:pl:nom:m2": {POS: NOUN, "Animacy": "anim", "Number": "plur", "Case": "nom", "Gender": "masc", "Animacy": "anim"},
- "depr:pl:voc:m2": {POS: NOUN, "Animacy": "anim", "Number": "plur", "Case": "voc", "Gender": "masc", "Animacy": "anim"},
- "fin:pl:pri:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "one", "Aspect": "imp"},
- "fin:pl:pri:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "one", "Aspect": "imp|perf"},
- "fin:pl:pri:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "one", "Aspect": "perf"},
- "fin:pl:sec:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "two", "Aspect": "imp"},
- "fin:pl:sec:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "two", "Aspect": "imp|perf"},
- "fin:pl:sec:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "two", "Aspect": "perf"},
- "fin:pl:ter:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "three", "Aspect": "imp"},
- "fin:pl:ter:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "three", "Aspect": "imp|perf"},
- "fin:pl:ter:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "plur", "Person": "three", "Aspect": "perf"},
- "fin:sg:pri:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "one", "Aspect": "imp"},
- "fin:sg:pri:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "one", "Aspect": "imp|perf"},
- "fin:sg:pri:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "one", "Aspect": "perf"},
- "fin:sg:sec:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "two", "Aspect": "imp"},
- "fin:sg:sec:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "two", "Aspect": "imp|perf"},
- "fin:sg:sec:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "two", "Aspect": "perf"},
- "fin:sg:ter:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "three", "Aspect": "imp"},
- "fin:sg:ter:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "three", "Aspect": "imp|perf"},
- "fin:sg:ter:perf": {POS: VERB, "VerbForm": "fin", "Tense": "pres", "Mood": "ind", "Number": "sing", "Person": "three", "Aspect": "perf"},
- "ger:sg:dat.loc:n2:imperf:aff": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
- "ger:sg:dat.loc:n2:imperf:neg": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
- "ger:sg:dat.loc:n2:imperf.perf:aff": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ger:sg:dat.loc:n2:imperf.perf:neg": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ger:sg:dat.loc:n2:perf:aff": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
- "ger:sg:dat.loc:n2:perf:neg": {POS: VERB, "Number": "sing", "Case": "dat|loc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
- "ger:sg:gen:n2:imperf:aff": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
- "ger:sg:gen:n2:imperf:neg": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
- "ger:sg:gen:n2:imperf.perf:aff": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ger:sg:gen:n2:imperf.perf:neg": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ger:sg:gen:n2:perf:aff": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
- "ger:sg:gen:n2:perf:neg": {POS: VERB, "Number": "sing", "Case": "gen", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
- "ger:sg:inst:n2:imperf:aff": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
- "ger:sg:inst:n2:imperf:neg": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
- "ger:sg:inst:n2:imperf.perf:aff": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ger:sg:inst:n2:imperf.perf:neg": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ger:sg:inst:n2:perf:aff": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
- "ger:sg:inst:n2:perf:neg": {POS: VERB, "Number": "sing", "Case": "ins", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
- "ger:sg:nom.acc:n2:imperf:aff": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
- "ger:sg:nom.acc:n2:imperf:neg": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
- "ger:sg:nom.acc:n2:imperf.perf:aff": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ger:sg:nom.acc:n2:imperf.perf:neg": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ger:sg:nom.acc:n2:perf:aff": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
- "ger:sg:nom.acc:n2:perf:neg": {POS: VERB, "Number": "sing", "Case": "nom|acc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
- "imps:imperf": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Aspect": "imp"},
- "imps:imperf.perf": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Aspect": "imp|perf"},
- "imps:perf": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Aspect": "perf"},
- "impt:pl:pri:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "one", "Aspect": "imp"},
- "impt:pl:pri:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "one", "Aspect": "imp|perf"},
- "impt:pl:pri:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "one", "Aspect": "perf"},
- "impt:pl:sec:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "two", "Aspect": "imp"},
- "impt:pl:sec:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "two", "Aspect": "imp|perf"},
- "impt:pl:sec:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "plur", "Person": "two", "Aspect": "perf"},
- "impt:sg:sec:imperf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": "two", "Aspect": "imp"},
- "impt:sg:sec:imperf.perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": "two", "Aspect": "imp|perf"},
- "impt:sg:sec:perf": {POS: VERB, "Mood": "imp", "VerbForm": "fin", "Number": "sing", "Person": "two", "Aspect": "perf"},
- "inf:imperf": {POS: VERB, "VerbForm": "inf", "Aspect": "imp"},
- "inf:imperf.perf": {POS: VERB, "VerbForm": "inf", "Aspect": "imp|perf"},
- "inf:perf": {POS: VERB, "VerbForm": "inf", "Aspect": "perf"},
- "interj": {POS: INTJ},
- "num:comp": {POS: NUM},
- "num:pl:acc:m1:rec": {POS: NUM, "Number": "plur", "Case": "acc", "Gender": "Masc", "Animacy": "hum"},
- "num:pl:dat.loc:n1.p1.p2:congr.rec": {POS: NUM, "Number": "plur", "Case": "dat|loc", "Gender": "neut"},
- "num:pl:dat:m1.m2.m3.n2.f:congr": {POS: NUM, "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut"},
- "num:pl:gen.dat.inst.loc:m1.m2.m3.f.n1.n2.p1.p2:congr": {POS: NUM, "Number": "plur", "Case": "gen|dat|ins|loc", "Gender": "masc|fem|neut"},
- "num:pl:gen.dat.inst.loc:m1.m2.m3.f.n2:congr": {POS: NUM, "Number": "plur", "Case": "gen|dat|ins|loc", "Gender": "masc|fem|neut"},
- "num:pl:gen.dat.loc:m1.m2.m3.n2.f:congr": {POS: NUM, "Number": "plur", "Case": "gen|dat|loc", "Gender": "masc|fem|neut"},
- "num:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2:congr": {POS: NUM, "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut"},
- "num:pl:gen.loc:m1.m2.m3.n2.f:congr": {POS: NUM, "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut"},
- "num:pl:gen:n1.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "gen", "Gender": "neut"},
- "num:pl:inst:f:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "fem"},
- "num:pl:inst:m1.m2.m3.f.n1.n2.p1.p2:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut"},
- "num:pl:inst:m1.m2.m3.f.n2:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut"},
- "num:pl:inst:m1.m2.m3.n2:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "masc|neut"},
- "num:pl:inst:m1.m2.m3.n2.f:congr": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut"},
- "num:pl:inst:n1.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "ins", "Gender": "neut"},
- "num:pl:nom.acc:m1.m2.m3.f.n1.n2.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc", "Gender": "masc|fem|neut"},
- "num:pl:nom.acc.voc:f:congr": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "fem"},
- "num:pl:nom.acc.voc:m1:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "Masc", "Animacy": "hum"},
- "num:pl:nom.acc.voc:m2.m3.f.n1.n2.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut"},
- "num:pl:nom.acc.voc:m2.m3.f.n2:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut"},
- "num:pl:nom.acc.voc:m2.m3.n2:congr": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|neut"},
- "num:pl:nom.acc.voc:m2.m3.n2.f:congr": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut"},
- "num:pl:nom.acc.voc:n1.p1.p2:rec": {POS: NUM, "Number": "plur", "Case": "nom|acc|voc", "Gender": "neut"},
- "num:pl:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.f.n1.n2.p1.p2:rec": {POS: NUM, "Number": "plur", "Gender": "masc|fem|neut"},
- "num:pl:nom.voc:m1:congr": {POS: NUM, "Number": "plur", "Case": "nom|voc", "Gender": "Masc", "Animacy": "hum"},
- "num:pl:nom.voc:m1:rec": {POS: NUM, "Number": "plur", "Case": "nom|voc", "Gender": "Masc", "Animacy": "hum"},
- "num:sg:nom.gen.dat.inst.acc.loc.voc:f:rec": {POS: NUM, "Number": "sing", "Gender": "fem"},
- "num:sg:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.n1.n2:rec": {POS: NUM, "Number": "sing", "Gender": "masc|neut"},
- "pact:pl:acc:m1.p1:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp", "Polarity": "pos"},
- "pact:pl:acc:m1.p1:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp", "Polarity": "neg"},
- "pact:pl:acc:m1.p1:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:pl:acc:m1.p1:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"},
- "pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"},
- "pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"},
- "pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"},
- "pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"},
- "pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"},
- "pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"},
- "pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"},
- "pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:pl:nom.voc:m1.p1:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp", "Polarity": "pos"},
- "pact:pl:nom.voc:m1.p1:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp", "Polarity": "neg"},
- "pact:pl:nom.voc:m1.p1:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:pl:nom.voc:m1.p1:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:acc.inst:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:acc.inst:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:acc.inst:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:acc.inst:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:acc:m1.m2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:acc:m1.m2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:acc:m1.m2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:acc:m1.m2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:acc:m3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:acc:m3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:acc:m3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:acc:m3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:dat:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:gen.dat.loc:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:gen.dat.loc:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:gen.dat.loc:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:gen.dat.loc:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:gen:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:nom.acc.voc:n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:nom.acc.voc:n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:nom.acc.voc:n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:nom.acc.voc:n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:nom.voc:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:nom.voc:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:nom.voc:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:nom.voc:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"},
- "pact:sg:nom.voc:m1.m2.m3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp", "Polarity": "pos"},
- "pact:sg:nom.voc:m1.m2.m3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp", "Polarity": "neg"},
- "pact:sg:nom.voc:m1.m2.m3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp|perf", "Polarity": "pos"},
- "pact:sg:nom.voc:m1.m2.m3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "act", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp|perf", "Polarity": "neg"},
- "pant:perf": {POS: VERB, "Tense": "past", "VerbForm": "conv", "Aspect": "perf"},
- "pcon:imperf": {POS: VERB, "Tense": "pres", "VerbForm": "conv", "Aspect": "imp"},
- "ppas:pl:acc:m1.p1:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp", "Polarity": "pos"},
- "ppas:pl:acc:m1.p1:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp", "Polarity": "neg"},
- "ppas:pl:acc:m1.p1:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:pl:acc:m1.p1:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:pl:acc:m1.p1:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "perf", "Polarity": "pos"},
- "ppas:pl:acc:m1.p1:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "acc", "Gender": "masc", "Aspect": "perf", "Polarity": "neg"},
- "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"},
- "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"},
- "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "pos"},
- "ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "dat", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "neg"},
- "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"},
- "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"},
- "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "pos"},
- "ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "gen|loc", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "neg"},
- "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"},
- "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"},
- "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "pos"},
- "ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "ins", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "neg"},
- "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "pos"},
- "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp", "Polarity": "neg"},
- "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "pos"},
- "ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|acc|voc", "Gender": "masc|fem|neut", "Aspect": "perf", "Polarity": "neg"},
- "ppas:pl:nom.voc:m1.p1:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp", "Polarity": "pos"},
- "ppas:pl:nom.voc:m1.p1:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp", "Polarity": "neg"},
- "ppas:pl:nom.voc:m1.p1:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:pl:nom.voc:m1.p1:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:pl:nom.voc:m1.p1:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "perf", "Polarity": "pos"},
- "ppas:pl:nom.voc:m1.p1:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "plur", "Case": "nom|voc", "Gender": "masc", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:acc.inst:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:acc.inst:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:acc.inst:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:acc.inst:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:acc.inst:f:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:acc.inst:f:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc|ins", "Gender": "fem", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:acc:m1.m2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:acc:m1.m2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:acc:m1.m2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:acc:m1.m2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:acc:m1.m2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:acc:m1.m2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum|anim", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:acc:m3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:acc:m3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:acc:m3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:acc:m3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:acc:m3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:acc:m3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:dat:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:dat:m1.m2.m3.n1.n2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:dat:m1.m2.m3.n1.n2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "dat", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:gen.dat.loc:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:gen.dat.loc:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:gen.dat.loc:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:gen.dat.loc:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:gen.dat.loc:f:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:gen.dat.loc:f:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen|dat|loc", "Gender": "fem", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:gen:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:gen:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:gen:m1.m2.m3.n1.n2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:gen:m1.m2.m3.n1.n2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "gen", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "ins|loc", "Gender": "masc|neut", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:nom.acc.voc:n1.n2:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:nom.acc.voc:n1.n2:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:nom.acc.voc:n1.n2:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:nom.acc.voc:n1.n2:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:nom.acc.voc:n1.n2:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:nom.acc.voc:n1.n2:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|acc|voc", "Gender": "neut", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:nom.voc:f:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:nom.voc:f:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:nom.voc:f:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:nom.voc:f:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:nom.voc:f:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:nom.voc:f:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "fem", "Aspect": "perf", "Polarity": "neg"},
- "ppas:sg:nom.voc:m1.m2.m3:imperf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp", "Polarity": "pos"},
- "ppas:sg:nom.voc:m1.m2.m3:imperf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp", "Polarity": "neg"},
- "ppas:sg:nom.voc:m1.m2.m3:imperf.perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp|perf", "Polarity": "pos"},
- "ppas:sg:nom.voc:m1.m2.m3:imperf.perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "imp|perf", "Polarity": "neg"},
- "ppas:sg:nom.voc:m1.m2.m3:perf:aff": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "perf", "Polarity": "pos"},
- "ppas:sg:nom.voc:m1.m2.m3:perf:neg": {POS: VERB, "VerbForm": "part", "Voice": "pass", "Number": "sing", "Case": "nom|voc", "Gender": "Masc", "Aspect": "perf", "Polarity": "neg"},
- "ppron12:pl:acc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "acc", "Person": "one"},
- "ppron12:pl:acc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "acc", "Person": "two"},
- "ppron12:pl:dat:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "dat", "Person": "one"},
- "ppron12:pl:dat:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "dat", "Person": "two"},
- "ppron12:pl:gen:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "gen", "Person": "one"},
- "ppron12:pl:gen:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "gen", "Person": "two"},
- "ppron12:pl:inst:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "ins", "Person": "one"},
- "ppron12:pl:inst:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "ins", "Person": "two"},
- "ppron12:pl:loc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "loc", "Person": "one"},
- "ppron12:pl:loc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "loc", "Person": "two"},
- "ppron12:pl:nom:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "nom", "Person": "one"},
- "ppron12:pl:nom:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "nom", "Person": "two"},
- "ppron12:pl:voc:_:pri": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "voc", "Person": "one"},
- "ppron12:pl:voc:_:sec": {POS: PRON, "PronType": "prs", "Number": "plur", "Case": "voc", "Person": "two"},
- "ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": "one", },
- "ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": "one", },
- "ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": "two", },
- "ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "acc", "Gender": "masc|fem|neut", "Person": "two", },
- "ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": "one", },
- "ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": "one", },
- "ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": "two", },
- "ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "dat", "Gender": "masc|fem|neut", "Person": "two", },
- "ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": "one", },
- "ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": "one", },
- "ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": "two", },
- "ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "gen", "Gender": "masc|fem|neut", "Person": "two", },
- "ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "ins", "Gender": "masc|fem|neut", "Person": "one"},
- "ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "ins", "Gender": "masc|fem|neut", "Person": "two"},
- "ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "loc", "Gender": "masc|fem|neut", "Person": "one"},
- "ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "loc", "Gender": "masc|fem|neut", "Person": "two"},
- "ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "nom", "Gender": "masc|fem|neut", "Person": "one"},
- "ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "nom", "Gender": "masc|fem|neut", "Person": "two"},
- "ppron12:sg:voc:m1.m2.m3.f.n1.n2:pri": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "voc", "Gender": "masc|fem|neut", "Person": "one"},
- "ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec": {POS: PRON, "PronType": "prs", "Number": "sing", "Case": "voc", "Gender": "masc|fem|neut", "Person": "two"},
- "ppron3:pl:acc:m1.p1:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "acc", "Gender": "masc", "Person": "three", "PrepCase": "npr"},
- "ppron3:pl:acc:m1.p1:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "acc", "Gender": "masc", "Person": "three", "PrepCase": "pre"},
- "ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Person": "three", "PrepCase": "npr"},
- "ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "acc", "Gender": "masc|fem|neut", "Person": "three", "PrepCase": "pre"},
- "ppron3:pl:dat:_:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "dat", "Person": "three", "PrepCase": "npr"},
- "ppron3:pl:dat:_:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "dat", "Person": "three", "PrepCase": "pre"},
- "ppron3:pl:gen:_:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "gen", "Person": "three", "PrepCase": "npr"},
- "ppron3:pl:gen:_:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "gen", "Person": "three", "PrepCase": "pre"},
- "ppron3:pl:inst:_:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "ins", "Person": "three"},
- "ppron3:pl:loc:_:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "loc", "Person": "three"},
- "ppron3:pl:nom:m1.p1:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "nom", "Gender": "masc", "Person": "three"},
- "ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "plur", "Case": "nom", "Gender": "masc|fem|neut", "Person": "three"},
- "ppron3:sg:acc:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "fem", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:acc:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "fem", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:acc:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:acc:m1.m2.m3:ter:akc:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:acc:m1.m2.m3:ter:nakc:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:acc:n1.n2:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:acc:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "acc", "Gender": "neut", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:dat:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "fem", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:dat:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "fem", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:dat:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:dat:m1.m2.m3:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:dat:n1.n2:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:dat:n1.n2:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:dat:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "dat", "Gender": "neut", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:gen:f:ter:_:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "fem", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:gen:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "fem", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:gen:m1.m2.m3:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:gen:m1.m2.m3:ter:akc:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:gen:m1.m2.m3:ter:nakc:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "Masc", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:gen:n1.n2:ter:akc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:gen:n1.n2:ter:nakc:npraep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "neut", "Person": "three", "PrepCase": "npr"},
- "ppron3:sg:gen:n1.n2:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "gen", "Gender": "neut", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:inst:f:ter:_:praep": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "ins", "Gender": "fem", "Person": "three", "PrepCase": "pre"},
- "ppron3:sg:inst:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "ins", "Gender": "Masc", "Person": "three"},
- "ppron3:sg:inst:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "ins", "Gender": "neut", "Person": "three"},
- "ppron3:sg:loc:f:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "loc", "Gender": "fem", "Person": "three"},
- "ppron3:sg:loc:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "loc", "Gender": "Masc", "Person": "three"},
- "ppron3:sg:loc:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "loc", "Gender": "neut", "Person": "three"},
- "ppron3:sg:nom:f:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "nom", "Gender": "fem", "Person": "three"},
- "ppron3:sg:nom:m1.m2.m3:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "nom", "Gender": "Masc", "Person": "three"},
- "ppron3:sg:nom:n1.n2:ter:_:_": {POS: PRON, "PronType": "prs", "Person": "three", "Number": "sing", "Case": "nom", "Gender": "neut", "Person": "three"},
- "praet:pl:m1.p1:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc", "Aspect": "imp"},
- "praet:pl:m1.p1:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc", "Aspect": "imp|perf"},
- "praet:pl:m1.p1:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc", "Aspect": "perf"},
- "praet:pl:m2.m3.f.n1.n2.p2.p3:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc|fem|neut", "Aspect": "imp"},
- "praet:pl:m2.m3.f.n1.n2.p2.p3:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc|fem|neut", "Aspect": "imp|perf"},
- "praet:pl:m2.m3.f.n1.n2.p2.p3:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "plur", "Gender": "masc|fem|neut", "Aspect": "perf"},
- "praet:sg:f:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "fem", "Aspect": "imp"},
- "praet:sg:f:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "fem", "Aspect": "imp|perf"},
- "praet:sg:f:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "fem", "Aspect": "perf"},
- "praet:sg:m1.m2.m3:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "imp"},
- "praet:sg:m1.m2.m3:imperf:agl": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "imp"},
- "praet:sg:m1.m2.m3:imperf:nagl": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "imp"},
- "praet:sg:m1.m2.m3:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "imp|perf"},
- "praet:sg:m1.m2.m3:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "perf"},
- "praet:sg:m1.m2.m3:perf:agl": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "perf"},
- "praet:sg:m1.m2.m3:perf:nagl": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "Masc", "Aspect": "perf"},
- "praet:sg:n1.n2:imperf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "neut", "Aspect": "imp"},
- "praet:sg:n1.n2:imperf.perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "neut", "Aspect": "imp|perf"},
- "praet:sg:n1.n2:perf": {POS: VERB, "VerbForm": "fin", "Tense": "past", "Number": "sing", "Gender": "neut", "Aspect": "perf"},
- "pred": {POS: VERB},
- "prep:acc": {POS: ADP, "AdpType": "prep", "Case": "acc"},
- "prep:acc:nwok": {POS: ADP, "AdpType": "prep", "Case": "acc", },
- "prep:acc:wok": {POS: ADP, "AdpType": "prep", "Case": "acc", },
- "prep:dat": {POS: ADP, "AdpType": "prep", "Case": "dat"},
- "prep:gen": {POS: ADP, "AdpType": "prep", "Case": "gen"},
- "prep:gen:nwok": {POS: ADP, "AdpType": "prep", "Case": "gen", },
- "prep:gen:wok": {POS: ADP, "AdpType": "prep", "Case": "gen", },
- "prep:inst": {POS: ADP, "AdpType": "prep", "Case": "ins"},
- "prep:inst:nwok": {POS: ADP, "AdpType": "prep", "Case": "ins", },
- "prep:inst:wok": {POS: ADP, "AdpType": "prep", "Case": "ins", },
- "prep:loc": {POS: ADP, "AdpType": "prep", "Case": "loc"},
- "prep:loc:nwok": {POS: ADP, "AdpType": "prep", "Case": "loc", },
- "prep:loc:wok": {POS: ADP, "AdpType": "prep", "Case": "loc", },
- "prep:nom": {POS: ADP, "AdpType": "prep", "Case": "nom"},
- "qub": {POS: PART},
- "subst:pl:acc:f": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "fem"},
- "subst:pl:acc:m1": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "Masc", "Animacy": "hum"},
- "subst:pl:acc:m2": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "masc", "Animacy": "anim"},
- "subst:pl:acc:m3": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "masc", "Animacy": "inan"},
- "subst:pl:acc:n1": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "neut"},
- "subst:pl:acc:n2": {POS: NOUN, "Number": "plur", "Case": "acc", "Gender": "neut"},
- "subst:pl:acc:p1": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": "one"},
- "subst:pl:acc:p2": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": "two"},
- "subst:pl:acc:p3": {POS: NOUN, "Number": "plur", "Case": "acc", "Person": "three"},
- "subst:pl:dat:f": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "fem"},
- "subst:pl:dat:m1": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "Masc", "Animacy": "hum"},
- "subst:pl:dat:m2": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "masc", "Animacy": "anim"},
- "subst:pl:dat:m3": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "masc", "Animacy": "inan"},
- "subst:pl:dat:n1": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "neut"},
- "subst:pl:dat:n2": {POS: NOUN, "Number": "plur", "Case": "dat", "Gender": "neut"},
- "subst:pl:dat:p1": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": "one"},
- "subst:pl:dat:p2": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": "two"},
- "subst:pl:dat:p3": {POS: NOUN, "Number": "plur", "Case": "dat", "Person": "three"},
- "subst:pl:gen:f": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "fem"},
- "subst:pl:gen:m1": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "Masc", "Animacy": "hum"},
- "subst:pl:gen:m2": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "masc", "Animacy": "anim"},
- "subst:pl:gen:m3": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "masc", "Animacy": "inan"},
- "subst:pl:gen:n1": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "neut"},
- "subst:pl:gen:n2": {POS: NOUN, "Number": "plur", "Case": "gen", "Gender": "neut"},
- "subst:pl:gen:p1": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": "one"},
- "subst:pl:gen:p2": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": "two"},
- "subst:pl:gen:p3": {POS: NOUN, "Number": "plur", "Case": "gen", "Person": "three"},
- "subst:pl:inst:f": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "fem"},
- "subst:pl:inst:m1": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "Masc", "Animacy": "hum"},
- "subst:pl:inst:m2": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "masc", "Animacy": "anim"},
- "subst:pl:inst:m3": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "masc", "Animacy": "inan"},
- "subst:pl:inst:n1": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "neut"},
- "subst:pl:inst:n2": {POS: NOUN, "Number": "plur", "Case": "ins", "Gender": "neut"},
- "subst:pl:inst:p1": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": "one"},
- "subst:pl:inst:p2": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": "two"},
- "subst:pl:inst:p3": {POS: NOUN, "Number": "plur", "Case": "ins", "Person": "three"},
- "subst:pl:loc:f": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "fem"},
- "subst:pl:loc:m1": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "Masc", "Animacy": "hum"},
- "subst:pl:loc:m2": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "masc", "Animacy": "anim"},
- "subst:pl:loc:m3": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "masc", "Animacy": "inan"},
- "subst:pl:loc:n1": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "neut"},
- "subst:pl:loc:n2": {POS: NOUN, "Number": "plur", "Case": "loc", "Gender": "neut"},
- "subst:pl:loc:p1": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": "one"},
- "subst:pl:loc:p2": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": "two"},
- "subst:pl:loc:p3": {POS: NOUN, "Number": "plur", "Case": "loc", "Person": "three"},
- "subst:pl:nom:f": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "fem"},
- "subst:pl:nom:m1": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "Masc", "Animacy": "hum"},
- "subst:pl:nom:m2": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "masc", "Animacy": "anim"},
- "subst:pl:nom:m3": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "masc", "Animacy": "inan"},
- "subst:pl:nom:n1": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "neut"},
- "subst:pl:nom:n2": {POS: NOUN, "Number": "plur", "Case": "nom", "Gender": "neut"},
- "subst:pl:nom:p1": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": "one"},
- "subst:pl:nom:p2": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": "two"},
- "subst:pl:nom:p3": {POS: NOUN, "Number": "plur", "Case": "nom", "Person": "three"},
- "subst:pl:voc:f": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "fem"},
- "subst:pl:voc:m1": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "Masc", "Animacy": "hum"},
- "subst:pl:voc:m2": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "masc", "Animacy": "anim"},
- "subst:pl:voc:m3": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "masc", "Animacy": "inan"},
- "subst:pl:voc:n1": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "neut"},
- "subst:pl:voc:n2": {POS: NOUN, "Number": "plur", "Case": "voc", "Gender": "neut"},
- "subst:pl:voc:p1": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": "one"},
- "subst:pl:voc:p2": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": "two"},
- "subst:pl:voc:p3": {POS: NOUN, "Number": "plur", "Case": "voc", "Person": "three"},
- "subst:sg:acc:f": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "fem"},
- "subst:sg:acc:m1": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "Masc", "Animacy": "hum"},
- "subst:sg:acc:m2": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "anim"},
- "subst:sg:acc:m3": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "masc", "Animacy": "inan"},
- "subst:sg:acc:n1": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "neut"},
- "subst:sg:acc:n2": {POS: NOUN, "Number": "sing", "Case": "acc", "Gender": "neut"},
- "subst:sg:dat:f": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "fem"},
- "subst:sg:dat:m1": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "Masc", "Animacy": "hum"},
- "subst:sg:dat:m2": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "masc", "Animacy": "anim"},
- "subst:sg:dat:m3": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "masc", "Animacy": "inan"},
- "subst:sg:dat:n1": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "neut"},
- "subst:sg:dat:n2": {POS: NOUN, "Number": "sing", "Case": "dat", "Gender": "neut"},
- "subst:sg:gen:f": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "fem"},
- "subst:sg:gen:m1": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "Masc", "Animacy": "hum"},
- "subst:sg:gen:m2": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "masc", "Animacy": "anim"},
- "subst:sg:gen:m3": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "masc", "Animacy": "inan"},
- "subst:sg:gen:n1": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "neut"},
- "subst:sg:gen:n2": {POS: NOUN, "Number": "sing", "Case": "gen", "Gender": "neut"},
- "subst:sg:inst:f": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "fem"},
- "subst:sg:inst:m1": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "Masc", "Animacy": "hum"},
- "subst:sg:inst:m2": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "masc", "Animacy": "anim"},
- "subst:sg:inst:m3": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "masc", "Animacy": "inan"},
- "subst:sg:inst:n1": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "neut"},
- "subst:sg:inst:n2": {POS: NOUN, "Number": "sing", "Case": "ins", "Gender": "neut"},
- "subst:sg:loc:f": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "fem"},
- "subst:sg:loc:m1": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "Masc", "Animacy": "hum"},
- "subst:sg:loc:m2": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "masc", "Animacy": "anim"},
- "subst:sg:loc:m3": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "masc", "Animacy": "inan"},
- "subst:sg:loc:n1": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "neut"},
- "subst:sg:loc:n2": {POS: NOUN, "Number": "sing", "Case": "loc", "Gender": "neut"},
- "subst:sg:nom:f": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "fem"},
- "subst:sg:nom:m1": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "Masc", "Animacy": "hum"},
- "subst:sg:nom:m2": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "masc", "Animacy": "anim"},
- "subst:sg:nom:m3": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "masc", "Animacy": "inan"},
- "subst:sg:nom:n1": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "neut"},
- "subst:sg:nom:n2": {POS: NOUN, "Number": "sing", "Case": "nom", "Gender": "neut"},
- "subst:sg:voc:f": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "fem"},
- "subst:sg:voc:m1": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "Masc", "Animacy": "hum"},
- "subst:sg:voc:m2": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "masc", "Animacy": "anim"},
- "subst:sg:voc:m3": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "masc", "Animacy": "inan"},
- "subst:sg:voc:n1": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "neut"},
- "subst:sg:voc:n2": {POS: NOUN, "Number": "sing", "Case": "voc", "Gender": "neut"},
- "winien:pl:m1.p1:imperf": {POS: ADJ, "Number": "plur", "Gender": "masc", "Aspect": "imp"},
- "winien:pl:m2.m3.f.n1.n2.p2.p3:imperf": {POS: ADJ, "Number": "plur", "Gender": "masc|fem|neut", "Aspect": "imp"},
- "winien:sg:f:imperf": {POS: ADJ, "Number": "sing", "Gender": "fem", "Aspect": "imp"},
- "winien:sg:m1.m2.m3:imperf": {POS: ADJ, "Number": "sing", "Gender": "Masc", "Aspect": "imp"},
- "winien:sg:n1.n2:imperf": {POS: ADJ, "Number": "sing", "Gender": "neut", "Aspect": "imp"},
- # UD
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Dat|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Hum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Hum|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Dat|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Dat|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Dat|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Ins|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Ins|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Loc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Loc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Inan|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Degree=Sup|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Dat|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Degree=Sup|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Degree=Sup|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Degree=Sup|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Degree=Sup|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Nhum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Nhum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Nhum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Gen|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Nhum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Nhum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Nhum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Nhum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Acc|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Nhum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Gen|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Nhum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Nhum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Animacy=Nhum|Aspect=Perf|Case=Nom|Gender=Masc|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Animacy=Nhum|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Nhum|Case=Acc|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Nhum|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Acc|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Nhum|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Nhum|Case=Ins|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Nhum|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Ins|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Nhum|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Loc|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur": {POS: ADJ, "morph": "Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Plur"},
- "ADJ__Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing": {POS: ADJ, "morph": "Animacy=Nhum|Case=Nom|Degree=Pos|Gender=Masc|Number=Sing"},
- "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Acc|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Dat|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Ins|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Loc|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|Tense=Pres|VerbForm=Part|Voice=Act"},
- "ADJ__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Imp|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Aspect=Imp|Gender=Fem|Number=Plur"},
- "ADJ__Aspect=Imp|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Aspect=Imp|Gender=Fem|Number=Sing"},
- "ADJ__Aspect=Imp|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Aspect=Imp|Gender=Neut|Number=Plur"},
- "ADJ__Aspect=Imp|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Aspect=Imp|Gender=Neut|Number=Sing"},
- "ADJ__Aspect=Perf|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Acc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Acc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Acc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Acc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Dat|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Dat|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Dat|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Dat|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Gen|Gender=Fem|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Fem|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Ins|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Ins|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Loc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Loc|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Loc|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Loc|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Nom|Gender=Fem|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Fem|Number=Plur|Polarity=Neg|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Fem|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Fem|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos"},
- "ADJ__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass": {POS: ADJ, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Part|Voice=Pass"},
- "ADJ__Case=Acc|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Plur"},
- "ADJ__Case=Acc|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Fem|Number=Sing"},
- "ADJ__Case=Acc|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|Number=Plur"},
- "ADJ__Case=Acc|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Degree=Pos|Gender=Neut|Number=Sing"},
- "ADJ__Case=Acc|Degree=Sup|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Fem|Number=Plur"},
- "ADJ__Case=Acc|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Fem|Number=Sing"},
- "ADJ__Case=Acc|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Neut|Number=Plur"},
- "ADJ__Case=Acc|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Degree=Sup|Gender=Neut|Number=Sing"},
- "ADJ__Case=Acc|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Gender=Fem|Number=Plur"},
- "ADJ__Case=Acc|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Gender=Fem|Number=Sing"},
- "ADJ__Case=Acc|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Acc|Gender=Neut|Number=Plur"},
- "ADJ__Case=Acc|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Acc|Gender=Neut|Number=Sing"},
- "ADJ__Case=Dat|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|Number=Plur"},
- "ADJ__Case=Dat|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Fem|Number=Sing"},
- "ADJ__Case=Dat|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|Number=Plur"},
- "ADJ__Case=Dat|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Dat|Degree=Pos|Gender=Neut|Number=Sing"},
- "ADJ__Case=Dat|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Dat|Degree=Sup|Gender=Neut|Number=Plur"},
- "ADJ__Case=Gen|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|Number=Plur"},
- "ADJ__Case=Gen|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Fem|Number=Sing"},
- "ADJ__Case=Gen|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|Number=Plur"},
- "ADJ__Case=Gen|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Degree=Pos|Gender=Neut|Number=Sing"},
- "ADJ__Case=Gen|Degree=Sup|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Fem|Number=Plur"},
- "ADJ__Case=Gen|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Fem|Number=Sing"},
- "ADJ__Case=Gen|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Neut|Number=Plur"},
- "ADJ__Case=Gen|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Degree=Sup|Gender=Neut|Number=Sing"},
- "ADJ__Case=Gen|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Gender=Fem|Number=Plur"},
- "ADJ__Case=Gen|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Gender=Fem|Number=Sing"},
- "ADJ__Case=Gen|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Gen|Gender=Neut|Number=Plur"},
- "ADJ__Case=Gen|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Gen|Gender=Neut|Number=Sing"},
- "ADJ__Case=Ins|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|Number=Plur"},
- "ADJ__Case=Ins|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Fem|Number=Sing"},
- "ADJ__Case=Ins|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|Number=Plur"},
- "ADJ__Case=Ins|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Degree=Pos|Gender=Neut|Number=Sing"},
- "ADJ__Case=Ins|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Fem|Number=Sing"},
- "ADJ__Case=Ins|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Neut|Number=Plur"},
- "ADJ__Case=Ins|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Degree=Sup|Gender=Neut|Number=Sing"},
- "ADJ__Case=Ins|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Ins|Gender=Fem|Number=Plur"},
- "ADJ__Case=Ins|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Gender=Fem|Number=Sing"},
- "ADJ__Case=Ins|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Ins|Gender=Neut|Number=Sing"},
- "ADJ__Case=Loc|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|Number=Plur"},
- "ADJ__Case=Loc|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Fem|Number=Sing"},
- "ADJ__Case=Loc|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|Number=Plur"},
- "ADJ__Case=Loc|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Degree=Pos|Gender=Neut|Number=Sing"},
- "ADJ__Case=Loc|Degree=Sup|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Fem|Number=Plur"},
- "ADJ__Case=Loc|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Neut|Number=Plur"},
- "ADJ__Case=Loc|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Degree=Sup|Gender=Neut|Number=Sing"},
- "ADJ__Case=Loc|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Gender=Fem|Number=Plur"},
- "ADJ__Case=Loc|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Gender=Fem|Number=Sing"},
- "ADJ__Case=Loc|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Loc|Gender=Neut|Number=Plur"},
- "ADJ__Case=Loc|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Loc|Gender=Neut|Number=Sing"},
- "ADJ__Case=Nom|Degree=Pos|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|Number=Plur"},
- "ADJ__Case=Nom|Degree=Pos|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Fem|Number=Sing"},
- "ADJ__Case=Nom|Degree=Pos|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|Number=Plur"},
- "ADJ__Case=Nom|Degree=Pos|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Degree=Pos|Gender=Neut|Number=Sing"},
- "ADJ__Case=Nom|Degree=Sup|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Fem|Number=Plur"},
- "ADJ__Case=Nom|Degree=Sup|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Fem|Number=Sing"},
- "ADJ__Case=Nom|Degree=Sup|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Neut|Number=Plur"},
- "ADJ__Case=Nom|Degree=Sup|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Degree=Sup|Gender=Neut|Number=Sing"},
- "ADJ__Case=Nom|Gender=Fem|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Gender=Fem|Number=Plur"},
- "ADJ__Case=Nom|Gender=Fem|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Gender=Fem|Number=Sing"},
- "ADJ__Case=Nom|Gender=Neut|Number=Plur": {POS: ADJ, "morph": "Case=Nom|Gender=Neut|Number=Plur"},
- "ADJ__Case=Nom|Gender=Neut|Number=Sing": {POS: ADJ, "morph": "Case=Nom|Gender=Neut|Number=Sing"},
- "ADJ__Hyph=Yes": {POS: ADJ, "morph": "Hyph=Yes"},
- "ADJ__PrepCase=Pre": {POS: ADJ, "morph": "PrepCase=Pre"},
- "ADP__AdpType=Prep|Case=Acc": {POS: ADP, "morph": "AdpType=Prep|Case=Acc"},
- "ADP__AdpType=Prep|Case=Acc|Variant=Long": {POS: ADP, "morph": "AdpType=Prep|Case=Acc|Variant=Long"},
- "ADP__AdpType=Prep|Case=Acc|Variant=Short": {POS: ADP, "morph": "AdpType=Prep|Case=Acc|Variant=Short"},
- "ADP__AdpType=Prep|Case=Dat": {POS: ADP, "morph": "AdpType=Prep|Case=Dat"},
- "ADP__AdpType=Prep|Case=Gen": {POS: ADP, "morph": "AdpType=Prep|Case=Gen"},
- "ADP__AdpType=Prep|Case=Gen|Variant=Long": {POS: ADP, "morph": "AdpType=Prep|Case=Gen|Variant=Long"},
- "ADP__AdpType=Prep|Case=Gen|Variant=Short": {POS: ADP, "morph": "AdpType=Prep|Case=Gen|Variant=Short"},
- "ADP__AdpType=Prep|Case=Ins": {POS: ADP, "morph": "AdpType=Prep|Case=Ins"},
- "ADP__AdpType=Prep|Case=Ins|Variant=Long": {POS: ADP, "morph": "AdpType=Prep|Case=Ins|Variant=Long"},
- "ADP__AdpType=Prep|Case=Ins|Variant=Short": {POS: ADP, "morph": "AdpType=Prep|Case=Ins|Variant=Short"},
- "ADP__AdpType=Prep|Case=Loc": {POS: ADP, "morph": "AdpType=Prep|Case=Loc"},
- "ADP__AdpType=Prep|Case=Loc|Variant=Long": {POS: ADP, "morph": "AdpType=Prep|Case=Loc|Variant=Long"},
- "ADP__AdpType=Prep|Case=Loc|Variant=Short": {POS: ADP, "morph": "AdpType=Prep|Case=Loc|Variant=Short"},
- "ADP__AdpType=Prep|Case=Nom": {POS: ADP, "morph": "AdpType=Prep|Case=Nom"},
- "ADV___": {POS: ADV},
- "ADV__Degree=Pos": {POS: ADV, "morph": "Degree=Pos"},
- "ADV__Degree=Sup": {POS: ADV, "morph": "Degree=Sup"},
- "AUX___": {POS: AUX},
- "AUX__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Aspect=Imp|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Imp|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Aspect=Imp|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Imp|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Aspect=Imp|Mood=Cnd|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Cnd|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Fut|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|Variant=Short|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|Variant=Short|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|Variant=Short|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|Variant=Short|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Fut|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Fut|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|Variant=Long|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|Variant=Long|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|Variant=Short|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|Variant=Short|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Fut|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|Variant=Long|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|Variant=Long|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|Variant=Short|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|Variant=Short|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Fut|VerbForm=Fin"},
- "AUX__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Imp|VerbForm=Inf": {POS: AUX, "morph": "Aspect=Imp|VerbForm=Inf"},
- "AUX__Aspect=Perf|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Perf|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Aspect=Perf|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Perf|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: AUX, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "AUX__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: AUX, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin"},
- "AUX__Aspect=Perf|VerbForm=Inf": {POS: AUX, "morph": "Aspect=Perf|VerbForm=Inf"},
- "CCONJ___": {POS: CCONJ},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Ind"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg"},
- "DET__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Ind"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Neg"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Neg"},
- "DET__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Ind"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Neg"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg"},
- "DET__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Neg"},
- "DET__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Ind"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Ind"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Ind"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg"},
- "DET__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Nhum|Case=Dat|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Nhum|Case=Dat|Gender=Masc|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|PronType=Tot": {POS: DET, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|PronType=Tot"},
- "DET__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Dem"},
- "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Int,Rel"},
- "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot"},
- "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Dem"},
- "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind"},
- "DET__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Acc|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Acc|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Acc|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Acc|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Acc|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|PronType=Dem"},
- "DET__Case=Acc|Gender=Fem|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|PronType=Ind"},
- "DET__Case=Acc|Gender=Fem|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|PronType=Int,Rel"},
- "DET__Case=Acc|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Plur|PronType=Tot"},
- "DET__Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Acc|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Acc|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|PronType=Dem"},
- "DET__Case=Acc|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|PronType=Ind"},
- "DET__Case=Acc|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Acc|Gender=Fem|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Acc|Gender=Fem|Number=Sing|PronType=Tot"},
- "DET__Case=Acc|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Acc|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Acc|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Dem"},
- "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Ind"},
- "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Int,Rel"},
- "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Neg"},
- "DET__Case=Acc|Gender=Neut|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Plur|PronType=Tot"},
- "DET__Case=Acc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Acc|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Acc|Gender=Neut|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Dem"},
- "DET__Case=Acc|Gender=Neut|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Ind"},
- "DET__Case=Acc|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Acc|Gender=Neut|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Tot"},
- "DET__Case=Dat|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Dat|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Dat|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Plur|PronType=Dem"},
- "DET__Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Dat|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Dat|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|PronType=Dem"},
- "DET__Case=Dat|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Dat|Gender=Fem|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Dat|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Dat|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Dat|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Dat|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Dat|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Dat|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Gen|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Gen|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Gen|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Dem"},
- "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Ind"},
- "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Int,Rel"},
- "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Neg"},
- "DET__Case=Gen|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Plur|PronType=Tot"},
- "DET__Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Gen|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Dem"},
- "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Ind"},
- "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Neg"},
- "DET__Case=Gen|Gender=Fem|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Gen|Gender=Fem|Number=Sing|PronType=Tot"},
- "DET__Case=Gen|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Gen|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Gen|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Dem"},
- "DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Ind"},
- "DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Int,Rel"},
- "DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Neg"},
- "DET__Case=Gen|Gender=Neut|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Plur|PronType=Tot"},
- "DET__Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Gen|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Dem"},
- "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Ind"},
- "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Neg"},
- "DET__Case=Gen|Gender=Neut|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Tot"},
- "DET__Case=Ins|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Ins|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Plur|PronType=Dem"},
- "DET__Case=Ins|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Plur|PronType=Tot"},
- "DET__Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Ins|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Ins|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|PronType=Dem"},
- "DET__Case=Ins|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|PronType=Ind"},
- "DET__Case=Ins|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Ins|Gender=Fem|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Ins|Gender=Fem|Number=Sing|PronType=Neg"},
- "DET__Case=Ins|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Ins|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Ins|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Plur|PronType=Dem"},
- "DET__Case=Ins|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Plur|PronType=Int,Rel"},
- "DET__Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Ins|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Ins|Gender=Neut|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Ind"},
- "DET__Case=Ins|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Loc|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Loc|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Loc|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Loc|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|PronType=Dem"},
- "DET__Case=Loc|Gender=Fem|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|PronType=Int,Rel"},
- "DET__Case=Loc|Gender=Fem|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|PronType=Neg"},
- "DET__Case=Loc|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Plur|PronType=Tot"},
- "DET__Case=Loc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Loc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Loc|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Loc|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|PronType=Dem"},
- "DET__Case=Loc|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|PronType=Ind"},
- "DET__Case=Loc|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Loc|Gender=Fem|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Loc|Gender=Fem|Number=Sing|PronType=Tot"},
- "DET__Case=Loc|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Loc|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|PronType=Dem"},
- "DET__Case=Loc|Gender=Neut|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|PronType=Ind"},
- "DET__Case=Loc|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|PronType=Int,Rel"},
- "DET__Case=Loc|Gender=Neut|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Plur|PronType=Tot"},
- "DET__Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Loc|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|Poss=Yes|PronType=Prs|Reflex=Yes"},
- "DET__Case=Loc|Gender=Neut|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Dem"},
- "DET__Case=Loc|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Loc|Gender=Neut|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Tot"},
- "DET__Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Nom|Gender=Fem|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|PronType=Dem"},
- "DET__Case=Nom|Gender=Fem|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|PronType=Ind"},
- "DET__Case=Nom|Gender=Fem|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|PronType=Int,Rel"},
- "DET__Case=Nom|Gender=Fem|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Plur|PronType=Tot"},
- "DET__Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Dem"},
- "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Ind"},
- "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Neg"},
- "DET__Case=Nom|Gender=Fem|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Nom|Gender=Fem|Number=Sing|PronType=Tot"},
- "DET__Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|Number[psor]=Sing|Person=2|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|NumType=Card|PronType=Ind"},
- "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Dem": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Dem"},
- "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Ind": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Ind"},
- "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Int,Rel": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Int,Rel"},
- "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Neg": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Neg"},
- "DET__Case=Nom|Gender=Neut|Number=Plur|PronType=Tot": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Plur|PronType=Tot"},
- "DET__Case=Nom|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|Number[psor]=Plur|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|Number[psor]=Sing|Person=1|Poss=Yes|PronType=Prs"},
- "DET__Case=Nom|Gender=Neut|Number=Sing|PronType=Dem": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Dem"},
- "DET__Case=Nom|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "DET__Case=Nom|Gender=Neut|Number=Sing|PronType=Neg": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Neg"},
- "DET__Case=Nom|Gender=Neut|Number=Sing|PronType=Tot": {POS: DET, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Tot"},
- "NOUN__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Hum|Case=Voc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Nhum|Case=Dat|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Dat|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Nhum|Case=Dat|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Dat|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur"},
- "NOUN__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing": {POS: NOUN, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing"},
- "NOUN__Animacy=Nhum|Case=Voc|Gender=Masc|Number=Plur": {POS: NOUN, "morph": "Animacy=Nhum|Case=Voc|Gender=Masc|Number=Plur"},
- "NOUN__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"},
- "NOUN__Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Plur|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"},
- "NOUN__Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Imp|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"},
- "NOUN__Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Acc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Dat|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"},
- "NOUN__Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Gen|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Ins|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Loc|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Neg|VerbForm=Vnoun"},
- "NOUN__Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun": {POS: NOUN, "morph": "Aspect=Perf|Case=Nom|Gender=Neut|Number=Sing|Polarity=Pos|VerbForm=Vnoun"},
- "NOUN__Case=Acc|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|Number=Plur"},
- "NOUN__Case=Acc|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Acc|Gender=Fem|Number=Sing"},
- "NOUN__Case=Acc|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|Number=Plur"},
- "NOUN__Case=Acc|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Acc|Gender=Neut|Number=Sing"},
- "NOUN__Case=Dat|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|Number=Plur"},
- "NOUN__Case=Dat|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Dat|Gender=Fem|Number=Sing"},
- "NOUN__Case=Dat|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|Number=Plur"},
- "NOUN__Case=Dat|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Dat|Gender=Neut|Number=Sing"},
- "NOUN__Case=Gen|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|Number=Plur"},
- "NOUN__Case=Gen|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Gen|Gender=Fem|Number=Sing"},
- "NOUN__Case=Gen|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Gen|Gender=Neut|Number=Plur"},
- "NOUN__Case=Gen|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Gen|Gender=Neut|Number=Sing"},
- "NOUN__Case=Ins|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|Number=Plur"},
- "NOUN__Case=Ins|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Ins|Gender=Fem|Number=Sing"},
- "NOUN__Case=Ins|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|Number=Plur"},
- "NOUN__Case=Ins|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Ins|Gender=Neut|Number=Sing"},
- "NOUN__Case=Loc|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|Number=Plur"},
- "NOUN__Case=Loc|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Loc|Gender=Fem|Number=Sing"},
- "NOUN__Case=Loc|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Loc|Gender=Neut|Number=Plur"},
- "NOUN__Case=Loc|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Loc|Gender=Neut|Number=Sing"},
- "NOUN__Case=Nom|Gender=Fem|Number=Plur": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|Number=Plur"},
- "NOUN__Case=Nom|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Nom|Gender=Fem|Number=Sing"},
- "NOUN__Case=Nom|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|Number=Plur"},
- "NOUN__Case=Nom|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Nom|Gender=Neut|Number=Sing"},
- "NOUN__Case=Voc|Gender=Fem|Number=Sing": {POS: NOUN, "morph": "Case=Voc|Gender=Fem|Number=Sing"},
- "NOUN__Case=Voc|Gender=Neut|Number=Plur": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|Number=Plur"},
- "NOUN__Case=Voc|Gender=Neut|Number=Sing": {POS: NOUN, "morph": "Case=Voc|Gender=Neut|Number=Sing"},
- "NUM__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: NUM, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing"},
- "NUM__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: NUM, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing"},
- "NUM__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur"},
- "NUM__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur": {POS: NUM, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur"},
- "NUM__Case=Acc|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Acc|Gender=Fem|Number=Plur"},
- "NUM__Case=Acc|Gender=Fem|Number=Sing": {POS: NUM, "morph": "Case=Acc|Gender=Fem|Number=Sing"},
- "NUM__Case=Acc|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Acc|Gender=Neut|Number=Plur"},
- "NUM__Case=Dat|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Dat|Gender=Fem|Number=Plur"},
- "NUM__Case=Dat|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Dat|Gender=Neut|Number=Plur"},
- "NUM__Case=Gen|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Gen|Gender=Fem|Number=Plur"},
- "NUM__Case=Gen|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Gen|Gender=Neut|Number=Plur"},
- "NUM__Case=Ins|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Ins|Gender=Fem|Number=Plur"},
- "NUM__Case=Ins|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Ins|Gender=Neut|Number=Plur"},
- "NUM__Case=Loc|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Loc|Gender=Fem|Number=Plur"},
- "NUM__Case=Loc|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Loc|Gender=Neut|Number=Plur"},
- "NUM__Case=Nom|Gender=Fem|Number=Plur": {POS: NUM, "morph": "Case=Nom|Gender=Fem|Number=Plur"},
- "NUM__Case=Nom|Gender=Neut|Number=Plur": {POS: NUM, "morph": "Case=Nom|Gender=Neut|Number=Plur"},
- "NUM__Case=Nom|Number=Plur": {POS: NUM, "morph": "Case=Nom|Number=Plur"},
- "PART___": {POS: PART},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=1|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=2|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur|PronType=Tot"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Ind"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "PRON__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing|PronType=Neg"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=1|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=2|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Tot": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur|PronType=Tot"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Ind"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "PRON__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing|PronType=Neg"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=1|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=2|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur|PronType=Tot"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=1|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=2|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Ind"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "PRON__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing|PronType=Neg"},
- "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=1|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=2|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=1|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=2|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "PRON__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=1|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=2|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing|PronType=Ind"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=1|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=2|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur|PronType=Tot"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=1|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=2|PronType=Prs"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Ind"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Int,Rel"},
- "PRON__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing|PronType=Neg"},
- "PRON__Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing|Person=2|PronType=Prs"},
- "PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Nhum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Animacy=Nhum|Case=Dat|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Short"},
- "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Dem"},
- "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Ind"},
- "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Neg"},
- "PRON__Case=Acc|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Acc|Gender=Neut|Number=Sing|PronType=Tot"},
- "PRON__Case=Acc|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Acc|PronType=Prs|Reflex=Yes"},
- "PRON__Case=Dat|Gender=Fem|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Plur|Person=1|PronType=Prs"},
- "PRON__Case=Dat|Gender=Fem|Number=Plur|Person=2|PronType=Prs": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Plur|Person=2|PronType=Prs"},
- "PRON__Case=Dat|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Dat|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Dat|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long"},
- "PRON__Case=Dat|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Short"},
- "PRON__Case=Dat|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Short"},
- "PRON__Case=Dat|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Dat|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Dat|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Dat|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Dat|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Dat|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Case=Dat|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Dat|Gender=Neut|Number=Sing|PronType=Dem"},
- "PRON__Case=Dat|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Dat|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "PRON__Case=Dat|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Dat|PronType=Prs|Reflex=Yes"},
- "PRON__Case=Gen|Gender=Fem|Number=Plur|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Plur|Person=1|PronType=Prs"},
- "PRON__Case=Gen|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Sing|Person=1|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Sing|Person=2|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Short"},
- "PRON__Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Dem"},
- "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Ind"},
- "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Neg"},
- "PRON__Case=Gen|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Gen|Gender=Neut|Number=Sing|PronType=Tot"},
- "PRON__Case=Gen|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Gen|PronType=Prs|Reflex=Yes"},
- "PRON__Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Short"},
- "PRON__Case=Ins|Gender=Fem|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Sing|Person=1|PronType=Prs"},
- "PRON__Case=Ins|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Ins|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Ins|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Ins|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Dem"},
- "PRON__Case=Ins|Gender=Neut|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Ind"},
- "PRON__Case=Ins|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "PRON__Case=Ins|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Ins|Gender=Neut|Number=Sing|PronType=Tot"},
- "PRON__Case=Ins|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Ins|PronType=Prs|Reflex=Yes"},
- "PRON__Case=Loc|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Loc|Gender=Fem|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Loc|Gender=Fem|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Loc|Gender=Fem|Number=Sing|Person=1|PronType=Prs"},
- "PRON__Case=Loc|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Loc|Gender=Fem|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Loc|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Plur|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Loc|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|Person=3|PrepCase=Pre|PronType=Prs|Variant=Long"},
- "PRON__Case=Loc|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Dem"},
- "PRON__Case=Loc|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "PRON__Case=Loc|Gender=Neut|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Neg"},
- "PRON__Case=Loc|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Loc|Gender=Neut|Number=Sing|PronType=Tot"},
- "PRON__Case=Loc|PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "Case=Loc|PronType=Prs|Reflex=Yes"},
- "PRON__Case=Nom|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Nom|Gender=Fem|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Nom|Gender=Fem|Number=Sing|Person=1|PronType=Prs": {POS: PRON, "morph": "Case=Nom|Gender=Fem|Number=Sing|Person=1|PronType=Prs"},
- "PRON__Case=Nom|Gender=Fem|Number=Sing|Person=2|PronType=Prs": {POS: PRON, "morph": "Case=Nom|Gender=Fem|Number=Sing|Person=2|PronType=Prs"},
- "PRON__Case=Nom|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Nom|Gender=Fem|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Nom|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Plur|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Nom|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|Person=3|PrepCase=Npr|PronType=Prs|Variant=Long"},
- "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Dem": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Dem"},
- "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Ind": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Ind"},
- "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Int,Rel": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Int,Rel"},
- "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Neg": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Neg"},
- "PRON__Case=Nom|Gender=Neut|Number=Sing|PronType=Tot": {POS: PRON, "morph": "Case=Nom|Gender=Neut|Number=Sing|PronType=Tot"},
- "PRON__PronType=Prs|Reflex=Yes": {POS: PRON, "morph": "PronType=Prs|Reflex=Yes"},
- "PRON__PronType=Prs|Reflex=Yes|Typo=Yes": {POS: PRON, "morph": "PronType=Prs|Reflex=Yes|Typo=Yes"},
- "PROPN__Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Acc|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Dat|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Gen|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Ins|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Loc|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Nom|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Hum|Case=Voc|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Hum|Case=Voc|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Acc|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Dat|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Gen|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Ins|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Loc|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Inan|Case=Nom|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Acc|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Gen|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Ins|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Loc|Gender=Masc|Number=Sing"},
- "PROPN__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur": {POS: PROPN, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Plur"},
- "PROPN__Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing": {POS: PROPN, "morph": "Animacy=Nhum|Case=Nom|Gender=Masc|Number=Sing"},
- "PROPN__Case=Acc|Gender=Fem|Number=Plur": {POS: PROPN, "morph": "Case=Acc|Gender=Fem|Number=Plur"},
- "PROPN__Case=Acc|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Acc|Gender=Fem|Number=Sing"},
- "PROPN__Case=Acc|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Acc|Gender=Neut|Number=Plur"},
- "PROPN__Case=Acc|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Acc|Gender=Neut|Number=Sing"},
- "PROPN__Case=Dat|Gender=Fem|Number=Plur": {POS: PROPN, "morph": "Case=Dat|Gender=Fem|Number=Plur"},
- "PROPN__Case=Dat|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Dat|Gender=Fem|Number=Sing"},
- "PROPN__Case=Dat|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Dat|Gender=Neut|Number=Sing"},
- "PROPN__Case=Gen|Gender=Fem|Number=Plur": {POS: PROPN, "morph": "Case=Gen|Gender=Fem|Number=Plur"},
- "PROPN__Case=Gen|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Gen|Gender=Fem|Number=Sing"},
- "PROPN__Case=Gen|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Gen|Gender=Neut|Number=Plur"},
- "PROPN__Case=Gen|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Gen|Gender=Neut|Number=Sing"},
- "PROPN__Case=Ins|Gender=Fem|Number=Plur": {POS: PROPN, "morph": "Case=Ins|Gender=Fem|Number=Plur"},
- "PROPN__Case=Ins|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Ins|Gender=Fem|Number=Sing"},
- "PROPN__Case=Ins|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Ins|Gender=Neut|Number=Plur"},
- "PROPN__Case=Ins|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Ins|Gender=Neut|Number=Sing"},
- "PROPN__Case=Loc|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Loc|Gender=Fem|Number=Sing"},
- "PROPN__Case=Loc|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Loc|Gender=Neut|Number=Plur"},
- "PROPN__Case=Loc|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Loc|Gender=Neut|Number=Sing"},
- "PROPN__Case=Nom|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Nom|Gender=Fem|Number=Sing"},
- "PROPN__Case=Nom|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Nom|Gender=Neut|Number=Plur"},
- "PROPN__Case=Nom|Gender=Neut|Number=Sing": {POS: PROPN, "morph": "Case=Nom|Gender=Neut|Number=Sing"},
- "PROPN__Case=Voc|Gender=Fem|Number=Sing": {POS: PROPN, "morph": "Case=Voc|Gender=Fem|Number=Sing"},
- "PROPN__Case=Voc|Gender=Neut|Number=Plur": {POS: PROPN, "morph": "Case=Voc|Gender=Neut|Number=Plur"},
- "PUNCT___": {POS: PUNCT},
- "SCONJ___": {POS: SCONJ},
- "VERB___": {POS: VERB},
- "VERB__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Hum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Hum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Inan|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Inan|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Nhum|Aspect=Imp|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Animacy=Nhum|Aspect=Perf|Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Aspect=Imp|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Imp|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Aspect=Imp|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Imp|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Aspect=Imp|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin"},
- "VERB__Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin"},
- "VERB__Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin"},
- "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Imp|Tense=Pres|VerbForm=Conv": {POS: VERB, "morph": "Aspect=Imp|Tense=Pres|VerbForm=Conv"},
- "VERB__Aspect=Imp|VerbForm=Inf": {POS: VERB, "morph": "Aspect=Imp|VerbForm=Inf"},
- "VERB__Aspect=Perf|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Perf|Gender=Fem|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Aspect=Perf|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Plur|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act": {POS: VERB, "morph": "Aspect=Perf|Gender=Neut|Number=Sing|Tense=Past|VerbForm=Part|Voice=Act"},
- "VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Plur|Person=1|VerbForm=Fin"},
- "VERB__Aspect=Perf|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Plur|Person=2|VerbForm=Fin"},
- "VERB__Aspect=Perf|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Imp|Number=Sing|Person=2|VerbForm=Fin"},
- "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=2|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin": {POS: VERB, "morph": "Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin"},
- "VERB__Aspect=Perf|Tense=Past|VerbForm=Conv": {POS: VERB, "morph": "Aspect=Perf|Tense=Past|VerbForm=Conv"},
- "VERB__Aspect=Perf|VerbForm=Inf": {POS: VERB, "morph": "Aspect=Perf|VerbForm=Inf"},
- "X___": {POS: X},
- "X__Abbr=Yes": {POS: X, "morph": "Abbr=Yes"}
-}
-# fmt: on
diff --git a/spacy/lang/pt/__init__.py b/spacy/lang/pt/__init__.py
index c09996126..0447099f0 100644
--- a/spacy/lang/pt/__init__.py
+++ b/spacy/lang/pt/__init__.py
@@ -1,27 +1,16 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .stop_words import STOP_WORDS
from .lex_attrs import LEX_ATTRS
-from .tag_map import TAG_MAP
-
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES
from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
class PortugueseDefaults(Language.Defaults):
- lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
- lex_attr_getters[LANG] = lambda text: "pt"
- lex_attr_getters.update(LEX_ATTRS)
- tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
- stop_words = STOP_WORDS
- tag_map = TAG_MAP
+ tokenizer_exceptions = TOKENIZER_EXCEPTIONS
infixes = TOKENIZER_INFIXES
prefixes = TOKENIZER_PREFIXES
+ lex_attr_getters = LEX_ATTRS
+ stop_words = STOP_WORDS
class Portuguese(Language):
diff --git a/spacy/lang/pt/examples.py b/spacy/lang/pt/examples.py
index b7206ffd7..13f3512cf 100644
--- a/spacy/lang/pt/examples.py
+++ b/spacy/lang/pt/examples.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
"""
Example sentences to test spaCy and its language models.
diff --git a/spacy/lang/pt/lex_attrs.py b/spacy/lang/pt/lex_attrs.py
index 4ad0eeecb..3c6979ab4 100644
--- a/spacy/lang/pt/lex_attrs.py
+++ b/spacy/lang/pt/lex_attrs.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ...attrs import LIKE_NUM
diff --git a/spacy/lang/pt/punctuation.py b/spacy/lang/pt/punctuation.py
index 370e6aaad..08e31f9d0 100644
--- a/spacy/lang/pt/punctuation.py
+++ b/spacy/lang/pt/punctuation.py
@@ -1,6 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
from ..punctuation import TOKENIZER_SUFFIXES as BASE_TOKENIZER_SUFFIXES
from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
diff --git a/spacy/lang/pt/stop_words.py b/spacy/lang/pt/stop_words.py
index 774b06809..ff45ad3a7 100644
--- a/spacy/lang/pt/stop_words.py
+++ b/spacy/lang/pt/stop_words.py
@@ -1,7 +1,3 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-
STOP_WORDS = set(
"""
à às área acerca ademais adeus agora ainda algo algumas alguns ali além ambas ambos antes
diff --git a/spacy/lang/pt/tag_map.py b/spacy/lang/pt/tag_map.py
deleted file mode 100644
index cdc7de57e..000000000
--- a/spacy/lang/pt/tag_map.py
+++ /dev/null
@@ -1,5057 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, PUNCT, SYM, ADJ, NUM, DET, ADV, ADP, X, VERB, CCONJ
-from ...symbols import NOUN, PROPN, PART, INTJ, SPACE, PRON, SCONJ, AUX
-
-
-TAG_MAP = {
- "<-sam>||DET|F|P|@P<": {POS: PRON},
- "<-sam>||DET|M|P|@P<": {POS: PRON},
- "<-sam>||ART|@>A": {POS: DET},
- "<-sam>||ART|@>N": {POS: DET},
- "<-sam>||ART|F|P|@>N": {POS: DET},
- "<-sam>||ART|F|S|@>N": {POS: DET},
- "<-sam>||ART|F|S|@P<": {POS: DET},
- "<-sam>||ART|M|P|@>A": {POS: DET},
- "<-sam>||ART|M|P|@>N": {POS: DET},
- "<-sam>||ART|M|S|@||ART|M|S|@>A": {POS: DET},
- "<-sam>||ART|M|S|@>N": {POS: DET},
- "<-sam>||ART|M|S|@N<": {POS: DET},
- "<-sam>||ART|M|S|@P<": {POS: DET},
- "<-sam>||DET|F|P|@>N": {POS: DET},
- "<-sam>||DET|F|S|@>N": {POS: DET},
- "<-sam>||DET|M|P|@>N": {POS: DET},
- "<-sam>||DET|M|S/P|@>N": {POS: DET},
- "<-sam>||DET|M|S|@>N": {POS: DET},
- "<-sam>||DET|M|S|@P<": {POS: PRON},
- "<-sam>||ART|F|S|@>N": {POS: DET},
- "<-sam>||ART|M|S|@>N": {POS: DET},
- "<-sam>||DET|F|S|@>N": {POS: DET},
- "<-sam>||DET|M|S|@>N": {POS: DET},
- "<-sam>||NUM|M|S|@P<": {POS: NUM},
- "<-sam>|||DET|M|S|@P<": {POS: PRON},
- "<-sam>||DET|F|P|@>N": {POS: DET},
- "<-sam>||DET|F|P|@P<": {POS: PRON},
- "<-sam>||DET|F|S|@>N": {POS: DET},
- "<-sam>||DET|F|S|@P<": {POS: PRON},
- "<-sam>||DET|F|S|@SUBJ>": {POS: PRON},
- "<-sam>||DET|M|P|@>N": {POS: DET},
- "<-sam>||DET|M|P|@P<": {POS: PRON},
- "<-sam>||DET|M|S|@>N": {POS: DET},
- "<-sam>||DET|M|S|@P<": {POS: PRON},
- "<-sam>||INDP|M|S|@P<": {POS: PRON},
- "<-sam>||DET|F|P|@>N": {POS: DET},
- "<-sam>||DET|F|P|@P<": {POS: PRON},
- "<-sam>||DET|F|S|@>N": {POS: DET},
- "<-sam>||DET|M|P|@>N": {POS: DET},
- "<-sam>||DET|M|S|@>N": {POS: DET},
- "<-sam>||DET|M|S|@P<": {POS: PRON},
- "<-sam>||DET|F|P|@>N": {POS: DET},
- "<-sam>||DET|F|P|@P<": {POS: PRON},
- "<-sam>||DET|M|P|@>N": {POS: DET},
- "<-sam>||PERS|F|3S|PIV|@P<": {POS: PRON},
- "<-sam>||PERS|M|3S|PIV|@P<": {POS: PRON},
- "<-sam>||INDP|M|P|@SUBJ>": {POS: PRON},
- "<-sam>||INDP|M|S|@P<": {POS: PRON},
- "<-sam>|ADV|@ADVL>": {POS: ADV},
- "<-sam>|ADV|@P<": {POS: ADV},
- "<-sam>|ART|@>N": {POS: DET},
- "<-sam>|ART|F|P|@>N": {POS: DET},
- "<-sam>|ART|F|S|@>N": {POS: DET},
- "<-sam>|ART|M|P|@>N": {POS: DET},
- "<-sam>|ART|M|S|@>N": {POS: DET},
- "<-sam>|DET|@>N": {POS: DET},
- "<-sam>|DET|F|P|@P<": {POS: PRON},
- "<-sam>|DET|F|S|@>N": {POS: DET},
- "<-sam>|DET|F|S|@P<": {POS: PRON},
- "<-sam>|DET|M|P|@P<": {POS: PRON},
- "<-sam>|DET|M|S|@>A": {POS: DET},
- "<-sam>|DET|M|S|@>N": {POS: DET},
- "<-sam>|DET|M|S|@P<": {POS: PRON},
- "<-sam>|INDP|M|S|@P<": {POS: PRON},
- "<-sam>|INDP|M|S|@SUBJ>": {POS: PRON},
- "<-sam>|PERS|F|1P|PIV|@P<": {POS: PRON},
- "<-sam>|PERS|F|1S|PIV|@P<": {POS: PRON},
- "<-sam>|PERS|F|3P|NOM/PIV|@P<": {POS: PRON},
- "<-sam>|PERS|F|3P|NOM|@P<": {POS: PRON},
- "<-sam>|PERS|F|3P|PIV|@P<": {POS: PRON},
- "<-sam>|PERS|F|3S|ACC|@ACC>": {POS: PRON},
- "<-sam>|PERS|F|3S|NOM/PIV|@P<": {POS: PRON},
- "<-sam>|PERS|F|3S|NOM|@SUBJ>": {POS: PRON},
- "<-sam>|PERS|F|3S|PIV|@P<": {POS: PRON},
- "<-sam>|PERS|M/F|2P|PIV|@P<": {POS: PRON},
- "<-sam>|PERS|M|3P|NOM/PIV|@P<": {POS: PRON},
- "<-sam>|PERS|M|3P|NOM|@P<": {POS: PRON},
- "<-sam>|PERS|M|3P|PIV|@P<": {POS: PRON},
- "<-sam>|PERS|M|3S|ACC|@NPHR": {POS: PRON},
- "<-sam>|PERS|M|3S|NOM/PIV|@P<": {POS: PRON},
- "<-sam>|PERS|M|3S|NOM|@P<": {POS: PRON},
- "<-sam>|PERS|M|3S|NOM|@SUBJ>": {POS: PRON},
- "<-sam>|PERS|M|3S|PIV|@P<": {POS: PRON},
- "<-sam>|PRP|@N<": {POS: ADP},
- "|ADJ|F|P|@|ADJ|F|P|@|ADJ|F|P|@>N": {POS: ADJ},
- "|ADJ|F|P|@N<": {POS: ADJ},
- "|ADJ|F|P|@P<": {POS: ADJ},
- "|ADJ|F|S|@|ADJ|F|S|@|ADJ|F|S|@>N": {POS: ADJ},
- "|ADJ|F|S|@N<": {POS: ADJ},
- "|ADJ|F|S|@N|ADJ|F|S|@P<": {POS: ADJ},
- "|ADJ|F|S|@SC>": {POS: ADJ},
- "|ADJ|M/F|S|@|ADJ|M|P|@|ADJ|M|P|@>N": {POS: ADJ},
- "|ADJ|M|P|@ADVL>": {POS: ADJ},
- "|ADJ|M|P|@N<": {POS: ADJ},
- "|ADJ|M|S|@|ADJ|M|S|@|ADJ|M|S|@|ADJ|M|S|@>A": {POS: ADJ},
- "|ADJ|M|S|@>N": {POS: ADJ},
- "|ADJ|M|S|@ADVL>": {POS: ADJ},
- "|ADJ|M|S|@AS<": {POS: ADJ},
- "|ADJ|M|S|@N<": {POS: ADJ},
- "|ADJ|M|S|@P<": {POS: ADJ},
- "|ADJ|M|S|@SC>": {POS: ADJ},
- "||PRP|@||ADJ|M|P|@N<": {POS: ADJ},
- "||DET|F|S|@P<": {POS: PRON},
- "||DET|M|P|@P<": {POS: PRON},
- "||N|F|S|@SUBJ>": {POS: NOUN},
- "|||ADJ|F|S|@P<": {POS: ADJ},
- "||N|M|P|@||N|F|P|@|ADV|@ICL-N<": {POS: ADV},
- "|ADV|@N|ADV|@|ADV|@|ADV|@>A": {POS: ADV},
- "|ADV|@ADVL>": {POS: ADV},
- "|ADV|@P<": {POS: ADV},
- "||ADJ|F|P|@||ADJ|F|P|@>N": {POS: ADJ},
- "||ADJ|F|P|@N<": {POS: ADJ},
- "||ADJ|F|S|@||ADJ|F|S|@>N": {POS: ADJ},
- "||ADJ|F|S|@N<": {POS: ADJ},
- "||ADJ|F|S|@N||ADJ|F|S|@SC>": {POS: ADJ},
- "||ADJ|M/F|S|@||ADJ|M/F|S|@||ADJ|M|P|@||ADJ|M|P|@||ADJ|M|P|@>N": {POS: ADJ},
- "||ADJ|M|P|@N<": {POS: ADJ},
- "||ADJ|M|P|@N||ADJ|M|P|@P<": {POS: ADJ},
- "||ADJ|M|S|@||ADJ|M|S|@||ADJ|M|S|@||ADJ|M|S|@>N": {POS: ADJ},
- "||ADJ|M|S|@N<": {POS: ADJ},
- "||ADJ|M|S|@N||ADJ|M|S|@P<": {POS: ADJ},
- "||ADJ|M|S|@PRED>": {POS: ADJ},
- "||ADJ|M|S|@SC>": {POS: ADJ},
- "||ADV|@||ADV|@>N": {POS: ADV},
- "||ADV|@ADVL>": {POS: ADV},
- "|||ADJ|F|P|@>N": {POS: ADJ},
- "|||ADJ|F|S|@>N": {POS: ADJ},
- "|||ADJ|F|S|@N<": {POS: ADJ},
- "|||ADJ|M|P|@|||ADJ|M|P|@>N": {POS: ADJ},
- "|||ADJ|M|S|@|||ADJ|M|S|@>N": {POS: ADJ},
- "|||ADJ|M|S|@N<": {POS: ADJ},
- "|||ADJ|M|S|@SC>": {POS: ADJ},
- "|||ADV|@|||ADV|@ADVL>": {POS: ADV},
- "|||||ADJ|M|S|@|||||ADJ|M|S|@SC>": {POS: ADJ},
- "