mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 21:57:15 +03:00
eddeb36c96
<!--- Provide a general summary of your changes in the title. --> ## Description - [x] Use [`black`](https://github.com/ambv/black) to auto-format all `.py` files. - [x] Update flake8 config to exclude very large files (lemmatization tables etc.) - [x] Update code to be compatible with flake8 rules - [x] Fix various small bugs, inconsistencies and messy stuff in the language data - [x] Update docs to explain new code style (`black`, `flake8`, when to use `# fmt: off` and `# fmt: on` and what `# noqa` means) Once #2932 is merged, which auto-formats and tidies up the CLI, we'll be able to run `flake8 spacy` and actually get meaningful results. At the moment, the code style and linting aren't applied automatically, but I'm hoping that the new [GitHub Actions](https://github.com/features/actions) will let us auto-format pull requests and post comments with relevant linting information. ### Types of change enhancement, code style ## Checklist <!--- Before you submit the PR, go over this checklist and make sure you can tick off all the boxes. [] -> [x] --> - [x] I have submitted the spaCy Contributor Agreement. - [x] I ran the tests, and all new and existing tests passed. - [x] My changes don't require a change to the documentation, or if they do, I've added all required information.
105 lines
2.2 KiB
Python
105 lines
2.2 KiB
Python
# coding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
from .lookup import LOOKUP # noqa: F401
|
|
|
|
|
|
# Lemmatization rule table.
#
# Maps a category key ("noun", "verb", "adj", "punct") to an ordered list of
# two-element ``[old, new]`` string pairs. An empty ``new`` string means the
# matched text is replaced with nothing.
# NOTE(review): presumably ``old`` is matched as a word suffix by the
# lemmatizer that consumes this table -- confirm against the caller.
# The order of the pairs is kept exactly as authored, in case the consumer
# depends on it.
LEMMA_RULES = {
    "noun": [
        ["t", ""],
        ["n", ""],
        ["na", ""],
        ["na", "e"],
        ["or", "a"],
        ["orna", "a"],
        ["et", ""],
        ["en", ""],
        ["en", "e"],
        ["er", ""],
        ["erna", ""],
        ["ar", "e"],
        ["ar", ""],
        ["lar", "el"],
        ["arna", "e"],
        ["arna", ""],
        ["larna", "el"],
    ],
    "verb": [
        ["r", ""],
        ["de", ""],
        ["t", ""],
        ["er", ""],
        ["te", ""],
        ["a", ""],
        ["e", ""],
        ["t", "d"],
        ["tt", "d"],
        ["tt", ""],
        ["ev", "iv"],
        ["ack", "ick"],
        ["ög", "yg"],
        ["it", ""],
        ["uckit", "ick"],
        ["ugit", "yg"],
        ["it", "et"],
        ["id", "ed"],
        ["ip", "ep"],
        ["iv", "ev"],
        ["in", "en"],
        ["ik", "ek"],
        ["ig", "eg"],
        ["ind", ""],
        ["inn", "ann"],
        ["nder", "nd"],
        ["inner", "inn"],
        ["and", "ind"],
        ["ann", "inn"],
        ["s", ""],
        ["anns", "inn"],
        ["undit", "ind"],
        ["unnit", "inn"],
        ["unnits", "inn"],
        ["uppit", "ipp"],
        ["ungit", "ing"],
        ["öd", "ud"],
        ["öt", "jut"],
        ["öt", "ut"],
        ["ög", "ug"],
        ["ögg", "ugg"],
        ["öng", "ung"],
        ["önk", "unk"],
        ["öt", "yt"],
        ["utit", "yt"],
        ["ös", "ys"],
        ["öv", "yv"],
        ["uvit", "yv"],
        ["öp", "yp"],
        ["upit", "yp"],
        ["ök", "yk"],
        ["ukit", "yk"],
        ["or", "ar"],
        ["öll", "all"],
        ["ät", "åt"],
        ["öll", "åll"],
        ["or", "är"],
        ["urit", "är"],
        ["åt", "ät"],
        ["ar", "är"],
        ["alt", "ält"],
        ["ultit", "ält"],
    ],
    "adj": [
        ["are", ""],
        ["ast", ""],
        ["re", ""],
        ["st", ""],
        ["ägre", "åg"],
        ["ägst", "åg"],
        ["ängre", "ång"],
        ["ängst", "ång"],
        ["örre", "or"],
        ["örst", "or"],
    ],
    # Typographic (curly) double and single quotes map to their ASCII forms.
    "punct": [["“", '"'], ["”", '"'], ["\u2018", "'"], ["\u2019", "'"]],
}
|