2017-03-23 13:10:22 +03:00
|
|
|
# encoding: utf8
|
|
|
|
from __future__ import unicode_literals
|
2017-05-08 16:51:22 +03:00
|
|
|
|
2017-05-08 23:29:04 +03:00
|
|
|
from ...symbols import ORTH, LEMMA
|
2017-05-08 16:51:22 +03:00
|
|
|
|
|
|
|
|
|
|
|
# Exception table under construction: maps an exact surface string to the
# list of token attribute dicts it should produce.
_exc = {}


# Abbreviated month names paired with the lemma recorded for each one.
# There is no entry for May in this table.
_MONTH_ABBREVIATIONS = (
    ("jan.", "januar"),
    ("feb.", "februar"),
    ("mar.", "mars"),
    ("apr.", "april"),
    ("jun.", "juni"),
    ("jul.", "juli"),
    ("aug.", "august"),
    ("sep.", "september"),
    ("okt.", "oktober"),
    ("nov.", "november"),
    ("des.", "desember"),
)

for _month_orth, _month_lemma in _MONTH_ABBREVIATIONS:
    # One single-token entry per abbreviation, carrying both ORTH and LEMMA.
    _exc[_month_orth] = [{ORTH: _month_orth, LEMMA: _month_lemma}]
|
2017-05-08 16:51:22 +03:00
|
|
|
|
|
|
|
|
|
|
|
# Register each abbreviation below as an exception made of a single token
# dict whose ORTH is the abbreviation itself (no LEMMA is attached).
#
# Every literal needs its own trailing comma: adjacent string literals are
# concatenated by Python, which previously merged "org." and "osv." into the
# single bogus entry "org.osv." and left both real abbreviations unregistered.
#
# NOTE(review): "jun.", "nov.", "okt." and "sep." are also registered by the
# month loop earlier in this file; the assignment here replaces those entries
# and thereby drops their LEMMA. Confirm whether that override is intended.
for orth in [
    "adm.dir.",
    "a.m.",
    "andelsnr",
    "Aq.",
    "b.c.",
    "bl.a.",
    "bla.",
    "bm.",
    "bnr.",
    "bto.",
    "ca.",
    "cand.mag.",
    "c.c.",
    "co.",
    "d.d.",
    "dept.",
    "d.m.",
    "dr.philos.",
    "dvs.",
    "d.y.",
    "E. coli",
    "eg.",
    "ekskl.",
    "e.Kr.",
    "el.",
    "e.l.",
    "et.",
    "etc.",
    "etg.",
    "ev.",
    "evt.",
    "f.",
    "f.eks.",
    "fhv.",
    "fk.",
    "f.Kr.",
    "f.o.m.",
    "foreg.",
    "fork.",
    "fv.",
    "fvt.",
    "g.",
    "gt.",
    "gl.",
    "gno.",
    "gnr.",
    "grl.",
    "hhv.",
    "hoh.",
    "hr.",
    "h.r.adv.",
    "ifb.",
    "ifm.",
    "iht.",
    "inkl.",
    "istf.",
    "jf.",
    "jr.",
    "jun.",
    "kfr.",
    "kgl.res.",
    "kl.",
    "komm.",
    "kr.",
    "kst.",
    "lø.",
    "ma.",
    "mag.art.",
    "m.a.o.",
    "md.",
    "mfl.",
    "mill.",
    "min.",
    "m.m.",
    "mnd.",
    "moh.",
    "Mr.",
    "muh.",
    "mv.",
    "mva.",
    "ndf.",
    "no.",
    "nov.",
    "nr.",
    "nto.",
    "nyno.",
    "n.å.",
    "o.a.",
    "off.",
    "ofl.",
    "okt.",
    "o.l.",
    "on.",
    "op.",
    "org.",
    "osv.",
    "ovf.",
    "p.",
    "p.a.",
    "Pb.",
    "pga.",
    "ph.d.",
    "pkt.",
    "p.m.",
    "pr.",
    "pst.",
    "p.t.",
    "red.anm.",
    "ref.",
    "res.",
    "res.kap.",
    "resp.",
    "rv.",
    "s.",
    "s.d.",
    "sen.",
    "sep.",
    "siviling.",
    "sms.",
    "snr.",
    "spm.",
    "sr.",
    "sst.",
    "st.",
    "stip.",
    "stk.",
    "st.meld.",
    "st.prp.",
    "stud.",
    "s.u.",
    "sv.",
    "sø.",
    "s.å.",
    "såk.",
    "temp.",
    "ti.",
    "tils.",
    "tilsv.",
    "tl;dr",
    "tlf.",
    "to.",
    "t.o.m.",
    "ult.",
    "utg.",
    "v.",
    "vedk.",
    "vedr.",
    "vg.",
    "vgs.",
    "vha.",
    "vit.ass.",
    "vn.",
    "vol.",
    "vs.",
    "vsa.",
    "årg.",
    "årh.",
]:
    _exc[orth] = [{ORTH: orth}]
|
|
|
|
|
|
|
|
|
2017-10-31 23:05:29 +03:00
|
|
|
# Expose the assembled table under its public, upper-case name. This binds the
# same dict object as _exc (no copy is made).
TOKENIZER_EXCEPTIONS = _exc
|