spaCy/spacy/lang/ro/tokenizer_exceptions.py
Ines Montani db55577c45
Drop Python 2.7 and 3.5 (#4828)
* Remove unicode declarations

* Remove Python 3.5 and 2.7 from CI

* Don't require pathlib

* Replace compat helpers

* Remove OrderedDict

* Use f-strings

* Set Cython compiler language level

* Fix typo

* Re-add OrderedDict for Table

* Update setup.cfg

* Revert CONTRIBUTING.md

* Revert lookups.md

* Revert top-level.md

* Small adjustments and docs [ci skip]
2019-12-22 01:53:56 +01:00

50 lines
646 B
Python

from ...symbols import ORTH
# Table of Romanian strings registered as tokenizer exceptions. Each key is
# the surface string itself; each value is a one-item list whose only dict
# sets ORTH to that same string.
# Source: https://en.wiktionary.org/wiki/Category:Romanian_abbreviations
_exc = {
    orth: [{ORTH: orth}]
    for orth in (
        # Digits followed by a hyphenated suffix.
        "1-a",
        "2-a",
        "3-a",
        "4-a",
        "5-a",
        "6-a",
        "7-a",
        "8-a",
        "9-a",
        "10-a",
        "11-a",
        "12-a",
        "1-ul",
        "2-lea",
        "3-lea",
        "4-lea",
        "5-lea",
        "6-lea",
        "7-lea",
        "8-lea",
        "9-lea",
        "10-lea",
        "11-lea",
        "12-lea",
        # Abbreviations and other fixed written forms.
        "d-voastră",
        "dvs.",
        "ing.",
        "dr.",
        "Rom.",
        "str.",
        "nr.",
        "etc.",
        "d.p.d.v.",
        "dpdv",
        "șamd.",
        "ș.a.m.d.",
    )
}

# Name under which the finished table is exposed; it is the same dict object
# as _exc, not a copy.
TOKENIZER_EXCEPTIONS = _exc