mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
db55577c45
* Remove unicode declarations * Remove Python 3.5 and 2.7 from CI * Don't require pathlib * Replace compat helpers * Remove OrderedDict * Use f-strings * Set Cython compiler language level * Fix typo * Re-add OrderedDict for Table * Update setup.cfg * Revert CONTRIBUTING.md * Revert lookups.md * Revert top-level.md * Small adjustments and docs [ci skip]
50 lines
646 B
Python
50 lines
646 B
Python
from ...symbols import ORTH
|
|
|
|
|
|
# Special-case table: every surface form listed below is mapped to a
# one-element list containing a single attribute dict whose ORTH value is
# the form itself. The entries are hyphenated ordinal forms ("1-a", "2-lea",
# ...) followed by abbreviations, most of them ending in a period.
# Source: https://en.wiktionary.org/wiki/Category:Romanian_abbreviations
_exc = {
    form: [{ORTH: form}]
    for form in (
        "1-a",
        "2-a",
        "3-a",
        "4-a",
        "5-a",
        "6-a",
        "7-a",
        "8-a",
        "9-a",
        "10-a",
        "11-a",
        "12-a",
        "1-ul",
        "2-lea",
        "3-lea",
        "4-lea",
        "5-lea",
        "6-lea",
        "7-lea",
        "8-lea",
        "9-lea",
        "10-lea",
        "11-lea",
        "12-lea",
        "d-voastră",
        "dvs.",
        "ing.",
        "dr.",
        "Rom.",
        "str.",
        "nr.",
        "etc.",
        "d.p.d.v.",
        "dpdv",
        "șamd.",
        "ș.a.m.d.",
    )
}

# Public name for the table; it refers to the same dict object as `_exc`.
TOKENIZER_EXCEPTIONS = _exc
|