mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
18 lines
374 B
Python
18 lines
374 B
Python
# encoding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
from ...symbols import ORTH, LEMMA
|
|
|
|
|
|
# Working dict of tokenizer exceptions, keyed by the exact surface string.
_exc = {}

# Danish abbreviations (plus "A/S") containing punctuation. Each one is
# registered as a one-element list, i.e. a single entry whose ORTH is the
# string itself. The list deliberately contains no empty string: "" is not a
# meaningful surface form and must not become an exception key.
for orth in [
    "A/S", "beg.", "bl.a.", "ca.", "d.s.s.", "dvs.", "f.eks.", "fr.", "hhv.",
    "if.", "iflg.", "m.a.o.", "mht.", "min.", "osv.", "pga.", "resp.", "self.",
    "t.o.m.", "vha.",
]:
    _exc[orth] = [{ORTH: orth}]


# Export a shallow copy so the public name is a distinct dict object from the
# private working dict.
TOKENIZER_EXCEPTIONS = dict(_exc)
|