spaCy/spacy/ru/tokenizer_exceptions.py

31 lines
621 B
Python
Raw Normal View History

2017-10-12 18:24:20 +03:00
# encoding: utf8
from __future__ import unicode_literals
from ..symbols import *
# Abbreviated Russian weekday names (capitalised, with a trailing period),
# each paired with the full weekday name that is stored as its lemma.
_WEEKDAY_ABBREVIATIONS = (
    ("Пн.", "Понедельник"),
    ("Вт.", "Вторник"),
    ("Ср.", "Среда"),
    ("Чт.", "Четверг"),
    ("Пт.", "Пятница"),
    ("Сб.", "Суббота"),
    ("Вс.", "Воскресенье"),
)

# Maps each abbreviation string to a one-element list of attribute dicts:
# ORTH repeats the abbreviation exactly as written, LEMMA carries the full
# weekday name.
# NOTE(review): presumably consumed by the Russian tokenizer as its
# special-case table -- confirm against the language class that imports it.
TOKENIZER_EXCEPTIONS = {
    abbreviation: [{ORTH: abbreviation, LEMMA: full_name}]
    for abbreviation, full_name in _WEEKDAY_ABBREVIATIONS
}