Move shared functions and constants to global language data

Ines Montani 2016-12-17 12:32:48 +01:00
parent 6a60a61086
commit 08162dce67
12 changed files with 12 additions and 48 deletions
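
The files below all follow one pattern: each language package's `__init__.py` and `language_data.py` drop their private copies of `strings_to_exc` and `PRON_LEMMA` and import the shared definitions from the top-level `language_data` package instead. A minimal sketch of those shared definitions (the `strings_to_exc` body and the `PRON_LEMMA` value are taken verbatim from the deleted per-language copies; `update_exc` is not shown in the diff, so its body here is an assumption, written as a plain dict merge):

    # language_data (shared package) -- illustrative sketch, not the
    # verbatim module
    from ..symbols import ORTH

    # Lemma assigned to pronoun forms; value copied from the deleted
    # per-language constants.
    PRON_LEMMA = "-PRON-"

    def strings_to_exc(orths):
        # Body copied from the deleted per-language helpers: each string
        # becomes a one-token exception whose ORTH is the string itself.
        return {orth: [{ORTH: orth}] for orth in orths}

    def update_exc(exc, additions):
        # Assumed behaviour: merge additional exceptions into an
        # existing exception dict in place.
        exc.update(additions)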

View File

@@ -8,9 +8,9 @@ from ..attrs import LANG
 from . import language_data
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
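
Downstream of these imports, the `__init__` typically folds the shared emoticon and orth-only string lists into the language's exceptions. A hedged sketch of that follow-up (these statements sit outside the hunk shown above):

    TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
    # Turn each shared string list into exceptions and merge them in.
    update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(EMOTICONS))
    update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))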

View File

@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
     "$(": {POS: PUNCT, "PunctType": "brck"},
     "$,": {POS: PUNCT, "PunctType": "comm"},

View File

@@ -12,9 +12,9 @@ from ..tokenizer import Tokenizer
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 from .language_data import get_time_exc

View File

@@ -2,15 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
 def get_time_exc(hours):
     exc = {}
     for hour in hours:
@@ -36,9 +33,6 @@ def get_time_exc(hours):
     return exc
 
 
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
     ".": {POS: PUNCT, "PunctType": "peri"},
     ",": {POS: PUNCT, "PunctType": "comm"},

View File

@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)

View File

@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
 }

View File

@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)

View File

@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
 }

View File

@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)

View File

@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
 }

View File

@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)

View File

@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
 }