mirror of https://github.com/explosion/spaCy.git
synced 2025-02-04 13:40:34 +03:00
Move shared functions and constants to global language data
This commit is contained in:
parent 6a60a61086
commit 08162dce67
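The definitions being hoisted appear verbatim in the hunks below. A minimal sketch of what the shared spacy/language_data module exposes after this commit (the module layout is an assumption; strings_to_exc, PRON_LEMMA and the get_time_exc signature are the parts confirmed by the diff):

# Sketch of the shared language_data module after this commit.
# ORTH and LEMMA stand in for spaCy's attribute IDs, which the real
# files pull in via `from ..symbols import *`.
ORTH, LEMMA = "ORTH", "LEMMA"

# Placeholder lemma shared by all pronoun forms; previously each
# language package redefined this constant locally.
PRON_LEMMA = "-PRON-"


def strings_to_exc(orths):
    # Map each string to a one-token tokenizer exception with that
    # exact orthography, e.g. ":)" -> [{ORTH: ":)"}].
    return {orth: [{ORTH: orth}] for orth in orths}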
@@ -8,9 +8,9 @@ from ..attrs import LANG
 from . import language_data
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc



 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
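For context, these imports feed a pattern repeated across the language packages: copy the language-specific exceptions, then merge in the generated ones. A hedged sketch of that composition (update_exc is assumed to merge the second dict into the first; only the names themselves appear in the diff):

TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(EMOTICONS))
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))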
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals

 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES


-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
     "$(": {TAG: PUNCT, "PunctType": "brck"},
     "$,": {TAG: PUNCT, "PunctType": "comm"},
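With PRON_LEMMA imported from the shared module, the per-language exception tables can keep referencing it. A hypothetical entry showing the intended use, reusing the stand-ins from the first sketch (the contraction itself is illustrative, not from this commit):

TOKENIZER_EXCEPTIONS = {
    # Pronoun + clitic contraction, with the pronoun lemmatised to
    # the shared placeholder instead of a per-form lemma.
    "ich's": [
        {ORTH: "ich", LEMMA: PRON_LEMMA},
        {ORTH: "'s"}
    ]
}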
@@ -12,9 +12,9 @@ from ..tokenizer import Tokenizer
 from ..attrs import LANG
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 from .language_data import get_time_exc


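This package additionally imports get_time_exc, so its exception table also picks up generated time-of-day entries. A sketch of the likely call site (the hour range is an assumption, not shown in the diff):

update_exc(TOKENIZER_EXCEPTIONS, get_time_exc(range(1, 12 + 1)))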
@@ -2,15 +2,12 @@
 from __future__ import unicode_literals

 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES


-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
 def get_time_exc(hours):
     exc = {}
     for hour in hours:
@@ -36,9 +33,6 @@ def get_time_exc(hours):
     return exc


-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
     ".": {POS: PUNCT, "PunctType": "peri"},
     ",": {POS: PUNCT, "PunctType": "comm"},
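The body of get_time_exc is elided between the two hunks above; only the signature, the exc accumulator, the loop header and the return survive. A hypothetical reconstruction, reusing the ORTH/LEMMA stand-ins from the first sketch (the exact surface forms are a guess, not taken from the commit):

def get_time_exc(hours):
    # Hypothetical body: let forms like "3am" / "3pm" tokenize as a
    # number token plus a meridiem token.
    exc = {}
    for hour in hours:
        exc["%dam" % hour] = [{ORTH: "%d" % hour}, {ORTH: "am", LEMMA: "a.m."}]
        exc["%dpm" % hour] = [{ORTH: "%d" % hour}, {ORTH: "pm", LEMMA: "p.m."}]
    return exc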
@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc



 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals

 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES


-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {

 }
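The TAG_MAP stub above is empty for this language. For reference, the populated tag maps elsewhere in this diff map a fine-grained tag string to a universal POS plus Universal Features key/value pairs (entries copied from the hunks above; the stand-in constants replace symbols that the real files get via `from ..symbols import *`):

POS, PUNCT = "POS", "PUNCT"  # stand-ins for spaCy's symbol IDs

TAG_MAP = {
    ".": {POS: PUNCT, "PunctType": "peri"},
    ",": {POS: PUNCT, "PunctType": "comm"}
}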
@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc



 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals

 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES


-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {

 }
@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc



 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals

 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES


-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {

 }
@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc



 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals

 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES


-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {

 }