Mirror of https://github.com/explosion/spaCy.git (synced 2025-01-12 02:06:31 +03:00)

Clean up imports, unused code, whitespace, docstrings

This commit is contained in:
parent 561f2a3eb4
commit d24589aa72
@@ -1,27 +1,13 @@
 # coding: utf8
-from __future__ import unicode_literals, print_function
+from __future__ import unicode_literals
 
-import json
 from pathlib import Path
 
 from .util import set_lang_class, get_lang_class, parse_package_meta
 from .deprecated import resolve_model_name
 from .cli import info
 
-from . import en
-from . import de
-from . import zh
-from . import es
-from . import it
-from . import hu
-from . import fr
-from . import pt
-from . import nl
-from . import sv
-from . import fi
-from . import bn
-from . import he
+from . import en, de, zh, es, it, hu, fr, pt, nl, sv, fi, bn, he
 
-from .about import *
 
 set_lang_class(en.English.lang, en.English)
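Note: the set_lang_class/get_lang_class helpers used above implement a plain
registry pattern (the util.py hunk further down defines LANGUAGES = {} and the
set_lang_class signature). A minimal sketch of that pattern; the function
bodies are assumptions, only the names and signatures come from this diff:

    LANGUAGES = {}

    def set_lang_class(name, cls):
        # Register a Language subclass under its string ID, e.g. 'en'.
        LANGUAGES[name] = cls

    def get_lang_class(name):
        # Hypothetical lookup; the real helper's error handling may differ.
        if name not in LANGUAGES:
            raise RuntimeError("Language not supported: %s" % name)
        return LANGUAGES[name]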
@@ -1,3 +1,7 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
 IDS = {
     "": NULL_ATTR,
     "IS_ALPHA": IS_ALPHA,
@@ -1,3 +1,6 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from libc.stdio cimport fopen, fclose, fread, fwrite
 from libc.string cimport memcpy
 
@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-import io
 from pathlib import Path
 
 from .converters import conllu2json
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import json
-from ...gold import read_json_file, merge_sents
 from ... import util
 
 
@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-import pip
 import requests
 import os
 import subprocess
@@ -5,8 +5,6 @@ import json
 from pathlib import Path
 
 from ..scorer import Scorer
-from ..tagger import Tagger
-from ..syntax.parser import Parser
 from ..gold import GoldParse, merge_sents
 from ..gold import read_json_file as read_gold_json
 from .. import util
@@ -60,7 +58,6 @@ def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_
     print("Itn.\tN weight\tN feats\tUAS\tNER F.\tTag %\tToken %")
 
     with Language.train(output_path, train_data, tagger_cfg, parser_cfg, entity_cfg) as trainer:
-        loss = 0
         for itn, epoch in enumerate(trainer.epochs(n_iter, augment_data=None)):
            for doc, gold in epoch:
                trainer.update(doc, gold)
@@ -1,4 +1,5 @@
 from pathlib import Path
+
 from . import about
 from . import util
 from .cli import download
@@ -11,12 +11,6 @@ from ..deprecated import fix_glove_vectors_loading
 from .language_data import *
 
 
-try:
-    basestring
-except NameError:
-    basestring = str
-
-
 class English(Language):
     lang = 'en'
 
@@ -1,13 +1,11 @@
 # cython: profile=True
+# coding: utf8
 from __future__ import unicode_literals, print_function
 
 import io
-import json
 import re
-import os
-from os import path
+import ujson
+from pathlib import Path
 
-import ujson as json
-
 from .syntax import nonproj
 
@@ -303,7 +301,8 @@ cdef class GoldParse:
         self.heads = proj_heads
 
     def __len__(self):
-        """Get the number of gold-standard tokens.
+        """
+        Get the number of gold-standard tokens.
 
         Returns (int): The number of gold-standard tokens.
         """
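Note: the docstring edit above shows the convention applied throughout this
commit: the summary line moves off the opening triple quotes onto its own
line. A minimal sketch of the resulting shape; the method body is a
hypothetical addition for illustration, not part of this diff:

    def __len__(self):
        """
        Get the number of gold-standard tokens.

        Returns (int): The number of gold-standard tokens.
        """
        return self.length  # assumed body, shown only to complete the sketch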
@@ -1,9 +1,7 @@
-from __future__ import absolute_import
-from __future__ import unicode_literals
-import pathlib
+# coding: utf8
+from __future__ import absolute_import, unicode_literals
 from contextlib import contextmanager
 import shutil
 
 import ujson
 
@@ -21,19 +19,18 @@ from .tokenizer import Tokenizer
 from .vocab import Vocab
 from .tagger import Tagger
 from .matcher import Matcher
-from . import attrs
-from . import orth
-from . import util
-from . import language_data
 from .lemmatizer import Lemmatizer
 from .train import Trainer
 
-from .attrs import TAG, DEP, ENT_IOB, ENT_TYPE, HEAD, PROB, LANG, IS_STOP
 from .syntax.parser import get_templates
 from .syntax.nonproj import PseudoProjectivity
 from .pipeline import DependencyParser, EntityRecognizer
 from .syntax.arc_eager import ArcEager
 from .syntax.ner import BiluoPushDown
-
+from .attrs import IS_STOP
+from . import attrs
+from . import orth
+from . import util
+from . import language_data
 
 class BaseDefaults(object):
@@ -150,25 +147,15 @@ class BaseDefaults(object):
         return pipeline
 
     token_match = language_data.TOKEN_MATCH
-
     prefixes = tuple(language_data.TOKENIZER_PREFIXES)
-
     suffixes = tuple(language_data.TOKENIZER_SUFFIXES)
-
     infixes = tuple(language_data.TOKENIZER_INFIXES)
-
     tag_map = dict(language_data.TAG_MAP)
-
     tokenizer_exceptions = {}
-
     parser_features = get_templates('parser')
-
     entity_features = get_templates('ner')
-
     tagger_features = Tagger.feature_templates # TODO -- fix this
-
     stop_words = set()
-
     lemma_rules = {}
     lemma_exc = {}
     lemma_index = {}
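Note: the class attributes above act as declarative defaults that a language
subclass can shadow. A hypothetical sketch of such an override; the values are
invented, and only the attribute names come from this hunk:

    class Defaults(BaseDefaults):
        stop_words = set(['the', 'a', 'an'])
        lemma_rules = {'noun': [['s', '']]}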
@@ -313,7 +300,8 @@ class Language(object):
         self.pipeline = [self.tagger, self.parser, self.matcher, self.entity]
 
     def __call__(self, text, tag=True, parse=True, entity=True):
-        """Apply the pipeline to some text. The text can span multiple sentences,
+        """
+        Apply the pipeline to some text. The text can span multiple sentences,
         and can contain arbtrary whitespace. Alignment into the original string
         is preserved.
 
@@ -397,4 +385,3 @@ class Language(object):
         # to taking nlp.path
         if path is not None:
             self.save_to_directory(path)
-
@@ -1,13 +1,8 @@
-from __future__ import unicode_literals, print_function
-import codecs
-import pathlib
-
-import ujson as json
+# coding: utf8
+from __future__ import unicode_literals
 
 from .symbols import POS, NOUN, VERB, ADJ, PUNCT
-from .symbols import VerbForm_inf, VerbForm_none
-from .symbols import Number_sing
-from .symbols import Degree_pos
+from .symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
 
 
 class Lemmatizer(object):
@@ -1,4 +1,7 @@
 # cython: embedsignature=True
+# coding: utf8
+from __future__ import unicode_literals, print_function
+
 from libc.math cimport sqrt
 from cpython.ref cimport Py_INCREF
 from cymem.cymem cimport Pool
@@ -9,14 +12,11 @@ from cython.view cimport array as cvarray
 cimport numpy as np
 np.import_array()
-
-
-
 from libc.string cimport memset
+import numpy
 
 from .orth cimport word_shape
 from .typedefs cimport attr_t, flags_t
-import numpy
 
 from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE
 from .attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP
 from .attrs cimport IS_BRACKET
@@ -1,5 +1,6 @@
 # cython: profile=True
 # cython: infer_types=True
+# coding: utf8
 from __future__ import unicode_literals
 
 from .typedefs cimport attr_t
@@ -1,13 +1,9 @@
 # cython: infer_types
+# coding: utf8
 from __future__ import unicode_literals
 
 from libc.string cimport memset
 
-try:
-    import ujson as json
-except ImportError:
-    import json
-
 from .parts_of_speech cimport ADJ, VERB, NOUN, PUNCT
 from .attrs cimport POS, IS_SPACE
 from .parts_of_speech import IDS as POS_IDS
@@ -1,8 +0,0 @@
-class RegexMerger(object):
-    def __init__(self, regexes):
-        self.regexes = regexes
-
-    def __call__(self, tokens):
-        for tag, entity_type, regex in self.regexes:
-            for m in regex.finditer(tokens.string):
-                tokens.merge(m.start(), m.end(), tag, m.group(), entity_type)
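Note: the deleted RegexMerger above was a thin callable wrapper, and its whole
behaviour is visible in the removed lines. A hedged usage sketch reconstructed
only from that code; the tag and entity values are invented:

    import re

    merger = RegexMerger([('NNP', 'GPE', re.compile('New York'))])
    # merger(tokens) would merge every regex match found in tokens.string
    # into one token via tokens.merge(start, end, tag, text, entity_type).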
@@ -1,6 +1,7 @@
-# coding: utf8
 # cython: infer_types=True
+# coding: utf8
 from __future__ import unicode_literals
 
 import unicodedata
 import re
+
@@ -1,3 +1,4 @@
+# coding: utf8
 from __future__ import unicode_literals
 
 
@@ -1,3 +1,6 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from .syntax.parser cimport Parser
 from .syntax.beam_parser cimport BeamParser
 from .syntax.ner cimport BiluoPushDown
@@ -1,6 +1,5 @@
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
+# coding: utf8
+from __future__ import division, print_function, unicode_literals
 
 from .gold import tags_to_entities
 
@@ -1,12 +1,11 @@
 # cython: infer_types=True
+# coding: utf8
 from __future__ import unicode_literals, absolute_import
 
 cimport cython
 from libc.string cimport memcpy
 from libc.stdint cimport uint64_t, uint32_t
-
 from murmurhash.mrmr cimport hash64, hash32
-
 from preshed.maps cimport map_iter, key_t
 
 from .typedefs cimport hash_t
@@ -1,3 +1,4 @@
+# coding: utf8
 from __future__ import unicode_literals
 
 IDS = {
@@ -1,5 +1,7 @@
 import json
-import pathlib
+# coding: utf8
+from __future__ import unicode_literals
+
 from collections import defaultdict
 
 from cymem.cymem cimport Pool
@@ -12,7 +14,6 @@ from thinc.linalg cimport VecVec
 from .tokens.doc cimport Doc
 from .attrs cimport TAG
 from .gold cimport GoldParse
-
 from .attrs cimport *
 
 
@@ -1,7 +1,7 @@
 # cython: embedsignature=True
+# coding: utf8
 from __future__ import unicode_literals
 
-import pathlib
 
 from cython.operator cimport dereference as deref
 from cython.operator cimport preincrement as preinc
@@ -1,11 +1,10 @@
-from __future__ import absolute_import
-from __future__ import unicode_literals
+# coding: utf8
+from __future__ import absolute_import, unicode_literals
 
 import random
 import tqdm
-from .gold import GoldParse
+from .gold import GoldParse, merge_sents
 from .scorer import Scorer
-from .gold import merge_sents
 
 
 class Trainer(object):
@@ -1,11 +1,10 @@
 # coding: utf8
 from __future__ import unicode_literals, print_function
-import os
 import io
 import json
 import re
-import os.path
-import pathlib
+from pathlib import Path
 import sys
 import textwrap
 
@@ -23,7 +22,7 @@ except NameError: # Python 3
 
 
 LANGUAGES = {}
-_data_path = pathlib.Path(__file__).parent / 'data'
+_data_path = Path(__file__).parent / 'data'
 
 
 def set_lang_class(name, cls):
@@ -163,8 +162,8 @@ def is_python2():
 
 
 def parse_package_meta(package_path, package, require=True):
-    location = os.path.join(str(package_path), package, 'meta.json')
-    if os.path.isfile(location):
+    location = package_path / package / 'meta.json'
+    if location.is_file():
         with io.open(location, encoding='utf8') as f:
             meta = json.load(f)
             return meta
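Note: the change above is the os.path-to-pathlib migration applied throughout
this commit. A minimal standard-library sketch of the two equivalent styles;
the example path is invented:

    import os.path
    from pathlib import Path

    base = '/tmp/models'
    # old style: join strings, then test the resulting string
    location = os.path.join(base, 'en_core', 'meta.json')
    exists = os.path.isfile(location)

    # new style: the / operator joins segments, methods query the result
    location = Path(base) / 'en_core' / 'meta.json'
    exists = location.is_file()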
@@ -209,10 +208,9 @@ def print_markdown(data, **kwargs):
     which will be converted to a list of tuples."""
 
     def excl_value(value):
-        # don't print value if it contains absolute path of directory
-        # (i.e. personal info that shouldn't need to be shared)
-        # other conditions can be included here if necessary
-        if str(pathlib.Path(__file__).parent) in value:
+        # don't print value if it contains absolute path of directory (i.e.
+        # personal info). Other conditions can be included here if necessary.
+        if unicode_(Path(__file__).parent) in value:
             return True
 
     if type(data) == dict:
@@ -1,10 +1,6 @@
+# coding: utf8
 from __future__ import unicode_literals
 
-from libc.string cimport memset
-from libc.stdint cimport int32_t
-from libc.math cimport sqrt
-
-from pathlib import Path
 import bz2
 import ujson as json
 import re
@@ -14,28 +10,28 @@ try:
 except ImportError:
     import pickle
 
+from libc.string cimport memset
+from libc.stdint cimport int32_t
+from libc.math cimport sqrt
+from cymem.cymem cimport Address
 from .lexeme cimport EMPTY_LEXEME
 from .lexeme cimport Lexeme
 from .strings cimport hash_string
 from .typedefs cimport attr_t
 from .cfile cimport CFile, StringCFile
-from .lemmatizer import Lemmatizer
-from .attrs import intify_attrs
 from .tokens.token cimport Token
 
-from . import attrs
-from . import symbols
-
-from cymem.cymem cimport Address
 from .serialize.packer cimport Packer
 from .attrs cimport PROB, LANG
-from . import util
-
-
 try:
     import copy_reg
 except ImportError:
     import copyreg as copy_reg
 
+from .lemmatizer import Lemmatizer
+from .attrs import intify_attrs
+from . import util
+from . import attrs
+from . import symbols
 
 DEF MAX_VEC_SIZE = 100000