mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Remove trailing whitespace
This commit is contained in:
parent
4e95737c6c
commit
bcc1d50d09
|
@ -53,7 +53,7 @@ class BaseDefaults(object):
|
||||||
else:
|
else:
|
||||||
return Vocab.load(nlp.path, lex_attr_getters=cls.lex_attr_getters,
|
return Vocab.load(nlp.path, lex_attr_getters=cls.lex_attr_getters,
|
||||||
tag_map=cls.tag_map, lemmatizer=lemmatizer)
|
tag_map=cls.tag_map, lemmatizer=lemmatizer)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def add_vectors(cls, nlp=None):
|
def add_vectors(cls, nlp=None):
|
||||||
if nlp is None or nlp.path is None:
|
if nlp is None or nlp.path is None:
|
||||||
|
@ -150,9 +150,9 @@ class BaseDefaults(object):
|
||||||
tag_map = {}
|
tag_map = {}
|
||||||
|
|
||||||
tokenizer_exceptions = {}
|
tokenizer_exceptions = {}
|
||||||
|
|
||||||
parser_features = get_templates('parser')
|
parser_features = get_templates('parser')
|
||||||
|
|
||||||
entity_features = get_templates('ner')
|
entity_features = get_templates('ner')
|
||||||
|
|
||||||
tagger_features = Tagger.feature_templates # TODO -- fix this
|
tagger_features = Tagger.feature_templates # TODO -- fix this
|
||||||
|
@ -257,7 +257,7 @@ class Language(object):
|
||||||
path = util.match_best_version(self.lang, '', util.get_data_path())
|
path = util.match_best_version(self.lang, '', util.get_data_path())
|
||||||
|
|
||||||
self.path = path
|
self.path = path
|
||||||
|
|
||||||
self.vocab = self.Defaults.create_vocab(self) \
|
self.vocab = self.Defaults.create_vocab(self) \
|
||||||
if 'vocab' not in overrides \
|
if 'vocab' not in overrides \
|
||||||
else overrides['vocab']
|
else overrides['vocab']
|
||||||
|
@ -299,7 +299,7 @@ class Language(object):
|
||||||
"""Apply the pipeline to some text. The text can span multiple sentences,
|
"""Apply the pipeline to some text. The text can span multiple sentences,
|
||||||
and can contain arbitrary whitespace. Alignment into the original string
|
and can contain arbitrary whitespace. Alignment into the original string
|
||||||
is preserved.
|
is preserved.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text (unicode): The text to be processed.
|
text (unicode): The text to be processed.
|
||||||
|
|
||||||
|
@ -327,9 +327,9 @@ class Language(object):
|
||||||
|
|
||||||
def pipe(self, texts, tag=True, parse=True, entity=True, n_threads=2, batch_size=1000):
|
def pipe(self, texts, tag=True, parse=True, entity=True, n_threads=2, batch_size=1000):
|
||||||
'''Process texts as a stream, and yield Doc objects in order.
|
'''Process texts as a stream, and yield Doc objects in order.
|
||||||
|
|
||||||
Supports GIL-free multi-threading.
|
Supports GIL-free multi-threading.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
texts (iterator)
|
texts (iterator)
|
||||||
tag (bool)
|
tag (bool)
|
||||||
|
@ -352,7 +352,7 @@ class Language(object):
|
||||||
path = self.path
|
path = self.path
|
||||||
elif isinstance(path, basestring):
|
elif isinstance(path, basestring):
|
||||||
path = pathlib.Path(path)
|
path = pathlib.Path(path)
|
||||||
|
|
||||||
if self.tagger:
|
if self.tagger:
|
||||||
self.tagger.model.end_training()
|
self.tagger.model.end_training()
|
||||||
self.tagger.model.dump(str(path / 'pos' / 'model'))
|
self.tagger.model.dump(str(path / 'pos' / 'model'))
|
||||||
|
@ -362,7 +362,7 @@ class Language(object):
|
||||||
if self.entity:
|
if self.entity:
|
||||||
self.entity.model.end_training()
|
self.entity.model.end_training()
|
||||||
self.entity.model.dump(str(path / 'ner' / 'model'))
|
self.entity.model.dump(str(path / 'ner' / 'model'))
|
||||||
|
|
||||||
strings_loc = path / 'vocab' / 'strings.json'
|
strings_loc = path / 'vocab' / 'strings.json'
|
||||||
with strings_loc.open('w', encoding='utf8') as file_:
|
with strings_loc.open('w', encoding='utf8') as file_:
|
||||||
self.vocab.strings.dump(file_)
|
self.vocab.strings.dump(file_)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user