mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Add more hyper-parameters to spacy ud-train
* subword_features: Controls whether subword features (specifically, prefix, suffix and word shape) are used in the word embeddings. True by default. Should be set to False for languages like Chinese and Japanese.
* conv_depth: Depth of the convolutional layers. Defaults to 4.
This commit is contained in:
parent
51a9efbf3b
commit
9c33d4d1df
|
@ -290,7 +290,9 @@ def initialize_pipeline(nlp, docs, golds, config, device):
|
|||
for tag in gold.tags:
|
||||
if tag is not None:
|
||||
nlp.tagger.add_label(tag)
|
||||
return nlp.begin_training(lambda: golds_to_gold_tuples(docs, golds), device=device)
|
||||
return nlp.begin_training(
|
||||
lambda: golds_to_gold_tuples(docs, golds), device=device,
|
||||
subword_features=config.subword_features, config.conv_depth=conv_depth)
|
||||
|
||||
|
||||
########################
|
||||
|
@ -300,10 +302,10 @@ def initialize_pipeline(nlp, docs, golds, config, device):
|
|||
class Config(object):
|
||||
def __init__(self, vectors=None, max_doc_length=10, multitask_tag=True,
|
||||
multitask_sent=True, multitask_dep=True, multitask_vectors=False,
|
||||
nr_epoch=30, batch_size=1000, dropout=0.2):
|
||||
nr_epoch=30, batch_size=1000, dropout=0.2,
|
||||
conv_depth=4, subword_features=True):
|
||||
for key, value in locals().items():
|
||||
setattr(self, key, value)
|
||||
|
||||
|
||||
@classmethod
|
||||
def load(cls, loc):
|
||||
|
@ -365,7 +367,7 @@ def main(ud_dir, parses_dir, config, corpus, limit=0, use_gpu=-1, vectors_dir=No
|
|||
nlp = load_nlp(paths.lang, config, vectors=vectors_dir)
|
||||
|
||||
docs, golds = read_data(nlp, paths.train.conllu.open(), paths.train.text.open(),
|
||||
max_doc_length=None, limit=limit)
|
||||
max_doc_length=3, limit=limit)
|
||||
|
||||
optimizer = initialize_pipeline(nlp, docs, golds, config, use_gpu)
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user