mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 09:56:28 +03:00
Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
commit
c9ec24b257
|
@ -62,10 +62,10 @@ def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_
|
||||||
for itn, epoch in enumerate(trainer.epochs(n_iter, augment_data=None)):
|
for itn, epoch in enumerate(trainer.epochs(n_iter, augment_data=None)):
|
||||||
for doc, gold in epoch:
|
for doc, gold in epoch:
|
||||||
trainer.update(doc, gold)
|
trainer.update(doc, gold)
|
||||||
dev_scores = trainer.evaluate(dev_data) if dev_data else []
|
dev_scores = trainer.evaluate(dev_data).scores if dev_data else {}
|
||||||
print_progress(itn, trainer.nlp.parser.model.nr_weight,
|
print_progress(itn, trainer.nlp.parser.model.nr_weight,
|
||||||
trainer.nlp.parser.model.nr_active_feat,
|
trainer.nlp.parser.model.nr_active_feat,
|
||||||
**dev_scores.scores)
|
**dev_scores)
|
||||||
|
|
||||||
|
|
||||||
def evaluate(Language, gold_tuples, output_path):
|
def evaluate(Language, gold_tuples, output_path):
|
||||||
|
|
|
@ -98,6 +98,17 @@ p
|
||||||
| so that Python functions can be used to help you generalise and combine
|
| so that Python functions can be used to help you generalise and combine
|
||||||
| the data as you require.
|
| the data as you require.
|
||||||
|
|
||||||
|
+infobox("For languages with non-Latin characters")
|
||||||
|
| In order for the tokenizer to split suffixes, prefixes and infixes, spaCy
|
||||||
|
| needs to know the language's character set. If the language you're adding
|
||||||
|
| uses non-Latin characters, you might need to add the required character
|
||||||
|
| classes to the global
|
||||||
|
| #[+src(gh("spacy", "spacy/language_data/punctuation.py")) punctuation.py].
|
||||||
|
| spaCy uses the #[+a("https://pypi.python.org/pypi/regex/") #[code regex] library]
|
||||||
|
| to keep this simple and readable. If the language requires very specific
|
||||||
|
| punctuation rules, you should consider overwriting the default regular
|
||||||
|
| expressions with your own in the language's #[code Defaults].
|
||||||
|
|
||||||
+h(3, "stop-words") Stop words
|
+h(3, "stop-words") Stop words
|
||||||
|
|
||||||
p
|
p
|
||||||
|
|
Loading…
Reference in New Issue
Block a user