mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
commit
92ac3af21d
|
@ -5,8 +5,8 @@ spaCy is a library for advanced natural language processing in Python and
|
|||
Cython. spaCy is built on the very latest research, but it isn't researchware.
|
||||
It was designed from day one to be used in real products. spaCy currently supports
|
||||
English and German, as well as tokenization for Chinese, Spanish, Italian, French,
|
||||
Portuguese, Dutch, Swedish, Finnish, Hungarian and Bengali. It's commercial open-source
|
||||
software, released under the MIT license.
|
||||
Portuguese, Dutch, Swedish, Finnish, Hungarian, Bengali and Hebrew. It's commercial
|
||||
open-source software, released under the MIT license.
|
||||
|
||||
💫 **Version 1.7 out now!** `Read the release notes here. <https://github.com/explosion/spaCy/releases/>`_
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@ from spacy.cli import link as cli_link
|
|||
from spacy.cli import info as cli_info
|
||||
from spacy.cli import package as cli_package
|
||||
from spacy.cli import train as cli_train
|
||||
from spacy.cli import train_config as cli_train_config
|
||||
|
||||
|
||||
class CLI(object):
|
||||
|
@ -77,36 +76,29 @@ class CLI(object):
|
|||
|
||||
|
||||
@plac.annotations(
|
||||
lang=("language", "positional", None, str),
|
||||
output_dir=("output directory", "positional", None, str),
|
||||
train_data=("training data", "positional", None, str),
|
||||
dev_data=("development data", "positional", None, str),
|
||||
lang=("model language", "positional", None, str),
|
||||
output_dir=("output directory to store model in", "positional", None, str),
|
||||
train_data=("location of JSON-formatted training data", "positional", None, str),
|
||||
dev_data=("location of JSON-formatted development data (optional)", "positional", None, str),
|
||||
n_iter=("number of iterations", "option", "n", int),
|
||||
parser_L1=("L1 regularization penalty for parser", "option", "L", float),
|
||||
no_tagger=("Don't train tagger", "flag", "T", bool),
|
||||
no_parser=("Don't train parser", "flag", "P", bool),
|
||||
no_ner=("Don't train NER", "flag", "N", bool)
|
||||
)
|
||||
def train(self, lang, output_dir, train_data, dev_data, n_iter=15,
|
||||
parser_L1=0.0,
|
||||
no_tagger=False, no_parser=False, no_ner=False):
|
||||
"""Train a model."""
|
||||
cli_train(lang, output_dir, train_data, dev_data, n_iter,
|
||||
not no_tagger, not no_parser, not no_ner,
|
||||
parser_L1)
|
||||
def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15,
|
||||
parser_L1=0.0, no_tagger=False, no_parser=False, no_ner=False):
|
||||
"""
|
||||
Train a model. Expects data in spaCy's JSON format.
|
||||
"""
|
||||
|
||||
|
||||
@plac.annotations(
|
||||
config=("config", "positional", None, str),
|
||||
)
|
||||
def train_config(self, config):
|
||||
"""Train a model from config file."""
|
||||
|
||||
cli_train_config(config)
|
||||
cli_train(lang, output_dir, train_data, dev_data, n_iter, not no_tagger,
|
||||
not no_parser, not no_ner, parser_L1)
|
||||
|
||||
|
||||
def __missing__(self, name):
|
||||
print("\n Command %r does not exist\n" % name)
|
||||
print("\n Command %r does not exist."
|
||||
"\n Use the --help flag for a list of available commands.\n" % name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -79,3 +79,33 @@ p
|
|||
+h(2, "named-entities") Named Entity Recognition
|
||||
|
||||
include _annotation/_named-entities
|
||||
|
||||
+h(2, "json-input") JSON input format for training
|
||||
|
||||
p
|
||||
| spaCy takes training data in the following format:
|
||||
|
||||
+code("Example structure").
|
||||
doc: {
|
||||
id: string,
|
||||
paragraphs: [{
|
||||
raw: string,
|
||||
sents: [int],
|
||||
tokens: [{
|
||||
start: int,
|
||||
tag: string,
|
||||
head: int,
|
||||
dep: string
|
||||
}],
|
||||
ner: [{
|
||||
start: int,
|
||||
end: int,
|
||||
label: string
|
||||
}],
|
||||
brackets: [{
|
||||
start: int,
|
||||
end: int,
|
||||
label: string
|
||||
}]
|
||||
}]
|
||||
}
|
||||
|
|
|
@ -138,3 +138,64 @@ p
|
|||
+cell #[code --help], #[code -h]
|
||||
+cell flag
|
||||
+cell Show help message and available arguments.
|
||||
|
||||
+h(2, "train") Train
|
||||
+tag experimental
|
||||
|
||||
p
|
||||
| Train a model. Expects data in spaCy's
|
||||
| #[+a("/docs/api/annotation#json-input") JSON format].
|
||||
|
||||
+code(false, "bash").
|
||||
python -m spacy train [lang] [output_dir] [train_data] [dev_data] [--n_iter] [--parser_L1] [--no_tagger] [--no_parser] [--no_ner]
|
||||
|
||||
+table(["Argument", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code lang]
|
||||
+cell positional
|
||||
+cell Model language.
|
||||
|
||||
+row
|
||||
+cell #[code output_dir]
|
||||
+cell positional
|
||||
+cell Directory to store model in.
|
||||
|
||||
+row
|
||||
+cell #[code train_data]
|
||||
+cell positional
|
||||
+cell Location of JSON-formatted training data.
|
||||
|
||||
+row
|
||||
+cell #[code dev_data]
|
||||
+cell positional
|
||||
+cell Location of JSON-formatted dev data (optional).
|
||||
|
||||
+row
|
||||
+cell #[code --n_iter], #[code -n]
|
||||
+cell option
|
||||
+cell Number of iterations (default: #[code 15]).
|
||||
|
||||
+row
|
||||
+cell #[code --parser_L1], #[code -L]
|
||||
+cell option
|
||||
+cell L1 regularization penalty for parser (default: #[code 0.0]).
|
||||
|
||||
+row
|
||||
+cell #[code --no_tagger], #[code -T]
|
||||
+cell flag
|
||||
+cell Don't train tagger.
|
||||
|
||||
+row
|
||||
+cell #[code --no_parser], #[code -P]
|
||||
+cell flag
|
||||
+cell Don't train parser.
|
||||
|
||||
+row
|
||||
+cell #[code --no_ner], #[code -N]
|
||||
+cell flag
|
||||
+cell Don't train NER.
|
||||
|
||||
+row
|
||||
+cell #[code --help], #[code -h]
|
||||
+cell flag
|
||||
+cell Show help message and available arguments.
|
||||
|
|
Loading…
Reference in New Issue
Block a user