Update CLI docs [ci skip]

2026-01-03 15:33:33 +03:00 · 2019-09-28 13:12:30 +02:00 · 2019-09-28 13:12:30 +02:00 · f8d1e2f214
commit f8d1e2f214
parent 22b9e12159
2 changed files with 6 additions and 9 deletions
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -40,12 +40,7 @@ from .. import about
    parser_multitasks=("Side objectives for parser CNN, e.g. 'dep' or 'dep,tag'", "option", "pt", str),
    entity_multitasks=("Side objectives for NER CNN, e.g. 'dep' or 'dep,tag'", "option", "et", str),
    noise_level=("Amount of corruption for data augmentation", "option", "nl", float),
-    orth_variant_level=(
-        "Amount of orthography variation for data augmentation",
-        "option",
-        "ovl",
-        float,
-    ),
+    orth_variant_level=("Amount of orthography variation for data augmentation", "option", "ovl", float),
    eval_beam_widths=("Beam widths to evaluate, e.g. 4,8", "option", "bw", str),
    gold_preproc=("Use gold preprocessing", "flag", "G", bool),
    learn_tokens=("Make parser learn gold-standard tokenization", "flag", "T", bool),
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -181,6 +181,7 @@ All output files generated by this command are compatible with
 | `conll`, `conllu`, `conllubio` | Universal Dependencies `.conllu` or `.conll` format.                                                                                                                                                                                                                                                                                                                                           |
 | `ner`                          | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). |
 | `iob`                          | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data).                                                                                                                              |
+
 ## Debug data {#debug-data new="2.2"}

 Analyze, debug and validate your training and development data, get useful
@@ -363,8 +364,8 @@ $ python -m spacy train [lang] [output_path] [train_path] [dev_path]
 [--base-model] [--pipeline] [--vectors] [--n-iter] [--n-early-stopping]
 [--n-examples] [--use-gpu] [--version] [--meta-path] [--init-tok2vec]
 [--parser-multitasks] [--entity-multitasks] [--gold-preproc] [--noise-level]
-[--learn-tokens] [--textcat-arch] [--textcat-multilabel] [--textcat-positive-label]
-[--verbose]
+[--orth-variant-level] [--learn-tokens] [--textcat-arch] [--textcat-multilabel]
+[--textcat-positive-label] [--verbose]
 ```

 | Argument                                                        | Type          | Description                                                                                                                                                       |
@@ -386,6 +387,7 @@ $ python -m spacy train [lang] [output_path] [train_path] [dev_path]
 | `--parser-multitasks`, `-pt`                                    | option        | Side objectives for parser CNN, e.g. `'dep'` or `'dep,tag'`                                                                                                       |
 | `--entity-multitasks`, `-et`                                    | option        | Side objectives for NER CNN, e.g. `'dep'` or `'dep,tag'`                                                                                                          |
 | `--noise-level`, `-nl`                                          | option        | Float indicating the amount of corruption for data augmentation.                                                                                                  |
+| `--orth-variant-level`, `-ovl` <Tag variant="new">2.2</Tag>     | option        | Float indicating the amount of orthography variation for data augmentation (e.g. `0.3` for making 30% of occurrences of some tokens subject to replacement).      |
 | `--gold-preproc`, `-G`                                          | flag          | Use gold preprocessing.                                                                                                                                           |
 | `--learn-tokens`, `-T`                                          | flag          | Make parser learn gold-standard tokenization by merging subtokens. Typically used for languages like Chinese.                                                     |
 | `--textcat-multilabel`, `-TML` <Tag variant="new">2.2</Tag>     | flag          | Text classification classes aren't mutually exclusive (multilabel).                                                                                               |