From 1b0d413e45e1495d58a6756f4f1dad82953a9bb6 Mon Sep 17 00:00:00 2001 From: vincent d warmerdam Date: Fri, 5 Mar 2021 14:31:15 +0100 Subject: [PATCH 1/3] Removed Languages that were listed twice on Docs (#7272) * removed languages that were listed twice * sorted * d0h * the d0h strikes back when you dont hit save --- website/meta/languages.json | 478 ++++++++++++++++++++++++++++-------- 1 file changed, 369 insertions(+), 109 deletions(-) diff --git a/website/meta/languages.json b/website/meta/languages.json index 579dca9fe..e05718047 100644 --- a/website/meta/languages.json +++ b/website/meta/languages.json @@ -1,85 +1,201 @@ { "languages": [ - { "code": "af", "name": "Afrikaans" }, - { "code": "ar", "name": "Arabic", "example": "هذه جملة", "has_examples": true }, - { "code": "bg", "name": "Bulgarian", "example": "Това е изречение", "has_examples": true }, - { "code": "bn", "name": "Bengali", "has_examples": true }, - { "code": "ca", "name": "Catalan", "example": "Això és una frase.", "has_examples": true }, - { "code": "cs", "name": "Czech", "has_examples": true }, + { + "code": "af", + "name": "Afrikaans" + }, + { + "code": "ar", + "name": "Arabic", + "example": "هذه جملة", + "has_examples": true + }, + { + "code": "bg", + "name": "Bulgarian", + "example": "Това е изречение", + "has_examples": true + }, + { + "code": "bn", + "name": "Bengali", + "has_examples": true + }, + { + "code": "ca", + "name": "Catalan", + "example": "Això és una frase.", + "has_examples": true + }, + { + "code": "cs", + "name": "Czech", + "has_examples": true + }, { "code": "da", "name": "Danish", "example": "Dette er en sætning.", "has_examples": true, - "models": ["da_core_news_sm", "da_core_news_md", "da_core_news_lg"] + "models": [ + "da_core_news_sm", + "da_core_news_md", + "da_core_news_lg" + ] }, { "code": "de", "name": "German", - "models": ["de_core_news_sm", "de_core_news_md", "de_core_news_lg", "de_dep_news_trf"], + "models": [ + "de_core_news_sm", + "de_core_news_md", + "de_core_news_lg", + "de_dep_news_trf" + ], "example": "Dies ist ein Satz.", "has_examples": true }, { "code": "el", "name": "Greek", - "models": ["el_core_news_sm", "el_core_news_md", "el_core_news_lg"], + "models": [ + "el_core_news_sm", + "el_core_news_md", + "el_core_news_lg" + ], "example": "Αυτή είναι μια πρόταση.", "has_examples": true }, { "code": "en", "name": "English", - "models": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_core_web_trf"], + "models": [ + "en_core_web_sm", + "en_core_web_md", + "en_core_web_lg", + "en_core_web_trf" + ], "example": "This is a sentence.", "has_examples": true }, { "code": "es", "name": "Spanish", - "models": ["es_core_news_sm", "es_core_news_md", "es_core_news_lg", "es_dep_news_trf"], + "models": [ + "es_core_news_sm", + "es_core_news_md", + "es_core_news_lg", + "es_dep_news_trf" + ], "example": "Esto es una frase.", "has_examples": true }, - { "code": "et", "name": "Estonian" }, - { "code": "eu", "name": "Basque", "has_examples": true }, - { "code": "fa", "name": "Persian", "has_examples": true }, - { "code": "fi", "name": "Finnish", "has_examples": true }, + { + "code": "et", + "name": "Estonian" + }, + { + "code": "eu", + "name": "Basque", + "has_examples": true + }, + { + "code": "fa", + "name": "Persian", + "has_examples": true + }, + { + "code": "fi", + "name": "Finnish", + "has_examples": true + }, { "code": "fr", "name": "French", - "models": ["fr_core_news_sm", "fr_core_news_md", "fr_core_news_lg", "fr_dep_news_trf"], + "models": [ + "fr_core_news_sm", + "fr_core_news_md", + "fr_core_news_lg", + "fr_dep_news_trf" + ], "example": "C'est une phrase.", "has_examples": true }, - { "code": "ga", "name": "Irish" }, - { "code": "gu", "name": "Gujarati", "has_examples": true }, - { "code": "he", "name": "Hebrew", "example": "זהו משפט.", "has_examples": true }, - { "code": "hi", "name": "Hindi", "example": "यह एक वाक्य है।", "has_examples": true }, - { "code": "hr", "name": "Croatian", "has_examples": true }, - { "code": "hu", "name": "Hungarian", "example": "Ez egy mondat.", "has_examples": true }, - { "code": "hy", "name": "Armenian", "has_examples": true }, + { + "code": "ga", + "name": "Irish" + }, + { + "code": "gu", + "name": "Gujarati", + "has_examples": true + }, + { + "code": "he", + "name": "Hebrew", + "example": "זהו משפט.", + "has_examples": true + }, + { + "code": "hi", + "name": "Hindi", + "example": "यह एक वाक्य है।", + "has_examples": true + }, + { + "code": "hr", + "name": "Croatian", + "has_examples": true + }, + { + "code": "hu", + "name": "Hungarian", + "example": "Ez egy mondat.", + "has_examples": true + }, + { + "code": "hy", + "name": "Armenian", + "has_examples": true + }, { "code": "id", "name": "Indonesian", "example": "Ini adalah sebuah kalimat.", "has_examples": true }, - { "code": "is", "name": "Icelandic" }, + { + "code": "is", + "name": "Icelandic" + }, { "code": "it", "name": "Italian", - "models": ["it_core_news_sm", "it_core_news_md", "it_core_news_lg"], + "models": [ + "it_core_news_sm", + "it_core_news_md", + "it_core_news_lg" + ], "example": "Questa è una frase.", "has_examples": true }, { "code": "ja", "name": "Japanese", - "models": ["ja_core_news_sm", "ja_core_news_md", "ja_core_news_lg"], + "models": [ + "ja_core_news_sm", + "ja_core_news_md", + "ja_core_news_lg" + ], "dependencies": [ - { "name": "Unidic", "url": "http://unidic.ninjal.ac.jp/back_number#unidic_cwj" }, - { "name": "Mecab", "url": "https://github.com/taku910/mecab" }, + { + "name": "Unidic", + "url": "http://unidic.ninjal.ac.jp/back_number#unidic_cwj" + }, + { + "name": "Mecab", + "url": "https://github.com/taku910/mecab" + }, { "name": "SudachiPy", "url": "https://github.com/WorksApplications/SudachiPy" @@ -88,7 +204,11 @@ "example": "これは文章です。", "has_examples": true }, - { "code": "kn", "name": "Kannada", "has_examples": true }, + { + "code": "kn", + "name": "Kannada", + "has_examples": true + }, { "code": "ko", "name": "Korean", @@ -97,8 +217,14 @@ "name": "mecab-ko", "url": "https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md" }, - { "name": "mecab-ko-dic", "url": "https://bitbucket.org/eunjeon/mecab-ko-dic" }, - { "name": "natto-py", "url": "https://github.com/buruzaemon/natto-py" } + { + "name": "mecab-ko-dic", + "url": "https://bitbucket.org/eunjeon/mecab-ko-dic" + }, + { + "name": "natto-py", + "url": "https://github.com/buruzaemon/natto-py" + } ], "example": "이것은 문장입니다.", "has_examples": true @@ -109,7 +235,11 @@ "example": "Адамга эң кыйыны — күн сайын адам болуу", "has_examples": true }, - { "code": "lb", "name": "Luxembourgish", "has_examples": true }, + { + "code": "lb", + "name": "Luxembourgish", + "has_examples": true + }, { "code": "lij", "name": "Ligurian", @@ -120,29 +250,53 @@ "code": "lt", "name": "Lithuanian", "has_examples": true, - "models": ["lt_core_news_sm", "lt_core_news_md", "lt_core_news_lg"] + "models": [ + "lt_core_news_sm", + "lt_core_news_md", + "lt_core_news_lg" + ] + }, + { + "code": "lv", + "name": "Latvian" }, - { "code": "lv", "name": "Latvian" }, { "code": "mk", - "name": "Macedonian", - "has_examples": false, - "models": ["mk_core_news_sm", "mk_core_news_md", "mk_core_news_lg"] + "name": "Macedonian" + }, + { + "code": "ml", + "name": "Malayalam", + "has_examples": true + }, + { + "code": "mr", + "name": "Marathi" }, - { "code": "ml", "name": "Malayalam", "has_examples": true }, - { "code": "mr", "name": "Marathi" }, { "code": "nb", "name": "Norwegian Bokmål", "example": "Dette er en setning.", "has_examples": true, - "models": ["nb_core_news_sm", "nb_core_news_md", "nb_core_news_lg"] + "models": [ + "nb_core_news_sm", + "nb_core_news_md", + "nb_core_news_lg" + ] + }, + { + "code": "ne", + "name": "Nepali", + "has_examples": true }, - { "code": "ne", "name": "Nepali", "has_examples": true }, { "code": "nl", "name": "Dutch", - "models": ["nl_core_news_sm", "nl_core_news_md", "nl_core_news_lg"], + "models": [ + "nl_core_news_sm", + "nl_core_news_md", + "nl_core_news_lg" + ], "example": "Dit is een zin.", "has_examples": true }, @@ -151,12 +305,20 @@ "name": "Polish", "example": "To jest zdanie.", "has_examples": true, - "models": ["pl_core_news_sm", "pl_core_news_md", "pl_core_news_lg"] + "models": [ + "pl_core_news_sm", + "pl_core_news_md", + "pl_core_news_lg" + ] }, { "code": "pt", "name": "Portuguese", - "models": ["pt_core_news_sm", "pt_core_news_md", "pt_core_news_lg"], + "models": [ + "pt_core_news_sm", + "pt_core_news_md", + "pt_core_news_lg" + ], "example": "Esta é uma frase.", "has_examples": true }, @@ -165,95 +327,157 @@ "name": "Romanian", "example": "Aceasta este o propoziție.", "has_examples": true, - "models": ["ro_core_news_sm", "ro_core_news_md", "ro_core_news_lg"] + "models": [ + "ro_core_news_sm", + "ro_core_news_md", + "ro_core_news_lg" + ] }, { "code": "ru", "name": "Russian", "has_examples": true, - "dependencies": [{ "name": "pymorphy2", "url": "https://github.com/kmike/pymorphy2" }], - "models": ["ru_core_news_sm", "ru_core_news_md", "ru_core_news_lg"] + "dependencies": [ + { + "name": "pymorphy2", + "url": "https://github.com/kmike/pymorphy2" + } + ], + "models": [ + "ru_core_news_sm", + "ru_core_news_md", + "ru_core_news_lg" + ] + }, + { + "code": "sa", + "name": "Sanskrit", + "has_examples": true + }, + { + "code": "si", + "name": "Sinhala", + "example": "මෙය වාක්‍යයකි.", + "has_examples": true + }, + { + "code": "sk", + "name": "Slovak", + "has_examples": true + }, + { + "code": "sl", + "name": "Slovenian" }, - { "code": "sa", "name": "Sanskrit", "has_examples": true }, - { "code": "si", "name": "Sinhala", "example": "මෙය වාක්‍යයකි.", "has_examples": true }, - { "code": "sk", "name": "Slovak", "has_examples": true }, - { "code": "sl", "name": "Slovenian" }, { "code": "sq", "name": "Albanian", "example": "Kjo është një fjali.", "has_examples": true }, - { "code": "sr", "name": "Serbian", "has_examples": true }, - { "code": "sv", "name": "Swedish", "has_examples": true }, - { "code": "ta", "name": "Tamil", "has_examples": true }, - { "code": "te", "name": "Telugu", "example": "ఇది ఒక వాక్యం.", "has_examples": true }, + { + "code": "sr", + "name": "Serbian", + "has_examples": true + }, + { + "code": "sv", + "name": "Swedish", + "has_examples": true + }, + { + "code": "ta", + "name": "Tamil", + "has_examples": true + }, + { + "code": "te", + "name": "Telugu", + "example": "ఇది ఒక వాక్యం.", + "has_examples": true + }, { "code": "th", "name": "Thai", "dependencies": [ - { "name": "pythainlp", "url": "https://github.com/wannaphongcom/pythainlp" } + { + "name": "pythainlp", + "url": "https://github.com/wannaphongcom/pythainlp" + } ], "example": "นี่คือประโยค", "has_examples": true }, - { "code": "tl", "name": "Tagalog" }, - { "code": "tn", "name": "Setswana", "has_examples": true }, - { "code": "tr", "name": "Turkish", "example": "Bu bir cümledir.", "has_examples": true }, - { "code": "tt", "name": "Tatar", "has_examples": true }, + { + "code": "tl", + "name": "Tagalog" + }, + { + "code": "tn", + "name": "Setswana", + "has_examples": true + }, + { + "code": "tr", + "name": "Turkish", + "example": "Bu bir cümledir.", + "has_examples": true + }, + { + "code": "tt", + "name": "Tatar", + "has_examples": true + }, { "code": "uk", "name": "Ukrainian", "has_examples": true, - "dependencies": [{ "name": "pymorphy2", "url": "https://github.com/kmike/pymorphy2" }] + "dependencies": [ + { + "name": "pymorphy2", + "url": "https://github.com/kmike/pymorphy2" + } + ] + }, + { + "code": "ur", + "name": "Urdu", + "example": "یہ ایک جملہ ہے", + "has_examples": true }, - { "code": "ur", "name": "Urdu", "example": "یہ ایک جملہ ہے", "has_examples": true }, { "code": "vi", "name": "Vietnamese", - "dependencies": [{ "name": "Pyvi", "url": "https://github.com/trungtv/pyvi" }] - }, - { - "code": "lij", - "name": "Ligurian", - "example": "Sta chì a l'é unna fraxe.", - "has_examples": true - }, - { - "code": "hy", - "name": "Armenian", - "has_examples": true - }, - { - "code": "gu", - "name": "Gujarati", - "has_examples": true - }, - { - "code": "ml", - "name": "Malayalam", - "has_examples": true - }, - { - "code": "ne", - "name": "Nepali", - "has_examples": true - }, - { - "code": "mk", - "name": "Macedonian" + "dependencies": [ + { + "name": "Pyvi", + "url": "https://github.com/trungtv/pyvi" + } + ] }, { "code": "xx", "name": "Multi-language", - "models": ["xx_ent_wiki_sm", "xx_sent_ud_sm"], + "models": [ + "xx_ent_wiki_sm", + "xx_sent_ud_sm" + ], "example": "This is a sentence about Facebook." }, - { "code": "yo", "name": "Yoruba", "has_examples": true }, + { + "code": "yo", + "name": "Yoruba", + "has_examples": true + }, { "code": "zh", "name": "Chinese", - "models": ["zh_core_web_sm", "zh_core_web_md", "zh_core_web_lg", "zh_core_web_trf"], + "models": [ + "zh_core_web_sm", + "zh_core_web_md", + "zh_core_web_lg", + "zh_core_web_trf" + ], "dependencies": [ { "name": "Jieba", @@ -268,21 +492,57 @@ } ], "licenses": [ - { "id": "CC BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, - { "id": "CC BY-SA", "url": "https://creativecommons.org/licenses/by-sa/3.0/" }, - { "id": "CC BY-SA 3.0", "url": "https://creativecommons.org/licenses/by-sa/3.0/" }, - { "id": "CC BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" }, - { "id": "CC BY-NC", "url": "https://creativecommons.org/licenses/by-nc/3.0/" }, - { "id": "CC BY-NC 3.0", "url": "https://creativecommons.org/licenses/by-nc/3.0/" }, - { "id": "CC BY-NC 4.0", "url": "https://creativecommons.org/licenses/by-nc/4.0/" }, - { "id": "CC-BY-NC-SA 3.0", "url": "https://creativecommons.org/licenses/by-nc-sa/3.0/" }, - { "id": "GPL", "url": "https://www.gnu.org/licenses/gpl.html" }, - { "id": "GPU GPL 3.0", "url": "https://www.gnu.org/licenses/gpl-3.0.en.html" }, - { "id": "LGPL", "url": "https://www.gnu.org/licenses/lgpl.html" }, - { "id": "MIT", "url": "https://opensource.org/licenses/MIT" }, + { + "id": "CC BY 4.0", + "url": "https://creativecommons.org/licenses/by/4.0/" + }, + { + "id": "CC BY-SA", + "url": "https://creativecommons.org/licenses/by-sa/3.0/" + }, + { + "id": "CC BY-SA 3.0", + "url": "https://creativecommons.org/licenses/by-sa/3.0/" + }, + { + "id": "CC BY-SA 4.0", + "url": "https://creativecommons.org/licenses/by-sa/4.0/" + }, + { + "id": "CC BY-NC", + "url": "https://creativecommons.org/licenses/by-nc/3.0/" + }, + { + "id": "CC BY-NC 3.0", + "url": "https://creativecommons.org/licenses/by-nc/3.0/" + }, + { + "id": "CC BY-NC 4.0", + "url": "https://creativecommons.org/licenses/by-nc/4.0/" + }, + { + "id": "CC-BY-NC-SA 3.0", + "url": "https://creativecommons.org/licenses/by-nc-sa/3.0/" + }, + { + "id": "GPL", + "url": "https://www.gnu.org/licenses/gpl.html" + }, + { + "id": "GPU GPL 3.0", + "url": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + { + "id": "LGPL", + "url": "https://www.gnu.org/licenses/lgpl.html" + }, + { + "id": "MIT", + "url": "https://opensource.org/licenses/MIT" + }, { "id": "LGPL-LR", "url": "https://github.com/UniversalDependencies/UD_French-Sequoia/blob/master/LICENSE.txt" } ] -} +} \ No newline at end of file From 7d085d5b1c7a8efc017e1fc41735b222e776cf13 Mon Sep 17 00:00:00 2001 From: graue70 <23035329+graue70@users.noreply.github.com> Date: Fri, 5 Mar 2021 18:30:09 +0100 Subject: [PATCH 2/3] Fix typo in docs --- website/docs/usage/processing-pipelines.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md index b9824ea04..0058d40dc 100644 --- a/website/docs/usage/processing-pipelines.md +++ b/website/docs/usage/processing-pipelines.md @@ -1273,7 +1273,7 @@ loss is calculated and to add evaluation scores to the training output. | [`update`](/api/pipe#update) | Learn from a batch of [`Example`](/api/example) objects containing the predictions and gold-standard annotations, and update the component's model. | | [`initialize`](/api/pipe#initialize) | Initialize the model. Typically calls into [`Model.initialize`](https://thinc.ai/docs/api-model#initialize) and can be passed custom arguments via the [`[initialize]`](/api/data-formats#config-initialize) config block that are only loaded during training or when you call [`nlp.initialize`](/api/language#initialize), not at runtime. | | [`get_loss`](/api/pipe#get_loss) | Return a tuple of the loss and the gradient for a batch of [`Example`](/api/example) objects. | -| [`score`](/api/pipe#score) | Score a batch of [`Example`](/api/example) objects and return a dictionary of scores. The [`@Language.factory`](/api/language#factory) decorator can define the `default_socre_weights` of the component to decide which keys of the scores to display during training and how they count towards the final score. | +| [`score`](/api/pipe#score) | Score a batch of [`Example`](/api/example) objects and return a dictionary of scores. The [`@Language.factory`](/api/language#factory) decorator can define the `default_score_weights` of the component to decide which keys of the scores to display during training and how they count towards the final score. | From dfb23a419ee0410f5ef0ce8ebd8b031cd5790e2d Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 6 Mar 2021 17:38:54 +1100 Subject: [PATCH 3/3] =?UTF-8?q?Merge=20branch=20'spacy.io'=C2=A0[ci=20skip?= =?UTF-8?q?]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- website/docs/usage/v2-1.md | 2 +- website/docs/usage/v2-3.md | 33 ++++++++++++++++----------------- website/docs/usage/v2.md | 2 +- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/website/docs/usage/v2-1.md b/website/docs/usage/v2-1.md index 8d310f1a4..500e43803 100644 --- a/website/docs/usage/v2-1.md +++ b/website/docs/usage/v2-1.md @@ -180,7 +180,7 @@ entirely **in Markdown**, without having to compromise on easy-to-use custom UI components. We're hoping that the Markdown source will make it even easier to contribute to the documentation. For more details, check out the [styleguide](/styleguide) and -[source](https://github.com/explosion/spaCy/tree/master/website). While +[source](https://github.com/explosion/spacy/tree/v2.x/website). While converting the pages to Markdown, we've also fixed a bunch of typos, improved the existing pages and added some new content: diff --git a/website/docs/usage/v2-3.md b/website/docs/usage/v2-3.md index b6c4d7dfb..075e1ce81 100644 --- a/website/docs/usage/v2-3.md +++ b/website/docs/usage/v2-3.md @@ -161,8 +161,8 @@ debugging your tokenizer configuration. spaCy's custom warnings have been replaced with native Python [`warnings`](https://docs.python.org/3/library/warnings.html). Instead of -setting `SPACY_WARNING_IGNORE`, use the [`warnings` -filters](https://docs.python.org/3/library/warnings.html#the-warnings-filter) +setting `SPACY_WARNING_IGNORE`, use the +[`warnings` filters](https://docs.python.org/3/library/warnings.html#the-warnings-filter) to manage warnings. ```diff @@ -176,7 +176,7 @@ import spacy #### Normalization tables The normalization tables have moved from the language data in -[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang) to the +[`spacy/lang`](https://github.com/explosion/spacy/tree/v2.x/spacy/lang) to the package [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data). If you're adding data for a new language, the normalization table should be added to `spacy-lookups-data`. See @@ -190,8 +190,8 @@ lexemes will be added to the vocab automatically, just as in small models without vectors. To see the number of unique vectors and number of words with vectors, see -`nlp.meta['vectors']`, for example for `en_core_web_md` there are `20000` -unique vectors and `684830` words with vectors: +`nlp.meta['vectors']`, for example for `en_core_web_md` there are `20000` unique +vectors and `684830` words with vectors: ```python { @@ -210,8 +210,8 @@ for orth in nlp.vocab.vectors: _ = nlp.vocab[orth] ``` -If your workflow previously iterated over `nlp.vocab`, a similar alternative -is to iterate over words with vectors instead: +If your workflow previously iterated over `nlp.vocab`, a similar alternative is +to iterate over words with vectors instead: ```diff - lexemes = [w for w in nlp.vocab] @@ -220,9 +220,9 @@ is to iterate over words with vectors instead: Be aware that the set of preloaded lexemes in a v2.2 model is not equivalent to the set of words with vectors. For English, v2.2 `md/lg` models have 1.3M -provided lexemes but only 685K words with vectors. The vectors have been -updated for most languages in v2.2, but the English models contain the same -vectors for both v2.2 and v2.3. +provided lexemes but only 685K words with vectors. The vectors have been updated +for most languages in v2.2, but the English models contain the same vectors for +both v2.2 and v2.3. #### Lexeme.is_oov and Token.is_oov @@ -234,8 +234,7 @@ fixed in the next patch release v2.3.1. In v2.3, `Lexeme.is_oov` and `Token.is_oov` are `True` if the lexeme does not -have a word vector. This is equivalent to `token.orth not in -nlp.vocab.vectors`. +have a word vector. This is equivalent to `token.orth not in nlp.vocab.vectors`. Previously in v2.2, `is_oov` corresponded to whether a lexeme had stored probability and cluster features. The probability and cluster features are no @@ -270,8 +269,8 @@ as part of the model vocab. To load the probability table into a provided model, first make sure you have `spacy-lookups-data` installed. To load the table, remove the empty provided -`lexeme_prob` table and then access `Lexeme.prob` for any word to load the -table from `spacy-lookups-data`: +`lexeme_prob` table and then access `Lexeme.prob` for any word to load the table +from `spacy-lookups-data`: ```diff + # prerequisite: pip install spacy-lookups-data @@ -321,9 +320,9 @@ the [train CLI](/api/cli#train), you can use the new `--tag-map-path` option to provide in the tag map as a JSON dict. If you want to export a tag map from a provided model for use with the train -CLI, you can save it as a JSON dict. To only use string keys as required by -JSON and to make it easier to read and edit, any internal integer IDs need to -be converted back to strings: +CLI, you can save it as a JSON dict. To only use string keys as required by JSON +and to make it easier to read and edit, any internal integer IDs need to be +converted back to strings: ```python import spacy diff --git a/website/docs/usage/v2.md b/website/docs/usage/v2.md index aee3c24a6..210565c11 100644 --- a/website/docs/usage/v2.md +++ b/website/docs/usage/v2.md @@ -303,7 +303,7 @@ lookup-based lemmatization – and **many new languages**! **API:** [`Language`](/api/language) **Code:** -[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang) +[`spacy/lang`](https://github.com/explosion/spacy/tree/v2.x/spacy/lang) **Usage:** [Adding languages](/usage/adding-languages)