Update alpha languages and add aside for tokenizer dependencies

This commit is contained in:
ines 2017-05-03 09:58:31 +02:00
parent f0d7a87e18
commit f9384b0fbd


@@ -35,14 +35,15 @@ p
| Work has started on the following languages. You can help by improving
| the existing language data and extending the tokenization patterns.
+aside("Dependencies")
| Some language tokenizers require external dependencies. To use #[strong Chinese],
| you need to have #[+a("https://github.com/fxsjy/jieba") Jieba] installed.
| The #[strong Japanese] tokenizer requires
| #[+a("https://github.com/mocobeta/janome") Janome].
+table([ "Language", "Source" ])
each language, code in { es: "Spanish", it: "Italian", pt: "Portuguese", nl: "Dutch", sv: "Swedish", fi: "Finnish", nb: "Norwegian Bokmål", hu: "Hungarian", bn: "Bengali", he: "Hebrew", zh: "Chinese", ja: "Japanese" }
+row
+cell #{language} #[code=code]
+cell
+src(gh("spaCy", "spacy/" + code)) spacy/#{code}
p
| Chinese tokenization requires the
| #[+a("https://github.com/fxsjy/jieba") Jieba] library. Statistical
| models are coming soon.
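The aside above notes that the Chinese and Japanese tokenizers depend on external packages (Jieba and Janome). A minimal sketch of how such a dependency check could work, assuming a hypothetical `check_tokenizer_deps` helper and the `zh`/`ja` language codes from the table (this is illustrative, not spaCy's actual loading code):

```python
import importlib

# Hypothetical mapping of language code to its external tokenizer backend,
# based on the dependencies named in the aside above.
DEPENDENCIES = {
    "zh": "jieba",   # Chinese word segmentation
    "ja": "janome",  # Japanese morphological analysis
}

def check_tokenizer_deps(lang_code):
    """Return True if the external dependency for lang_code is importable,
    or if the language needs no external tokenizer at all."""
    module = DEPENDENCIES.get(lang_code)
    if module is None:
        return True  # e.g. "es", "it": no external dependency required
    try:
        importlib.import_module(module)
        return True
    except ImportError:
        return False
```

A caller could run this before constructing the language class and raise a helpful error pointing at the install instructions instead of failing with a bare `ImportError` deep inside the tokenizer.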