From 84edade82d83129afe88a6df7748aab551619c38 Mon Sep 17 00:00:00 2001 From: Abhinav Sharma Date: Wed, 8 Nov 2017 13:23:08 +0530 Subject: [PATCH 1/5] Create examples.py Populated the file with the translations of English example sentences --- spacy/lang/hi/examples.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 spacy/lang/hi/examples.py diff --git a/spacy/lang/hi/examples.py b/spacy/lang/hi/examples.py new file mode 100644 index 000000000..df7e21a99 --- /dev/null +++ b/spacy/lang/hi/examples.py @@ -0,0 +1,22 @@ + # coding: utf8 +from __future__ import unicode_literals + + +""" +Example sentences to test spaCy and its language models. + +>>> from spacy.lang.en.examples import sentences +>>> docs = nlp.pipe(sentences) +""" + + +sentences = [ +    "एप्पल 1 अरब डॉलर के लिए यू.के. स्टार्टअप खरीदने पर विचार कर रहा है", +    "स्वायत्त कार निर्माताओं की ओर बीमा दायित्व रखती है", +    "सैन फ्रांसिस्को फुटवे डिलीवरी रोबोटों पर प्रतिबंध लगाने का विचार कर रहा है", +    "लंदन यूनाइटेड किंगडम का बड़ा शहर है।", +    "आप कहाँ हैं?", +    "फ्रांस के राष्ट्रपति कौन हैं?", +    "संयुक्त राज्य की राजधानी क्या है?", +    "बराक ओबामा का जन्म कब हुआ था?" +] From c9c4aaec44478879b81111fc17ad36d6257cdc0c Mon Sep 17 00:00:00 2001 From: Abhinav Sharma Date: Wed, 8 Nov 2017 13:33:15 +0530 Subject: [PATCH 2/5] corrected a typo --- .github/CONTRIBUTOR_AGREEMENT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CONTRIBUTOR_AGREEMENT.md b/.github/CONTRIBUTOR_AGREEMENT.md index 919fb81fc..02f31d6d3 100644 --- a/.github/CONTRIBUTOR_AGREEMENT.md +++ b/.github/CONTRIBUTOR_AGREEMENT.md @@ -101,6 +101,6 @@ mark both statements: | Name | Abhinav Sharma | | Company name (if applicable) | Fourtek I.T. Solutions Pvt. Ltd. 
| | Title or role (if applicable) | Machine Learning Engineer | -| Date | 3 Novermber 2017 | +| Date | 3 Novmber 2017 | | GitHub username | abhi18av | | Website (optional) | https://abhi18av.github.io/ | From d097b34059994b3fdd8aa94565d692fe080b98ae Mon Sep 17 00:00:00 2001 From: Abhinav Sharma Date: Wed, 8 Nov 2017 14:16:04 +0530 Subject: [PATCH 3/5] Update CONTRIBUTOR_AGREEMENT.md --- .github/CONTRIBUTOR_AGREEMENT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CONTRIBUTOR_AGREEMENT.md b/.github/CONTRIBUTOR_AGREEMENT.md index 02f31d6d3..71a6671e9 100644 --- a/.github/CONTRIBUTOR_AGREEMENT.md +++ b/.github/CONTRIBUTOR_AGREEMENT.md @@ -101,6 +101,6 @@ mark both statements: | Name | Abhinav Sharma | | Company name (if applicable) | Fourtek I.T. Solutions Pvt. Ltd. | | Title or role (if applicable) | Machine Learning Engineer | -| Date | 3 Novmber 2017 | +| Date | 3 November 2017 | | GitHub username | abhi18av | | Website (optional) | https://abhi18av.github.io/ | From 42b241ccd02961f215b42733bc55f6c4010b0d2c Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 8 Nov 2017 11:36:38 +0100 Subject: [PATCH 4/5] Update language code in usage example in comment --- spacy/lang/hi/examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/lang/hi/examples.py b/spacy/lang/hi/examples.py index df7e21a99..651a76e80 100644 --- a/spacy/lang/hi/examples.py +++ b/spacy/lang/hi/examples.py @@ -5,7 +5,7 @@ from __future__ import unicode_literals """ Example sentences to test spaCy and its language models. 
->>> from spacy.lang.en.examples import sentences +>>> from spacy.lang.hi.examples import sentences >>> docs = nlp.pipe(sentences) """ From 94cd3d51dbdae3cf718f3843a55b536554e9b799 Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 8 Nov 2017 11:43:00 +0100 Subject: [PATCH 5/5] Update v2 docs and model info Take out speed tables until we fix our benchmark tests on CPU and GPU --- website/_harp.json | 2 +- website/assets/js/models.js | 2 -- website/models/_data.json | 5 ++- .../_facts-figures/_benchmarks-models.jade | 34 ++++++++++++++----- website/usage/_install/_instructions.jade | 6 ++-- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/website/_harp.json b/website/_harp.json index 1ed20ffe0..7edad1b0c 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -83,7 +83,7 @@ ], "V_CSS": "2.0.0", - "V_JS": "2.0.0", + "V_JS": "2.0.1", "DEFAULT_SYNTAX": "python", "ANALYTICS": "UA-58931649-1", "MAILCHIMP": { diff --git a/website/assets/js/models.js b/website/assets/js/models.js index 57e8f84ae..30fed344e 100644 --- a/website/assets/js/models.js +++ b/website/assets/js/models.js @@ -141,7 +141,6 @@ export class ModelLoader { if (!accuracy && !speed) return; this.renderTable(tpl, 'parser', accuracy, val => val.toFixed(2)); this.renderTable(tpl, 'ner', accuracy, val => val.toFixed(2)); - this.renderTable(tpl, 'speed', speed, Math.round); tpl.get('benchmarks').hidden = false; } @@ -327,7 +326,6 @@ export class ModelComparer { const allKeys = [].concat(...Object.entries(this.benchKeys).map(([_, v]) => Object.keys(v))); for (let key of allKeys) { if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2)) - else if (speed[key]) this.tpl.fill(`${key}${i}`, convertNumber(Math.round(speed[key]))) else this.tpl.fill(`${key}${i}`, 'n/a') } } diff --git a/website/models/_data.json b/website/models/_data.json index 3c4fb5bf2..a477747df 100644 --- a/website/models/_data.json +++ b/website/models/_data.json @@ -40,7 +40,7 @@ }, "MODELS": { - "en": 
["en_core_web_sm", "en_core_web_lg", "en_vectors_web_lg"], + "en": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_vectors_web_lg"], "de": ["de_core_news_sm"], "es": ["es_core_news_sm", "es_core_news_md"], "pt": ["pt_core_news_sm"], @@ -86,8 +86,7 @@ "MODEL_BENCHMARKS": { "parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" }, - "ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" }, - "speed": { "nwords": "Words", "cpu": "w/s CPU", "gpu": "w/s GPU" } + "ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" } }, "LANGUAGES": { diff --git a/website/usage/_facts-figures/_benchmarks-models.jade b/website/usage/_facts-figures/_benchmarks-models.jade index 4c5481f1c..e62e5fc79 100644 --- a/website/usage/_facts-figures/_benchmarks-models.jade +++ b/website/usage/_facts-figures/_benchmarks-models.jade @@ -19,18 +19,28 @@ p +table(["Model", "spaCy", "Type", "UAS", "NER F", "POS", "WPS", "Size"]) +row +cell #[+a("/models/en#en_core_web_sm") #[code en_core_web_sm]] 2.0.0 - each data in ["2.x", "neural"] - +cell("num")=data + +cell("num") 2.x + +cell neural +cell("num") 91.7 +cell("num") 85.3 +cell("num") 97.0 +cell("num") 10.1k +cell("num") #[strong 35MB] + +row + +cell #[+a("/models/en#en_core_web_md") #[code en_core_web_md]] 2.0.0 + +cell("num") 2.x + +cell neural + +cell("num") 91.7 + +cell("num") #[strong 85.9] + +cell("num") 97.1 + +cell("num") 10.0k + +cell("num") 115MB + +row +cell #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] 2.0.0 - each data in ["2.x", "neural"] - +cell("num")=data + +cell("num") 2.x + +cell neural +cell("num") #[strong 91.9] +cell("num") #[strong 85.9] +cell("num") #[strong 97.2] @@ -39,15 +49,23 @@ p +row("divider") +cell #[code en_core_web_sm] 1.2.0 - each data in ["1.x", "linear", 86.6, 78.5, 96.6] - +cell("num")=data + +cell("num") 1.x + +cell linear + +cell("num") 86.6 + +cell("num") 78.5 + +cell("num") 96.6 +cell("num") #[strong 25.7k] +cell("num") 50MB +row +cell #[code en_core_web_md] 1.2.1 
- each data in ["1.x", "linear", 90.6, 81.4, 96.7, "18.8k", "1GB"] - +cell("num")=data + +cell("num") 1.x + +cell linear + +cell("num") 90.6 + +cell("num") 81.4 + +cell("num") 96.7 + +cell("num") 18.8k + +cell("num") 1GB +h(4, "benchmarks-models-spanish") Spanish diff --git a/website/usage/_install/_instructions.jade b/website/usage/_install/_instructions.jade index 7e22554d2..18cc53612 100644 --- a/website/usage/_install/_instructions.jade +++ b/website/usage/_install/_instructions.jade @@ -31,10 +31,8 @@ p +badge("https://anaconda.org/conda-forge/spacy/badges/version.svg", "https://anaconda.org/conda-forge/spacy") +infobox("Important note", "⚠️") - | We're still waiting for spaCy v2.0 to - | #[+a("https://github.com/conda-forge/spacy-feedstock/pulls") go live] - | on #[code conda-forge], as there's currently a significant - | #[+a("https://www.traviscistatus.com/") backlog] of OSX builds on Travis. + | We're still waiting for spaCy v2.0 to go live on #[code conda-forge], + | as there's currently a backlog of OSX builds on Travis. | In the meantime, you can already try out the new version using pip. The | conda download will follow as soon as possible.