This commit is contained in:
Matthew Honnibal 2017-11-08 12:11:31 +01:00
commit fa7fdd0d9b
7 changed files with 54 additions and 19 deletions

View File

@ -101,6 +101,6 @@ mark both statements:
| Name | Abhinav Sharma | | Name | Abhinav Sharma |
| Company name (if applicable) | Fourtek I.T. Solutions Pvt. Ltd. | | Company name (if applicable) | Fourtek I.T. Solutions Pvt. Ltd. |
| Title or role (if applicable) | Machine Learning Engineer | | Title or role (if applicable) | Machine Learning Engineer |
| Date | 3 Novermber 2017 | | Date | 3 November 2017 |
| GitHub username | abhi18av | | GitHub username | abhi18av |
| Website (optional) | https://abhi18av.github.io/ | | Website (optional) | https://abhi18av.github.io/ |

22
spacy/lang/hi/examples.py Normal file
View File

@ -0,0 +1,22 @@
# coding: utf8
"""Example sentences to test spaCy and its language models.

The sentences are Hindi renderings of the example set used by the other
language packages.

>>> from spacy.lang.hi.examples import sentences
>>> docs = nlp.pipe(sentences)
"""
# NOTE: the docstring must come *before* the ``__future__`` import; a string
# placed after it is an ordinary expression statement and the module would
# have no ``__doc__``.
from __future__ import unicode_literals


# Plain text inputs, one sentence per entry, suitable for ``nlp.pipe``.
sentences = [
    "एप्पल 1 अरब डॉलर के लिए यू.के. स्टार्टअप खरीदने पर विचार कर रहा है",
    "स्वायत्त कार निर्माताओं की ओर बीमा दायित्व रखती है",
    "सैन फ्रांसिस्को फुटवे डिलीवरी रोबोटों पर प्रतिबंध लगाने का विचार कर रहा है",
    "लंदन यूनाइटेड किंगडम का बड़ा शहर है।",
    "आप कहाँ हैं?",
    "फ्रांस के राष्ट्रपति कौन हैं?",
    "संयुक्त राज्य की राजधानी क्या है?",
    # "When was Barack Obama born?" -- the interrogative "कब" (when) was
    # missing, which left the question incomplete.
    "बराक ओबामा का जन्म कब हुआ था?",
]

View File

@ -83,7 +83,7 @@
], ],
"V_CSS": "2.0.0", "V_CSS": "2.0.0",
"V_JS": "2.0.0", "V_JS": "2.0.1",
"DEFAULT_SYNTAX": "python", "DEFAULT_SYNTAX": "python",
"ANALYTICS": "UA-58931649-1", "ANALYTICS": "UA-58931649-1",
"MAILCHIMP": { "MAILCHIMP": {

View File

@ -141,7 +141,6 @@ export class ModelLoader {
if (!accuracy && !speed) return; if (!accuracy && !speed) return;
this.renderTable(tpl, 'parser', accuracy, val => val.toFixed(2)); this.renderTable(tpl, 'parser', accuracy, val => val.toFixed(2));
this.renderTable(tpl, 'ner', accuracy, val => val.toFixed(2)); this.renderTable(tpl, 'ner', accuracy, val => val.toFixed(2));
this.renderTable(tpl, 'speed', speed, Math.round);
tpl.get('benchmarks').hidden = false; tpl.get('benchmarks').hidden = false;
} }
@ -327,7 +326,6 @@ export class ModelComparer {
const allKeys = [].concat(...Object.entries(this.benchKeys).map(([_, v]) => Object.keys(v))); const allKeys = [].concat(...Object.entries(this.benchKeys).map(([_, v]) => Object.keys(v)));
for (let key of allKeys) { for (let key of allKeys) {
if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2)) if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2))
else if (speed[key]) this.tpl.fill(`${key}${i}`, convertNumber(Math.round(speed[key])))
else this.tpl.fill(`${key}${i}`, 'n/a') else this.tpl.fill(`${key}${i}`, 'n/a')
} }
} }

View File

@ -40,7 +40,7 @@
}, },
"MODELS": { "MODELS": {
"en": ["en_core_web_sm", "en_core_web_lg", "en_vectors_web_lg"], "en": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_vectors_web_lg"],
"de": ["de_core_news_sm"], "de": ["de_core_news_sm"],
"es": ["es_core_news_sm", "es_core_news_md"], "es": ["es_core_news_sm", "es_core_news_md"],
"pt": ["pt_core_news_sm"], "pt": ["pt_core_news_sm"],
@ -86,8 +86,7 @@
"MODEL_BENCHMARKS": { "MODEL_BENCHMARKS": {
"parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" }, "parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" },
"ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" }, "ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" }
"speed": { "nwords": "Words", "cpu": "w/s CPU", "gpu": "w/s GPU" }
}, },
"LANGUAGES": { "LANGUAGES": {

View File

@ -19,18 +19,28 @@ p
+table(["Model", "spaCy", "Type", "UAS", "NER F", "POS", "WPS", "Size"]) +table(["Model", "spaCy", "Type", "UAS", "NER F", "POS", "WPS", "Size"])
+row +row
+cell #[+a("/models/en#en_core_web_sm") #[code en_core_web_sm]] 2.0.0 +cell #[+a("/models/en#en_core_web_sm") #[code en_core_web_sm]] 2.0.0
each data in ["2.x", "neural"] +cell("num") 2.x
+cell("num")=data +cell neural
+cell("num") 91.7 +cell("num") 91.7
+cell("num") 85.3 +cell("num") 85.3
+cell("num") 97.0 +cell("num") 97.0
+cell("num") 10.1k +cell("num") 10.1k
+cell("num") #[strong 35MB] +cell("num") #[strong 35MB]
+row
+cell #[+a("/models/en#en_core_web_md") #[code en_core_web_md]] 2.0.0
+cell("num") 2.x
+cell neural
+cell("num") 91.7
+cell("num") #[strong 85.9]
+cell("num") 97.1
+cell("num") 10.0k
+cell("num") 115MB
+row +row
+cell #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] 2.0.0 +cell #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] 2.0.0
each data in ["2.x", "neural"] +cell("num") 2.x
+cell("num")=data +cell neural
+cell("num") #[strong 91.9] +cell("num") #[strong 91.9]
+cell("num") #[strong 85.9] +cell("num") #[strong 85.9]
+cell("num") #[strong 97.2] +cell("num") #[strong 97.2]
@ -39,15 +49,23 @@ p
+row("divider") +row("divider")
+cell #[code en_core_web_sm] 1.2.0 +cell #[code en_core_web_sm] 1.2.0
each data in ["1.x", "linear", 86.6, 78.5, 96.6] +cell("num") 1.x
+cell("num")=data +cell linear
+cell("num") 86.6
+cell("num") 78.5
+cell("num") 96.6
+cell("num") #[strong 25.7k] +cell("num") #[strong 25.7k]
+cell("num") 50MB +cell("num") 50MB
+row +row
+cell #[code en_core_web_md] 1.2.1 +cell #[code en_core_web_md] 1.2.1
each data in ["1.x", "linear", 90.6, 81.4, 96.7, "18.8k", "1GB"] +cell("num") 1.x
+cell("num")=data +cell linear
+cell("num") 90.6
+cell("num") 81.4
+cell("num") 96.7
+cell("num") 18.8k
+cell("num") 1GB
+h(4, "benchmarks-models-spanish") Spanish +h(4, "benchmarks-models-spanish") Spanish

View File

@ -31,10 +31,8 @@ p
+badge("https://anaconda.org/conda-forge/spacy/badges/version.svg", "https://anaconda.org/conda-forge/spacy") +badge("https://anaconda.org/conda-forge/spacy/badges/version.svg", "https://anaconda.org/conda-forge/spacy")
+infobox("Important note", "⚠️") +infobox("Important note", "⚠️")
| We're still waiting for spaCy v2.0 to | We're still waiting for spaCy v2.0 to go live on #[code conda-forge],
| #[+a("https://github.com/conda-forge/spacy-feedstock/pulls") go live] | as there's currently a backlog of OSX builds on Travis.
| on #[code conda-forge], as there's currently a significant
| #[+a("https://www.traviscistatus.com/") backlog] of OSX builds on Travis.
| In the meantime, you can already try out the new version using pip. The | In the meantime, you can already try out the new version using pip. The
| conda download will follow as soon as possible. | conda download will follow as soon as possible.