mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
commit
fa7fdd0d9b
2
.github/CONTRIBUTOR_AGREEMENT.md
vendored
2
.github/CONTRIBUTOR_AGREEMENT.md
vendored
|
@ -101,6 +101,6 @@ mark both statements:
|
||||||
| Name | Abhinav Sharma |
|
| Name | Abhinav Sharma |
|
||||||
| Company name (if applicable) | Fourtek I.T. Solutions Pvt. Ltd. |
|
| Company name (if applicable) | Fourtek I.T. Solutions Pvt. Ltd. |
|
||||||
| Title or role (if applicable) | Machine Learning Engineer |
|
| Title or role (if applicable) | Machine Learning Engineer |
|
||||||
| Date | 3 Novermber 2017 |
|
| Date | 3 November 2017 |
|
||||||
| GitHub username | abhi18av |
|
| GitHub username | abhi18av |
|
||||||
| Website (optional) | https://abhi18av.github.io/ |
|
| Website (optional) | https://abhi18av.github.io/ |
|
||||||
|
|
22
spacy/lang/hi/examples.py
Normal file
22
spacy/lang/hi/examples.py
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Example sentences to test spaCy and its language models.
|
||||||
|
|
||||||
|
>>> from spacy.lang.hi.examples import sentences
|
||||||
|
>>> docs = nlp.pipe(sentences)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
sentences = [
|
||||||
|
"एप्पल 1 अरब डॉलर के लिए यू.के. स्टार्टअप खरीदने पर विचार कर रहा है",
|
||||||
|
"स्वायत्त कार निर्माताओं की ओर बीमा दायित्व रखती है",
|
||||||
|
"सैन फ्रांसिस्को फुटवे डिलीवरी रोबोटों पर प्रतिबंध लगाने का विचार कर रहा है",
|
||||||
|
"लंदन यूनाइटेड किंगडम का बड़ा शहर है।",
|
||||||
|
"आप कहाँ हैं?",
|
||||||
|
"फ्रांस के राष्ट्रपति कौन हैं?",
|
||||||
|
"संयुक्त राज्य की राजधानी क्या है?",
|
||||||
|
"बराक ओबामा का जन्म हुआ था?"
|
||||||
|
]
|
|
@ -83,7 +83,7 @@
|
||||||
],
|
],
|
||||||
|
|
||||||
"V_CSS": "2.0.0",
|
"V_CSS": "2.0.0",
|
||||||
"V_JS": "2.0.0",
|
"V_JS": "2.0.1",
|
||||||
"DEFAULT_SYNTAX": "python",
|
"DEFAULT_SYNTAX": "python",
|
||||||
"ANALYTICS": "UA-58931649-1",
|
"ANALYTICS": "UA-58931649-1",
|
||||||
"MAILCHIMP": {
|
"MAILCHIMP": {
|
||||||
|
|
|
@ -141,7 +141,6 @@ export class ModelLoader {
|
||||||
if (!accuracy && !speed) return;
|
if (!accuracy && !speed) return;
|
||||||
this.renderTable(tpl, 'parser', accuracy, val => val.toFixed(2));
|
this.renderTable(tpl, 'parser', accuracy, val => val.toFixed(2));
|
||||||
this.renderTable(tpl, 'ner', accuracy, val => val.toFixed(2));
|
this.renderTable(tpl, 'ner', accuracy, val => val.toFixed(2));
|
||||||
this.renderTable(tpl, 'speed', speed, Math.round);
|
|
||||||
tpl.get('benchmarks').hidden = false;
|
tpl.get('benchmarks').hidden = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -327,7 +326,6 @@ export class ModelComparer {
|
||||||
const allKeys = [].concat(...Object.entries(this.benchKeys).map(([_, v]) => Object.keys(v)));
|
const allKeys = [].concat(...Object.entries(this.benchKeys).map(([_, v]) => Object.keys(v)));
|
||||||
for (let key of allKeys) {
|
for (let key of allKeys) {
|
||||||
if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2))
|
if (accuracy[key]) this.tpl.fill(`${key}${i}`, accuracy[key].toFixed(2))
|
||||||
else if (speed[key]) this.tpl.fill(`${key}${i}`, convertNumber(Math.round(speed[key])))
|
|
||||||
else this.tpl.fill(`${key}${i}`, 'n/a')
|
else this.tpl.fill(`${key}${i}`, 'n/a')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,7 +40,7 @@
|
||||||
},
|
},
|
||||||
|
|
||||||
"MODELS": {
|
"MODELS": {
|
||||||
"en": ["en_core_web_sm", "en_core_web_lg", "en_vectors_web_lg"],
|
"en": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_vectors_web_lg"],
|
||||||
"de": ["de_core_news_sm"],
|
"de": ["de_core_news_sm"],
|
||||||
"es": ["es_core_news_sm", "es_core_news_md"],
|
"es": ["es_core_news_sm", "es_core_news_md"],
|
||||||
"pt": ["pt_core_news_sm"],
|
"pt": ["pt_core_news_sm"],
|
||||||
|
@ -86,8 +86,7 @@
|
||||||
|
|
||||||
"MODEL_BENCHMARKS": {
|
"MODEL_BENCHMARKS": {
|
||||||
"parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" },
|
"parser": { "uas": "UAS", "las": "LAS", "tags_acc": "POS" },
|
||||||
"ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" },
|
"ner": { "ents_f": "NER F", "ents_p": "NER P", "ents_r": "NER R" }
|
||||||
"speed": { "nwords": "Words", "cpu": "w/s CPU", "gpu": "w/s GPU" }
|
|
||||||
},
|
},
|
||||||
|
|
||||||
"LANGUAGES": {
|
"LANGUAGES": {
|
||||||
|
|
|
@ -19,18 +19,28 @@ p
|
||||||
+table(["Model", "spaCy", "Type", "UAS", "NER F", "POS", "WPS", "Size"])
|
+table(["Model", "spaCy", "Type", "UAS", "NER F", "POS", "WPS", "Size"])
|
||||||
+row
|
+row
|
||||||
+cell #[+a("/models/en#en_core_web_sm") #[code en_core_web_sm]] 2.0.0
|
+cell #[+a("/models/en#en_core_web_sm") #[code en_core_web_sm]] 2.0.0
|
||||||
each data in ["2.x", "neural"]
|
+cell("num") 2.x
|
||||||
+cell("num")=data
|
+cell neural
|
||||||
+cell("num") 91.7
|
+cell("num") 91.7
|
||||||
+cell("num") 85.3
|
+cell("num") 85.3
|
||||||
+cell("num") 97.0
|
+cell("num") 97.0
|
||||||
+cell("num") 10.1k
|
+cell("num") 10.1k
|
||||||
+cell("num") #[strong 35MB]
|
+cell("num") #[strong 35MB]
|
||||||
|
|
||||||
|
+row
|
||||||
|
+cell #[+a("/models/en#en_core_web_md") #[code en_core_web_md]] 2.0.0
|
||||||
|
+cell("num") 2.x
|
||||||
|
+cell neural
|
||||||
|
+cell("num") 91.7
|
||||||
|
+cell("num") #[strong 85.9]
|
||||||
|
+cell("num") 97.1
|
||||||
|
+cell("num") 10.0k
|
||||||
|
+cell("num") 115MB
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] 2.0.0
|
+cell #[+a("/models/en#en_core_web_lg") #[code en_core_web_lg]] 2.0.0
|
||||||
each data in ["2.x", "neural"]
|
+cell("num") 2.x
|
||||||
+cell("num")=data
|
+cell neural
|
||||||
+cell("num") #[strong 91.9]
|
+cell("num") #[strong 91.9]
|
||||||
+cell("num") #[strong 85.9]
|
+cell("num") #[strong 85.9]
|
||||||
+cell("num") #[strong 97.2]
|
+cell("num") #[strong 97.2]
|
||||||
|
@ -39,15 +49,23 @@ p
|
||||||
|
|
||||||
+row("divider")
|
+row("divider")
|
||||||
+cell #[code en_core_web_sm] 1.2.0
|
+cell #[code en_core_web_sm] 1.2.0
|
||||||
each data in ["1.x", "linear", 86.6, 78.5, 96.6]
|
+cell("num") 1.x
|
||||||
+cell("num")=data
|
+cell linear
|
||||||
|
+cell("num") 86.6
|
||||||
|
+cell("num") 78.5
|
||||||
|
+cell("num") 96.6
|
||||||
+cell("num") #[strong 25.7k]
|
+cell("num") #[strong 25.7k]
|
||||||
+cell("num") 50MB
|
+cell("num") 50MB
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code en_core_web_md] 1.2.1
|
+cell #[code en_core_web_md] 1.2.1
|
||||||
each data in ["1.x", "linear", 90.6, 81.4, 96.7, "18.8k", "1GB"]
|
+cell("num") 1.x
|
||||||
+cell("num")=data
|
+cell linear
|
||||||
|
+cell("num") 90.6
|
||||||
|
+cell("num") 81.4
|
||||||
|
+cell("num") 96.7
|
||||||
|
+cell("num") 18.8k
|
||||||
|
+cell("num") 1GB
|
||||||
|
|
||||||
+h(4, "benchmarks-models-spanish") Spanish
|
+h(4, "benchmarks-models-spanish") Spanish
|
||||||
|
|
||||||
|
|
|
@ -31,10 +31,8 @@ p
|
||||||
+badge("https://anaconda.org/conda-forge/spacy/badges/version.svg", "https://anaconda.org/conda-forge/spacy")
|
+badge("https://anaconda.org/conda-forge/spacy/badges/version.svg", "https://anaconda.org/conda-forge/spacy")
|
||||||
|
|
||||||
+infobox("Important note", "⚠️")
|
+infobox("Important note", "⚠️")
|
||||||
| We're still waiting for spaCy v2.0 to
|
| We're still waiting for spaCy v2.0 to go live on #[code conda-forge],
|
||||||
| #[+a("https://github.com/conda-forge/spacy-feedstock/pulls") go live]
|
| as there's currently a backlog of OSX builds on Travis.
|
||||||
| on #[code conda-forge], as there's currently a significant
|
|
||||||
| #[+a("https://www.traviscistatus.com/") backlog] of OSX builds on Travis.
|
|
||||||
| In the meantime, you can already try out the new version using pip. The
|
| In the meantime, you can already try out the new version using pip. The
|
||||||
| conda download will follow as soon as possible.
|
| conda download will follow as soon as possible.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user