From 02008e9a55ea0d4a3ac41cb2324d89c9f837abcd Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 23 Sep 2020 22:02:31 +0200 Subject: [PATCH] Update docs [ci skip] --- website/docs/usage/_benchmarks-models.md | 40 +++++++++++------------- website/docs/usage/facts-figures.md | 19 +++++++++++ website/src/widgets/landing.js | 2 +- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/website/docs/usage/_benchmarks-models.md b/website/docs/usage/_benchmarks-models.md index c5ce95e2f..1fe6e2bff 100644 --- a/website/docs/usage/_benchmarks-models.md +++ b/website/docs/usage/_benchmarks-models.md @@ -4,21 +4,16 @@ import { Help } from 'components/typography'; import Link from 'components/link'
-| System | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br />GPU <Help>words per second on GPU, higher is better</Help> | -| ------------------------------------------------------------------------- | ----------------: | ----------------: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: | -| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | | | | | 6k | -| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | | | | | | -| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.9 | 10k | | -| [Stanza](https://stanfordnlp.github.io/stanza/) (StanfordNLP)<sup>1</sup> | _n/a_<sup>2</sup> | _n/a_<sup>2</sup> | 88.8 | 234 | 2k | -| Flair | - | 97.9 | 89.3 | | |
+| System | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br />GPU <Help>words per second on GPU, higher is better</Help> | +| ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: | +| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | | | | | 6k | +| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | | | | | | +| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.9 | 10k | |
**Accuracy and speed on the -[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.**
**1. ** -[Qi et al. (2020)](https://arxiv.org/pdf/2003.07082.pdf). **2. ** _Coming soon_: -Qi et al. don't report parsing and tagging results on OntoNotes. We're working -on training Stanza on this corpus to allow direct comparison. +[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.**
@@ -26,19 +21,22 @@ on training Stanza on this corpus to allow direct comparison.
-| System | POS | UAS | LAS | -| ------------------------------------------------------------------------------ | ---: | ---: | ---: | -| spaCy RoBERTa (2020) | 98.0 | 96.8 | 95.0 | -| spaCy CNN (2020) | | | | -| [Mrini et al.](https://khalilmrini.github.io/Label_Attention_Layer.pdf) (2019) | 97.3 | 97.4 | 96.3 | -| [Zhou and Zhao](https://www.aclweb.org/anthology/P19-1230/) (2019) | 97.3 | 97.2 | 95.7 | +| Named Entity Recognition Model | OntoNotes | CoNLL '03 | +| ------------------------------------------------------------------------------ | --------: | --------: | +| spaCy RoBERTa (2020) | | | +| spaCy CNN (2020) | | | +| spaCy CNN (2017) | 86.4 | | +| [Stanza](https://stanfordnlp.github.io/stanza/) (StanfordNLP)<sup>1</sup> | 88.8 | | +| Flair<sup>2</sup> | 89.7 | |
-**Accuracy on the Penn Treebank.** See -[NLP-progress](http://nlpprogress.com/english/dependency_parsing.html) for more -results. For spaCy's evaluation, see the -[project template](https://github.com/explosion/projects/tree/v3/benchmarks/parsing_penn_treebank). +**Named entity recognition accuracy** on the +[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) and +[CoNLL-2003](https://www.aclweb.org/anthology/W03-0419.pdf) corpora. See +[NLP-progress](http://nlpprogress.com/english/named_entity_recognition.html) for +more results. **1.** [Qi et al. (2020)](https://arxiv.org/pdf/2003.07082.pdf). +**2.** [Akbik et al. (2018)](https://www.aclweb.org/anthology/C18-1139/).
diff --git a/website/docs/usage/facts-figures.md b/website/docs/usage/facts-figures.md index 75f92070a..ad6776b2c 100644 --- a/website/docs/usage/facts-figures.md +++ b/website/docs/usage/facts-figures.md @@ -61,6 +61,25 @@ import Benchmarks from 'usage/\_benchmarks-models.md' +
+ +| System | UAS | LAS | +| ------------------------------------------------------------------------------ | ---: | ---: | +| spaCy RoBERTa (2020) | 96.8 | 95.0 | +| spaCy CNN (2020) | 93.7 | 91.8 | +| [Mrini et al.](https://khalilmrini.github.io/Label_Attention_Layer.pdf) (2019) | 97.4 | 96.3 | +| [Zhou and Zhao](https://www.aclweb.org/anthology/P19-1230/) (2019) | 97.2 | 95.7 | + +
+ +**Accuracy on the Penn Treebank.** See +[NLP-progress](http://nlpprogress.com/english/dependency_parsing.html) for more +results. + +
+ +
+ The easiest way to reproduce spaCy's benchmarks on the Penn Treebank is to clone diff --git a/website/src/widgets/landing.js b/website/src/widgets/landing.js index 2e75c893a..6fe7f4cdf 100644 --- a/website/src/widgets/landing.js +++ b/website/src/widgets/landing.js @@ -297,7 +297,7 @@ const Landing = ({ data }) => { to run.

- +