diff --git a/website/docs/usage/_benchmarks-models.md b/website/docs/usage/_benchmarks-models.md
index 4e6da9ad8..33b174f75 100644
--- a/website/docs/usage/_benchmarks-models.md
+++ b/website/docs/usage/_benchmarks-models.md
@@ -7,13 +7,14 @@ import { Help } from 'components/typography'; import Link from 'components/link'
| Pipeline | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br />GPU <Help>words per second on GPU, higher is better</Help> |
| ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | 95.5 | 98.3 | 89.7 | 1k | 8k |
-| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.8 | 7k | |
-| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | | 10k | |
+| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.4 | 7k | |
+| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.7 | 10k | |
**Full pipeline accuracy and speed** on the
-[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.
+[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus (reported on
+the development set).
diff --git a/website/docs/usage/facts-figures.md b/website/docs/usage/facts-figures.md
index c7a7d0525..52696b7dc 100644
--- a/website/docs/usage/facts-figures.md
+++ b/website/docs/usage/facts-figures.md
@@ -10,6 +10,18 @@ menu:
## Comparison {#comparison hidden="true"}
+spaCy is a **free, open-source library** for advanced **Natural Language
+Processing** (NLP) in Python. It's designed specifically for **production use**
+and helps you build applications that process and "understand" large volumes of
+text. It can be used to build information extraction or natural language
+understanding systems.
+
+### Feature overview {#comparison-features}
+
+import Features from 'widgets/features.js'
+
+<Features />
+
### When should I use spaCy? {#comparison-usage}
- ✅ **I'm a beginner and just getting started with NLP.** – spaCy makes it easy
diff --git a/website/src/widgets/features.js b/website/src/widgets/features.js
new file mode 100644
index 000000000..73863d5cc
--- /dev/null
+++ b/website/src/widgets/features.js
@@ -0,0 +1,72 @@
+import React from 'react'
+import { graphql, StaticQuery } from 'gatsby'
+
+import { Ul, Li } from '../components/list'
+
+export default () => (
+ {
+ const { counts } = site.siteMetadata
+ return (
+
+ -
+ ✅ Support for {counts.langs}+ languages
+
+ -
+ ✅ {counts.models} trained pipelines for{' '}
+ {counts.modelLangs} languages
+
+ -
+ ✅ Multi-task learning with pretrained transformers like
+ BERT
+
+ -
+ ✅ Pretrained word vectors
+
+ - ✅ State-of-the-art speed
+ -
+ ✅ Production-ready training system
+
+ -
+ ✅ Linguistically-motivated tokenization
+
+ -
+ ✅ Components for named entity recognition, part-of-speech
+ tagging, dependency parsing, sentence segmentation,{' '}
+ text classification, lemmatization, morphological analysis,
+ entity linking and more
+
+ -
+ ✅ Easily extensible with custom components and attributes
+
+ -
+ ✅ Support for custom models in PyTorch,{' '}
+ TensorFlow and other frameworks
+
+ -
+ ✅ Built in visualizers for syntax and NER
+
+ -
+ ✅ Easy model packaging, deployment and workflow management
+
+ - ✅ Robust, rigorously evaluated accuracy
+
+ )
+ }}
+ />
+)
+
+// GraphQL query selecting `site.siteMetadata.counts` (`langs`, `modelLangs`,
+// `models`) — the same fields the default export above reads as
+// `counts.langs`, `counts.models` and `counts.modelLangs`.
+const query = graphql`
+ query FeaturesQuery {
+ site {
+ siteMetadata {
+ counts {
+ langs
+ modelLangs
+ models
+ }
+ }
+ }
+ }
+`
diff --git a/website/src/widgets/landing.js b/website/src/widgets/landing.js
index 46be93ab5..2cee9460f 100644
--- a/website/src/widgets/landing.js
+++ b/website/src/widgets/landing.js
@@ -14,13 +14,13 @@ import {
LandingBanner,
} from '../components/landing'
import { H2 } from '../components/typography'
-import { Ul, Li } from '../components/list'
import { InlineCode } from '../components/code'
import Button from '../components/button'
import Link from '../components/link'
import QuickstartTraining from './quickstart-training'
import Project from './project'
+import Features from './features'
import courseImage from '../../docs/images/course.jpg'
import prodigyImage from '../../docs/images/prodigy_overview.jpg'
import projectsImage from '../../docs/images/projects.png'
@@ -56,7 +56,7 @@ for entity in doc.ents:
}
const Landing = ({ data }) => {
- const { counts, nightly } = data
+ const { nightly } = data
const codeExample = getCodeExample(nightly)
return (
<>
@@ -98,51 +98,7 @@ const Landing = ({ data }) => {
Features
-
- -
- ✅ Support for {counts.langs}+ languages
-
- -
- ✅ {counts.models} trained pipelines for{' '}
- {counts.modelLangs} languages
-
- -
- ✅ Multi-task learning with pretrained transformers{' '}
- like BERT
-
- -
- ✅ Pretrained word vectors
-
- - ✅ State-of-the-art speed
- -
- ✅ Production-ready training system
-
- -
- ✅ Linguistically-motivated tokenization
-
- -
- ✅ Components for named entity recognition,
- part-of-speech tagging, dependency parsing, sentence segmentation,{' '}
- text classification, lemmatization, morphological
- analysis, entity linking and more
-
- -
- ✅ Easily extensible with custom components and
- attributes
-
- -
- ✅ Support for custom models in PyTorch,{' '}
- TensorFlow and other frameworks
-
- -
- ✅ Built in visualizers for syntax and NER
-
- -
- ✅ Easy model packaging, deployment and workflow
- management
-
- - ✅ Robust, rigorously evaluated accuracy
-
+                    <Features />
@@ -333,11 +289,6 @@ const landingQuery = graphql`
siteMetadata {
nightly
repo
- counts {
- langs
- modelLangs
- models
- }
}
}
}