diff --git a/website/docs/usage/_benchmarks-models.md b/website/docs/usage/_benchmarks-models.md
index 4e6da9ad8..33b174f75 100644
--- a/website/docs/usage/_benchmarks-models.md
+++ b/website/docs/usage/_benchmarks-models.md
@@ -7,13 +7,14 @@ import { Help } from 'components/typography'; import Link from 'components/link'
 | Pipeline                                                   | Parser | Tagger |  NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br />GPU <Help>words per second on GPU, higher is better</Help> |
 | ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
 | [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) |   95.5 |   98.3 | 89.7 | 1k | 8k |
-| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3)   |   92.2 |   97.4 | 85.8 | 7k |  |
-| `en_core_web_lg` (spaCy v2)                                |   91.9 |   97.2 |      | 10k |  |
+| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3)   |   92.2 |   97.4 | 85.4 | 7k |  |
+| `en_core_web_lg` (spaCy v2)                                |   91.9 |   97.2 | 85.7 | 10k |  |
 
 **Full pipeline accuracy and speed** on the
-[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.
+[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus (reported on
+the development set).
diff --git a/website/docs/usage/facts-figures.md b/website/docs/usage/facts-figures.md
index c7a7d0525..52696b7dc 100644
--- a/website/docs/usage/facts-figures.md
+++ b/website/docs/usage/facts-figures.md
@@ -10,6 +10,18 @@ menu:
 
 ## Comparison {#comparison hidden="true"}
 
+spaCy is a **free, open-source library** for advanced **Natural Language
+Processing** (NLP) in Python. It's designed specifically for **production use**
+and helps you build applications that process and "understand" large volumes of
+text. It can be used to build information extraction or natural language
+understanding systems.
+
+### Feature overview {#comparison-features}
+
+import Features from 'widgets/features.js'
+
+<Features />
+
 ### When should I use spaCy? {#comparison-usage}
 
 - ✅ **I'm a beginner and just getting started with NLP.** – spaCy makes it easy
diff --git a/website/src/widgets/features.js b/website/src/widgets/features.js
new file mode 100644
index 000000000..73863d5cc
--- /dev/null
+++ b/website/src/widgets/features.js
@@ -0,0 +1,72 @@
+import React from 'react'
+import { graphql, StaticQuery } from 'gatsby'
+
+import { Ul, Li } from '../components/list'
+
+export default () => (
+    <StaticQuery
+        query={query}
+        render={({ site }) => {
+            const { counts } = site.siteMetadata
+            return (
+                <Ul>
+                    <Li>
+                        ✅ Support for <strong>{counts.langs}+ languages</strong>
+                    </Li>
+                    <Li>
+                        ✅ <strong>{counts.models} trained pipelines</strong> for{' '}
+                        {counts.modelLangs} languages
+                    </Li>
+                    <Li>
+                        ✅ Multi-task learning with pretrained <strong>transformers</strong>{' '}
+                        like BERT
+                    </Li>
+                    <Li>
+                        ✅ Pretrained <strong>word vectors</strong>
+                    </Li>
+                    <Li>✅ State-of-the-art speed</Li>
+                    <Li>
+                        ✅ Production-ready <strong>training system</strong>
+                    </Li>
+                    <Li>
+                        ✅ Linguistically-motivated <strong>tokenization</strong>
+                    </Li>
+                    <Li>
+                        ✅ Components for <strong>named entity</strong> recognition,
+                        part-of-speech tagging, dependency parsing, sentence segmentation,{' '}
+                        <strong>text classification</strong>, lemmatization, morphological
+                        analysis, entity linking and more
+                    </Li>
+                    <Li>
+                        ✅ Easily extensible with <strong>custom components</strong> and
+                        attributes
+                    </Li>
+                    <Li>
+                        ✅ Support for custom models in <strong>PyTorch</strong>,{' '}
+                        <strong>TensorFlow</strong> and other frameworks
+                    </Li>
+                    <Li>
+                        ✅ Built in <strong>visualizers</strong> for syntax and NER
+                    </Li>
+                    <Li>
+                        ✅ Easy <strong>model packaging, deployment and workflow
+                        management</strong>
+                    </Li>
+                    <Li>✅ Robust, rigorously evaluated accuracy</Li>
+                </Ul>
+            )
+        }}
+    />
+)
+
+const query = graphql`
+    query FeaturesQuery {
+        site {
+            siteMetadata {
+                counts {
+                    langs
+                    modelLangs
+                    models
+                }
+            }
+        }
+    }
+`
diff --git a/website/src/widgets/landing.js b/website/src/widgets/landing.js
index 46be93ab5..2cee9460f 100644
--- a/website/src/widgets/landing.js
+++ b/website/src/widgets/landing.js
@@ -14,13 +14,13 @@ import {
     LandingBanner,
 } from '../components/landing'
 import { H2 } from '../components/typography'
-import { Ul, Li } from '../components/list'
 import { InlineCode } from '../components/code'
 import Button from '../components/button'
 import Link from '../components/link'
 import QuickstartTraining from './quickstart-training'
 import Project from './project'
+import Features from './features'
 import courseImage from '../../docs/images/course.jpg'
 import prodigyImage from '../../docs/images/prodigy_overview.jpg'
 import projectsImage from '../../docs/images/projects.png'
@@ -56,7 +56,7 @@ for entity in doc.ents:
 }
 
 const Landing = ({ data }) => {
-    const { counts, nightly } = data
+    const { nightly } = data
     const codeExample = getCodeExample(nightly)
     return (
         <>
@@ -98,51 +98,7 @@ const Landing = ({ data }) => {
                 <LandingCol>
                     <H2>Features</H2>
-                    <Ul>
-                        <Li>
-                            ✅ Support for <strong>{counts.langs}+ languages</strong>
-                        </Li>
-                        <Li>
-                            ✅ <strong>{counts.models} trained pipelines</strong> for{' '}
-                            {counts.modelLangs} languages
-                        </Li>
-                        <Li>
-                            ✅ Multi-task learning with pretrained <strong>transformers</strong>{' '}
-                            like BERT
-                        </Li>
-                        <Li>
-                            ✅ Pretrained <strong>word vectors</strong>
-                        </Li>
-                        <Li>✅ State-of-the-art speed</Li>
-                        <Li>
-                            ✅ Production-ready <strong>training system</strong>
-                        </Li>
-                        <Li>
-                            ✅ Linguistically-motivated <strong>tokenization</strong>
-                        </Li>
-                        <Li>
-                            ✅ Components for <strong>named entity</strong> recognition,
-                            part-of-speech tagging, dependency parsing, sentence segmentation,{' '}
-                            <strong>text classification</strong>, lemmatization, morphological
-                            analysis, entity linking and more
-                        </Li>
-                        <Li>
-                            ✅ Easily extensible with <strong>custom components</strong> and
-                            attributes
-                        </Li>
-                        <Li>
-                            ✅ Support for custom models in <strong>PyTorch</strong>,{' '}
-                            <strong>TensorFlow</strong> and other frameworks
-                        </Li>
-                        <Li>
-                            ✅ Built in <strong>visualizers</strong> for syntax and NER
-                        </Li>
-                        <Li>
-                            ✅ Easy <strong>model packaging, deployment and workflow
-                            management</strong>
-                        </Li>
-                        <Li>✅ Robust, rigorously evaluated accuracy</Li>
-                    </Ul>
+                    <Features />
                 </LandingCol>
@@ -333,11 +289,6 @@
             siteMetadata {
                 nightly
                 repo
-                counts {
-                    langs
-                    modelLangs
-                    models
-                }
             }
         }
     }