Update docs [ci skip]

This commit is contained in:
Ines Montani 2020-10-15 11:16:06 +02:00
parent 4e17ddf75e
commit 4fa869e6f7
4 changed files with 91 additions and 55 deletions

View File

@ -7,13 +7,14 @@ import { Help } from 'components/typography'; import Link from 'components/link'
| Pipeline | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
| ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | 95.5 | 98.3 | 89.7 | 1k | 8k |
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.8 | 7k | |
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | | 10k | |
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.4 | 7k | |
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.7 | 10k | |
<figcaption class="caption">
**Full pipeline accuracy and speed** on the
[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.
[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus (reported on
the development set).
</figcaption>

View File

@ -10,6 +10,18 @@ menu:
## Comparison {#comparison hidden="true"}
spaCy is a **free, open-source library** for advanced **Natural Language
Processing** (NLP) in Python. It's designed specifically for **production use**
and helps you build applications that process and "understand" large volumes of
text. It can be used to build information extraction or natural language
understanding systems.
### Feature overview {#comparison-features}
import Features from 'widgets/features.js'
<Features />
### When should I use spaCy? {#comparison-usage}
- ✅ **I'm a beginner and just getting started with NLP.** spaCy makes it easy

View File

@ -0,0 +1,72 @@
import React from 'react'
import { graphql, StaticQuery } from 'gatsby'
import { Ul, Li } from '../components/list'
export default () => (
<StaticQuery
query={query}
render={({ site }) => {
const { counts } = site.siteMetadata
return (
<Ul>
<Li>
Support for <strong>{counts.langs}+ languages</strong>
</Li>
<Li>
<strong>{counts.models} trained pipelines</strong> for{' '}
{counts.modelLangs} languages
</Li>
<Li>
Multi-task learning with pretrained <strong>transformers</strong> like
BERT
</Li>
<Li>
Pretrained <strong>word vectors</strong>
</Li>
<Li> State-of-the-art speed</Li>
<Li>
Production-ready <strong>training system</strong>
</Li>
<Li>
Linguistically-motivated <strong>tokenization</strong>
</Li>
<Li>
Components for <strong>named entity</strong> recognition, part-of-speech
tagging, dependency parsing, sentence segmentation,{' '}
<strong>text classification</strong>, lemmatization, morphological analysis,
entity linking and more
</Li>
<Li>
Easily extensible with <strong>custom components</strong> and attributes
</Li>
<Li>
Support for custom models in <strong>PyTorch</strong>,{' '}
<strong>TensorFlow</strong> and other frameworks
</Li>
<Li>
Built in <strong>visualizers</strong> for syntax and NER
</Li>
<Li>
Easy <strong>model packaging</strong>, deployment and workflow management
</Li>
<Li> Robust, rigorously evaluated accuracy</Li>
</Ul>
)
}}
/>
)
// Static GraphQL query handed to the `StaticQuery` in the default export above.
// It selects only the `counts` entry of the site metadata (`langs`,
// `modelLangs`, `models`), i.e. the values interpolated into the first two
// list items of the rendered feature list.
const query = graphql`
query FeaturesQuery {
site {
siteMetadata {
counts {
langs
modelLangs
models
}
}
}
}
`

View File

@ -14,13 +14,13 @@ import {
LandingBanner,
} from '../components/landing'
import { H2 } from '../components/typography'
import { Ul, Li } from '../components/list'
import { InlineCode } from '../components/code'
import Button from '../components/button'
import Link from '../components/link'
import QuickstartTraining from './quickstart-training'
import Project from './project'
import Features from './features'
import courseImage from '../../docs/images/course.jpg'
import prodigyImage from '../../docs/images/prodigy_overview.jpg'
import projectsImage from '../../docs/images/projects.png'
@ -56,7 +56,7 @@ for entity in doc.ents:
}
const Landing = ({ data }) => {
const { counts, nightly } = data
const { nightly } = data
const codeExample = getCodeExample(nightly)
return (
<>
@ -98,51 +98,7 @@ const Landing = ({ data }) => {
<LandingCol>
<H2>Features</H2>
<Ul>
<Li>
Support for <strong>{counts.langs}+ languages</strong>
</Li>
<Li>
<strong>{counts.models} trained pipelines</strong> for{' '}
{counts.modelLangs} languages
</Li>
<Li>
Multi-task learning with pretrained <strong>transformers</strong>{' '}
like BERT
</Li>
<Li>
Pretrained <strong>word vectors</strong>
</Li>
<Li> State-of-the-art speed</Li>
<Li>
Production-ready <strong>training system</strong>
</Li>
<Li>
Linguistically-motivated <strong>tokenization</strong>
</Li>
<Li>
Components for <strong>named entity</strong> recognition,
part-of-speech tagging, dependency parsing, sentence segmentation,{' '}
<strong>text classification</strong>, lemmatization, morphological
analysis, entity linking and more
</Li>
<Li>
Easily extensible with <strong>custom components</strong> and
attributes
</Li>
<Li>
Support for custom models in <strong>PyTorch</strong>,{' '}
<strong>TensorFlow</strong> and other frameworks
</Li>
<Li>
Built in <strong>visualizers</strong> for syntax and NER
</Li>
<Li>
Easy <strong>model packaging</strong>, deployment and workflow
management
</Li>
<Li> Robust, rigorously evaluated accuracy</Li>
</Ul>
<Features />
</LandingCol>
</LandingGrid>
@ -333,11 +289,6 @@ const landingQuery = graphql`
siteMetadata {
nightly
repo
counts {
langs
modelLangs
models
}
}
}
}