mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Update docs [ci skip]
This commit is contained in:
parent
4e17ddf75e
commit
4fa869e6f7
|
@ -7,13 +7,14 @@ import { Help } from 'components/typography'; import Link from 'components/link'
|
|||
| Pipeline | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
|
||||
| ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
|
||||
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | 95.5 | 98.3 | 89.7 | 1k | 8k |
|
||||
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.8 | 7k | |
|
||||
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | | 10k | |
|
||||
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.4 | 7k | |
|
||||
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.7 | 10k | |
|
||||
|
||||
<figcaption class="caption">
|
||||
|
||||
**Full pipeline accuracy and speed** on the
|
||||
[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.
|
||||
[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus (reported on
|
||||
the development set).
|
||||
|
||||
</figcaption>
|
||||
|
||||
|
|
|
@ -10,6 +10,18 @@ menu:
|
|||
|
||||
## Comparison {#comparison hidden="true"}
|
||||
|
||||
spaCy is a **free, open-source library** for advanced **Natural Language
|
||||
Processing** (NLP) in Python. It's designed specifically for **production use**
|
||||
and helps you build applications that process and "understand" large volumes of
|
||||
text. It can be used to build information extraction or natural language
|
||||
understanding systems.
|
||||
|
||||
### Feature overview {#comparison-features}
|
||||
|
||||
import Features from 'widgets/features.js'
|
||||
|
||||
<Features />
|
||||
|
||||
### When should I use spaCy? {#comparison-usage}
|
||||
|
||||
- ✅ **I'm a beginner and just getting started with NLP.** – spaCy makes it easy
|
||||
|
|
72
website/src/widgets/features.js
Normal file
72
website/src/widgets/features.js
Normal file
|
@ -0,0 +1,72 @@
|
|||
import React from 'react'
|
||||
import { graphql, StaticQuery } from 'gatsby'
|
||||
|
||||
import { Ul, Li } from '../components/list'
|
||||
|
||||
export default () => (
|
||||
<StaticQuery
|
||||
query={query}
|
||||
render={({ site }) => {
|
||||
const { counts } = site.siteMetadata
|
||||
return (
|
||||
<Ul>
|
||||
<Li>
|
||||
✅ Support for <strong>{counts.langs}+ languages</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
✅ <strong>{counts.models} trained pipelines</strong> for{' '}
|
||||
{counts.modelLangs} languages
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Multi-task learning with pretrained <strong>transformers</strong> like
|
||||
BERT
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Pretrained <strong>word vectors</strong>
|
||||
</Li>
|
||||
<Li>✅ State-of-the-art speed</Li>
|
||||
<Li>
|
||||
✅ Production-ready <strong>training system</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Linguistically-motivated <strong>tokenization</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Components for <strong>named entity</strong> recognition, part-of-speech
|
||||
tagging, dependency parsing, sentence segmentation,{' '}
|
||||
<strong>text classification</strong>, lemmatization, morphological analysis,
|
||||
entity linking and more
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Easily extensible with <strong>custom components</strong> and attributes
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Support for custom models in <strong>PyTorch</strong>,{' '}
|
||||
<strong>TensorFlow</strong> and other frameworks
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Built in <strong>visualizers</strong> for syntax and NER
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Easy <strong>model packaging</strong>, deployment and workflow management
|
||||
</Li>
|
||||
<Li>✅ Robust, rigorously evaluated accuracy</Li>
|
||||
</Ul>
|
||||
)
|
||||
}}
|
||||
/>
|
||||
)
|
||||
|
||||
const query = graphql`
|
||||
query FeaturesQuery {
|
||||
site {
|
||||
siteMetadata {
|
||||
counts {
|
||||
langs
|
||||
modelLangs
|
||||
models
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
`
|
|
@ -14,13 +14,13 @@ import {
|
|||
LandingBanner,
|
||||
} from '../components/landing'
|
||||
import { H2 } from '../components/typography'
|
||||
import { Ul, Li } from '../components/list'
|
||||
import { InlineCode } from '../components/code'
|
||||
import Button from '../components/button'
|
||||
import Link from '../components/link'
|
||||
|
||||
import QuickstartTraining from './quickstart-training'
|
||||
import Project from './project'
|
||||
import Features from './features'
|
||||
import courseImage from '../../docs/images/course.jpg'
|
||||
import prodigyImage from '../../docs/images/prodigy_overview.jpg'
|
||||
import projectsImage from '../../docs/images/projects.png'
|
||||
|
@ -56,7 +56,7 @@ for entity in doc.ents:
|
|||
}
|
||||
|
||||
const Landing = ({ data }) => {
|
||||
const { counts, nightly } = data
|
||||
const { nightly } = data
|
||||
const codeExample = getCodeExample(nightly)
|
||||
return (
|
||||
<>
|
||||
|
@ -98,51 +98,7 @@ const Landing = ({ data }) => {
|
|||
|
||||
<LandingCol>
|
||||
<H2>Features</H2>
|
||||
<Ul>
|
||||
<Li>
|
||||
✅ Support for <strong>{counts.langs}+ languages</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
✅ <strong>{counts.models} trained pipelines</strong> for{' '}
|
||||
{counts.modelLangs} languages
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Multi-task learning with pretrained <strong>transformers</strong>{' '}
|
||||
like BERT
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Pretrained <strong>word vectors</strong>
|
||||
</Li>
|
||||
<Li>✅ State-of-the-art speed</Li>
|
||||
<Li>
|
||||
✅ Production-ready <strong>training system</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Linguistically-motivated <strong>tokenization</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Components for <strong>named entity</strong> recognition,
|
||||
part-of-speech tagging, dependency parsing, sentence segmentation,{' '}
|
||||
<strong>text classification</strong>, lemmatization, morphological
|
||||
analysis, entity linking and more
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Easily extensible with <strong>custom components</strong> and
|
||||
attributes
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Support for custom models in <strong>PyTorch</strong>,{' '}
|
||||
<strong>TensorFlow</strong> and other frameworks
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Built in <strong>visualizers</strong> for syntax and NER
|
||||
</Li>
|
||||
<Li>
|
||||
✅ Easy <strong>model packaging</strong>, deployment and workflow
|
||||
management
|
||||
</Li>
|
||||
<Li>✅ Robust, rigorously evaluated accuracy</Li>
|
||||
</Ul>
|
||||
<Features />
|
||||
</LandingCol>
|
||||
</LandingGrid>
|
||||
|
||||
|
@ -333,11 +289,6 @@ const landingQuery = graphql`
|
|||
siteMetadata {
|
||||
nightly
|
||||
repo
|
||||
counts {
|
||||
langs
|
||||
modelLangs
|
||||
models
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user