mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
Update docs [ci skip]
This commit is contained in:
parent
4e17ddf75e
commit
4fa869e6f7
|
@ -7,13 +7,14 @@ import { Help } from 'components/typography'; import Link from 'components/link'
|
||||||
| Pipeline | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
|
| Pipeline | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
|
||||||
| ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
|
| ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
|
||||||
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | 95.5 | 98.3 | 89.7 | 1k | 8k |
|
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | 95.5 | 98.3 | 89.7 | 1k | 8k |
|
||||||
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.8 | 7k | |
|
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.4 | 7k | |
|
||||||
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | | 10k | |
|
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.7 | 10k | |
|
||||||
|
|
||||||
<figcaption class="caption">
|
<figcaption class="caption">
|
||||||
|
|
||||||
**Full pipeline accuracy and speed** on the
|
**Full pipeline accuracy and speed** on the
|
||||||
[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.
|
[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus (reported on
|
||||||
|
the development set).
|
||||||
|
|
||||||
</figcaption>
|
</figcaption>
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,18 @@ menu:
|
||||||
|
|
||||||
## Comparison {#comparison hidden="true"}
|
## Comparison {#comparison hidden="true"}
|
||||||
|
|
||||||
|
spaCy is a **free, open-source library** for advanced **Natural Language
|
||||||
|
Processing** (NLP) in Python. It's designed specifically for **production use**
|
||||||
|
and helps you build applications that process and "understand" large volumes of
|
||||||
|
text. It can be used to build information extraction or natural language
|
||||||
|
understanding systems.
|
||||||
|
|
||||||
|
### Feature overview {#comparison-features}
|
||||||
|
|
||||||
|
import Features from 'widgets/features.js'
|
||||||
|
|
||||||
|
<Features />
|
||||||
|
|
||||||
### When should I use spaCy? {#comparison-usage}
|
### When should I use spaCy? {#comparison-usage}
|
||||||
|
|
||||||
- ✅ **I'm a beginner and just getting started with NLP.** – spaCy makes it easy
|
- ✅ **I'm a beginner and just getting started with NLP.** – spaCy makes it easy
|
||||||
|
|
72
website/src/widgets/features.js
Normal file
72
website/src/widgets/features.js
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
import React from 'react'
|
||||||
|
import { graphql, StaticQuery } from 'gatsby'
|
||||||
|
|
||||||
|
import { Ul, Li } from '../components/list'
|
||||||
|
|
||||||
|
export default () => (
|
||||||
|
<StaticQuery
|
||||||
|
query={query}
|
||||||
|
render={({ site }) => {
|
||||||
|
const { counts } = site.siteMetadata
|
||||||
|
return (
|
||||||
|
<Ul>
|
||||||
|
<Li>
|
||||||
|
✅ Support for <strong>{counts.langs}+ languages</strong>
|
||||||
|
</Li>
|
||||||
|
<Li>
|
||||||
|
✅ <strong>{counts.models} trained pipelines</strong> for{' '}
|
||||||
|
{counts.modelLangs} languages
|
||||||
|
</Li>
|
||||||
|
<Li>
|
||||||
|
✅ Multi-task learning with pretrained <strong>transformers</strong> like
|
||||||
|
BERT
|
||||||
|
</Li>
|
||||||
|
<Li>
|
||||||
|
✅ Pretrained <strong>word vectors</strong>
|
||||||
|
</Li>
|
||||||
|
<Li>✅ State-of-the-art speed</Li>
|
||||||
|
<Li>
|
||||||
|
✅ Production-ready <strong>training system</strong>
|
||||||
|
</Li>
|
||||||
|
<Li>
|
||||||
|
✅ Linguistically-motivated <strong>tokenization</strong>
|
||||||
|
</Li>
|
||||||
|
<Li>
|
||||||
|
✅ Components for <strong>named entity</strong> recognition, part-of-speech
|
||||||
|
tagging, dependency parsing, sentence segmentation,{' '}
|
||||||
|
<strong>text classification</strong>, lemmatization, morphological analysis,
|
||||||
|
entity linking and more
|
||||||
|
</Li>
|
||||||
|
<Li>
|
||||||
|
✅ Easily extensible with <strong>custom components</strong> and attributes
|
||||||
|
</Li>
|
||||||
|
<Li>
|
||||||
|
✅ Support for custom models in <strong>PyTorch</strong>,{' '}
|
||||||
|
<strong>TensorFlow</strong> and other frameworks
|
||||||
|
</Li>
|
||||||
|
<Li>
|
||||||
|
✅ Built in <strong>visualizers</strong> for syntax and NER
|
||||||
|
</Li>
|
||||||
|
<Li>
|
||||||
|
✅ Easy <strong>model packaging</strong>, deployment and workflow management
|
||||||
|
</Li>
|
||||||
|
<Li>✅ Robust, rigorously evaluated accuracy</Li>
|
||||||
|
</Ul>
|
||||||
|
)
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
|
||||||
|
// GraphQL query (operation name `FeaturesQuery`) selecting the `langs`,
// `modelLangs` and `models` fields of `site.siteMetadata.counts`. It is
// handed to <StaticQuery> through its `query` prop in this module.
const query = graphql`
|
||||||
|
query FeaturesQuery {
|
||||||
|
site {
|
||||||
|
siteMetadata {
|
||||||
|
counts {
|
||||||
|
langs
|
||||||
|
modelLangs
|
||||||
|
models
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
`
|
|
@ -14,13 +14,13 @@ import {
|
||||||
LandingBanner,
|
LandingBanner,
|
||||||
} from '../components/landing'
|
} from '../components/landing'
|
||||||
import { H2 } from '../components/typography'
|
import { H2 } from '../components/typography'
|
||||||
import { Ul, Li } from '../components/list'
|
|
||||||
import { InlineCode } from '../components/code'
|
import { InlineCode } from '../components/code'
|
||||||
import Button from '../components/button'
|
import Button from '../components/button'
|
||||||
import Link from '../components/link'
|
import Link from '../components/link'
|
||||||
|
|
||||||
import QuickstartTraining from './quickstart-training'
|
import QuickstartTraining from './quickstart-training'
|
||||||
import Project from './project'
|
import Project from './project'
|
||||||
|
import Features from './features'
|
||||||
import courseImage from '../../docs/images/course.jpg'
|
import courseImage from '../../docs/images/course.jpg'
|
||||||
import prodigyImage from '../../docs/images/prodigy_overview.jpg'
|
import prodigyImage from '../../docs/images/prodigy_overview.jpg'
|
||||||
import projectsImage from '../../docs/images/projects.png'
|
import projectsImage from '../../docs/images/projects.png'
|
||||||
|
@ -56,7 +56,7 @@ for entity in doc.ents:
|
||||||
}
|
}
|
||||||
|
|
||||||
const Landing = ({ data }) => {
|
const Landing = ({ data }) => {
|
||||||
const { counts, nightly } = data
|
const { nightly } = data
|
||||||
const codeExample = getCodeExample(nightly)
|
const codeExample = getCodeExample(nightly)
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
|
@ -98,51 +98,7 @@ const Landing = ({ data }) => {
|
||||||
|
|
||||||
<LandingCol>
|
<LandingCol>
|
||||||
<H2>Features</H2>
|
<H2>Features</H2>
|
||||||
<Ul>
|
<Features />
|
||||||
<Li>
|
|
||||||
✅ Support for <strong>{counts.langs}+ languages</strong>
|
|
||||||
</Li>
|
|
||||||
<Li>
|
|
||||||
✅ <strong>{counts.models} trained pipelines</strong> for{' '}
|
|
||||||
{counts.modelLangs} languages
|
|
||||||
</Li>
|
|
||||||
<Li>
|
|
||||||
✅ Multi-task learning with pretrained <strong>transformers</strong>{' '}
|
|
||||||
like BERT
|
|
||||||
</Li>
|
|
||||||
<Li>
|
|
||||||
✅ Pretrained <strong>word vectors</strong>
|
|
||||||
</Li>
|
|
||||||
<Li>✅ State-of-the-art speed</Li>
|
|
||||||
<Li>
|
|
||||||
✅ Production-ready <strong>training system</strong>
|
|
||||||
</Li>
|
|
||||||
<Li>
|
|
||||||
✅ Linguistically-motivated <strong>tokenization</strong>
|
|
||||||
</Li>
|
|
||||||
<Li>
|
|
||||||
✅ Components for <strong>named entity</strong> recognition,
|
|
||||||
part-of-speech tagging, dependency parsing, sentence segmentation,{' '}
|
|
||||||
<strong>text classification</strong>, lemmatization, morphological
|
|
||||||
analysis, entity linking and more
|
|
||||||
</Li>
|
|
||||||
<Li>
|
|
||||||
✅ Easily extensible with <strong>custom components</strong> and
|
|
||||||
attributes
|
|
||||||
</Li>
|
|
||||||
<Li>
|
|
||||||
✅ Support for custom models in <strong>PyTorch</strong>,{' '}
|
|
||||||
<strong>TensorFlow</strong> and other frameworks
|
|
||||||
</Li>
|
|
||||||
<Li>
|
|
||||||
✅ Built in <strong>visualizers</strong> for syntax and NER
|
|
||||||
</Li>
|
|
||||||
<Li>
|
|
||||||
✅ Easy <strong>model packaging</strong>, deployment and workflow
|
|
||||||
management
|
|
||||||
</Li>
|
|
||||||
<Li>✅ Robust, rigorously evaluated accuracy</Li>
|
|
||||||
</Ul>
|
|
||||||
</LandingCol>
|
</LandingCol>
|
||||||
</LandingGrid>
|
</LandingGrid>
|
||||||
|
|
||||||
|
@ -333,11 +289,6 @@ const landingQuery = graphql`
|
||||||
siteMetadata {
|
siteMetadata {
|
||||||
nightly
|
nightly
|
||||||
repo
|
repo
|
||||||
counts {
|
|
||||||
langs
|
|
||||||
modelLangs
|
|
||||||
models
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user