mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Update docs [ci skip]
This commit is contained in:
parent
8b4cc29dbd
commit
5ebd1fc2cf
|
@ -6,32 +6,18 @@ menu:
|
||||||
- ['Conventions', 'conventions']
|
- ['Conventions', 'conventions']
|
||||||
---
|
---
|
||||||
|
|
||||||
<!-- Update page, refer to new /api/architectures and training docs -->
|
<!-- TODO: include interactive demo -->
|
||||||
|
|
||||||
This directory includes two types of packages:
|
|
||||||
|
|
||||||
1. **Trained pipelines:** General-purpose spaCy pipelines to predict named
|
|
||||||
entities, part-of-speech tags and syntactic dependencies. Can be used
|
|
||||||
out-of-the-box and fine-tuned on more specific data.
|
|
||||||
2. **Starters:** Transfer learning starter packs with pretrained weights you can
|
|
||||||
initialize your pipeline models with to achieve better accuracy. They can
|
|
||||||
include word vectors (which will be used as features during training) or
|
|
||||||
other pretrained representations like BERT. These packages don't include
|
|
||||||
components for specific tasks like NER or text classification and are
|
|
||||||
intended to be used as base models when training your own models.
|
|
||||||
|
|
||||||
### Quickstart {hidden="true"}
|
### Quickstart {hidden="true"}
|
||||||
|
|
||||||
|
> #### 📖 Installation and usage
|
||||||
|
>
|
||||||
|
> For more details on how to use trained pipelines with spaCy, see the
|
||||||
|
> [usage guide](/usage/models).
|
||||||
|
|
||||||
import QuickstartModels from 'widgets/quickstart-models.js'
|
import QuickstartModels from 'widgets/quickstart-models.js'
|
||||||
|
|
||||||
<QuickstartModels title="Quickstart" id="quickstart" description="Install a default model, get the code to load it from within spaCy and test it." />
|
<QuickstartModels id="quickstart" />
|
||||||
|
|
||||||
<Infobox title="Installation and usage" emoji="📖">
|
|
||||||
|
|
||||||
For more details on how to use trained pipelines with spaCy, see the
|
|
||||||
[usage guide](/usage/models).
|
|
||||||
|
|
||||||
</Infobox>
|
|
||||||
|
|
||||||
## Package naming conventions {#conventions}
|
## Package naming conventions {#conventions}
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
import { Help } from 'components/typography'; import Link from 'components/link'
|
import { Help } from 'components/typography'; import Link from 'components/link'
|
||||||
|
|
||||||
<!-- TODO: update numbers -->
|
<!-- TODO: update numbers, add note on previous NER evaluation issues -->
|
||||||
|
|
||||||
<figure>
|
<figure>
|
||||||
|
|
||||||
| Pipeline | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
|
| Pipeline | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
|
||||||
| ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
|
| ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
|
||||||
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | | | | | 6k |
|
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | | | | | 6k |
|
||||||
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.1 | 97.4 | 87.0 | 7k | |
|
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.8 | 7k | |
|
||||||
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.9 | 10k | |
|
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.9 | 10k | |
|
||||||
|
|
||||||
<figcaption class="caption">
|
<figcaption class="caption">
|
||||||
|
|
|
@ -970,8 +970,8 @@ import spacy
|
||||||
from spacy.tokenizer import Tokenizer
|
from spacy.tokenizer import Tokenizer
|
||||||
|
|
||||||
special_cases = {":)": [{"ORTH": ":)"}]}
|
special_cases = {":)": [{"ORTH": ":)"}]}
|
||||||
prefix_re = re.compile(r'''^[\[\("']''')
|
prefix_re = re.compile(r'''^[\\[\\("']''')
|
||||||
suffix_re = re.compile(r'''[\]\)"']$''')
|
suffix_re = re.compile(r'''[\\]\\)"']$''')
|
||||||
infix_re = re.compile(r'''[-~]''')
|
infix_re = re.compile(r'''[-~]''')
|
||||||
simple_url_re = re.compile(r'''^https?://''')
|
simple_url_re = re.compile(r'''^https?://''')
|
||||||
|
|
||||||
|
@ -1592,7 +1592,9 @@ print("After:", [(token.text, token._.is_musician) for token in doc])
|
||||||
A [`Doc`](/api/doc) object's sentences are available via the `Doc.sents`
|
A [`Doc`](/api/doc) object's sentences are available via the `Doc.sents`
|
||||||
property. To view a `Doc`'s sentences, you can iterate over `Doc.sents`, a
|
property. To view a `Doc`'s sentences, you can iterate over `Doc.sents`, a
|
||||||
generator that yields [`Span`](/api/span) objects. You can check whether a `Doc`
|
generator that yields [`Span`](/api/span) objects. You can check whether a `Doc`
|
||||||
has sentence boundaries with the `doc.is_sentenced` attribute.
|
has sentence boundaries by calling
|
||||||
|
[`Doc.has_annotation`](/api/doc#has_annotation) with the attribute name
|
||||||
|
`"SENT_START"`.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
### {executable="true"}
|
### {executable="true"}
|
||||||
|
@ -1600,7 +1602,7 @@ import spacy
|
||||||
|
|
||||||
nlp = spacy.load("en_core_web_sm")
|
nlp = spacy.load("en_core_web_sm")
|
||||||
doc = nlp("This is a sentence. This is another sentence.")
|
doc = nlp("This is a sentence. This is another sentence.")
|
||||||
assert doc.is_sentenced
|
assert doc.has_annotation("SENT_START")
|
||||||
for sent in doc.sents:
|
for sent in doc.sents:
|
||||||
print(sent.text)
|
print(sent.text)
|
||||||
```
|
```
|
||||||
|
|
|
@ -403,8 +403,8 @@ const Models = ({ pageContext, repo, children }) => {
|
||||||
<Section>
|
<Section>
|
||||||
<p>
|
<p>
|
||||||
Starter packs are pretrained weights you can initialize your models with to
|
Starter packs are pretrained weights you can initialize your models with to
|
||||||
achieve better accuracy. They can include word vectors (which will be used
|
achieve better accuracy, like word vectors (which will be used as features
|
||||||
as features during training) or other pretrained representations like BERT.
|
during training).
|
||||||
</p>
|
</p>
|
||||||
</Section>
|
</Section>
|
||||||
)}
|
)}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user