mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Update docs [ci skip]
This commit is contained in:
parent
8b4cc29dbd
commit
5ebd1fc2cf
|
@ -6,32 +6,18 @@ menu:
|
|||
- ['Conventions', 'conventions']
|
||||
---
|
||||
|
||||
<!-- Update page, refer to new /api/architectures and training docs -->
|
||||
|
||||
This directory includes two types of packages:
|
||||
|
||||
1. **Trained pipelines:** General-purpose spaCy pipelines to predict named
|
||||
entities, part-of-speech tags and syntactic dependencies. Can be used
|
||||
out-of-the-box and fine-tuned on more specific data.
|
||||
2. **Starters:** Transfer learning starter packs with pretrained weights you can
|
||||
initialize your pipeline models with to achieve better accuracy. They can
|
||||
include word vectors (which will be used as features during training) or
|
||||
other pretrained representations like BERT. These packages don't include
|
||||
components for specific tasks like NER or text classification and are
|
||||
intended to be used as base models when training your own models.
|
||||
<!-- TODO: include interactive demo -->
|
||||
|
||||
### Quickstart {hidden="true"}
|
||||
|
||||
> #### 📖 Installation and usage
|
||||
>
|
||||
> For more details on how to use trained pipelines with spaCy, see the
|
||||
> [usage guide](/usage/models).
|
||||
|
||||
import QuickstartModels from 'widgets/quickstart-models.js'
|
||||
|
||||
<QuickstartModels title="Quickstart" id="quickstart" description="Install a default model, get the code to load it from within spaCy and test it." />
|
||||
|
||||
<Infobox title="Installation and usage" emoji="📖">
|
||||
|
||||
For more details on how to use trained pipelines with spaCy, see the
|
||||
[usage guide](/usage/models).
|
||||
|
||||
</Infobox>
|
||||
<QuickstartModels id="quickstart" />
|
||||
|
||||
## Package naming conventions {#conventions}
|
||||
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
import { Help } from 'components/typography'; import Link from 'components/link'
|
||||
|
||||
<!-- TODO: update numbers -->
|
||||
<!-- TODO: update numbers, add note on previous NER evaluation issues -->
|
||||
|
||||
<figure>
|
||||
|
||||
| Pipeline | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
|
||||
| ---------------------------------------------------------- | -----: | -----: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
|
||||
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | | | | | 6k |
|
||||
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.1 | 97.4 | 87.0 | 7k | |
|
||||
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | 92.2 | 97.4 | 85.8 | 7k | |
|
||||
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.9 | 10k | |
|
||||
|
||||
<figcaption class="caption">
|
||||
|
|
|
@ -970,8 +970,8 @@ import spacy
|
|||
from spacy.tokenizer import Tokenizer
|
||||
|
||||
special_cases = {":)": [{"ORTH": ":)"}]}
|
||||
prefix_re = re.compile(r'''^[\[\("']''')
|
||||
suffix_re = re.compile(r'''[\]\)"']$''')
|
||||
prefix_re = re.compile(r'''^[\\[\\("']''')
|
||||
suffix_re = re.compile(r'''[\\]\\)"']$''')
|
||||
infix_re = re.compile(r'''[-~]''')
|
||||
simple_url_re = re.compile(r'''^https?://''')
|
||||
|
||||
|
@ -1592,7 +1592,9 @@ print("After:", [(token.text, token._.is_musician) for token in doc])
|
|||
A [`Doc`](/api/doc) object's sentences are available via the `Doc.sents`
|
||||
property. To view a `Doc`'s sentences, you can iterate over `Doc.sents`, a
|
||||
generator that yields [`Span`](/api/span) objects. You can check whether a `Doc`
|
||||
has sentence boundaries with the `doc.is_sentenced` attribute.
|
||||
has sentence boundaries by calling
|
||||
[`Doc.has_annotation`](/api/doc#has_annotation) with the attribute name
|
||||
`"SENT_START"`.
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
|
@ -1600,7 +1602,7 @@ import spacy
|
|||
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
doc = nlp("This is a sentence. This is another sentence.")
|
||||
assert doc.is_sentenced
|
||||
assert doc.has_annotation("SENT_START")
|
||||
for sent in doc.sents:
|
||||
print(sent.text)
|
||||
```
|
||||
|
|
|
@ -403,8 +403,8 @@ const Models = ({ pageContext, repo, children }) => {
|
|||
<Section>
|
||||
<p>
|
||||
Starter packs are pretrained weights you can initialize your models with to
|
||||
achieve better accuracy. They can include word vectors (which will be used
|
||||
as features during training) or other pretrained representations like BERT.
|
||||
achieve better accuracy, like word vectors (which will be used as features
|
||||
during training).
|
||||
</p>
|
||||
</Section>
|
||||
)}
|
||||
|
|
Loading…
Reference in New Issue
Block a user