Mirror of https://github.com/explosion/spaCy.git

Remove docs references to starters for now (see #6262) [ci skip]

Commit c655742b8b, parent 5a6ed01ce0.
@@ -58,5 +58,7 @@ redirects = [
   {from = "/universe", to = "/universe/project/:id", query = {id = ":id"}, force = true},
   {from = "/universe", to = "/universe/category/:category", query = {category = ":category"}, force = true},
   # Renamed universe projects
-  {from = "/universe/project/spacy-pytorch-transformers", to = "/universe/project/spacy-transformers", force = true}
+  {from = "/universe/project/spacy-pytorch-transformers", to = "/universe/project/spacy-transformers", force = true},
+  # Old model pages
+  {from = "/models/en-starters", to = "/models/en", force = true},
 ]
@@ -68,8 +68,8 @@ representation consists of 300 dimensions of `0`, which means it's practically
 nonexistent. If your application will benefit from a **large vocabulary** with
 more vectors, you should consider using one of the larger pipeline packages or
 loading in a full vector package, for example,
-[`en_vectors_web_lg`](/models/en-starters#en_vectors_web_lg), which includes
-over **1 million unique vectors**.
+[`en_core_web_lg`](/models/en#en_core_web_lg), which includes **685k unique
+vectors**.
 
 spaCy is able to compare two objects, and make a prediction of **how similar
 they are**. Predicting similarity is useful for building recommendation systems
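As an aside to the diff itself: the similarity prediction this hunk describes can be tried in a few lines. This is a minimal sketch, assuming the `en_core_web_lg` package has been downloaded; exact scores vary by model version.

```python
import spacy

# Assumes `python -m spacy download en_core_web_lg` has been run first.
nlp = spacy.load("en_core_web_lg")

doc1 = nlp("I like salty fries and hamburgers.")
doc2 = nlp("Fast food tastes very good.")

# Document-level similarity, computed from averaged word vectors
print(doc1.similarity(doc2))

# In a package with vectors, each in-vocabulary token carries a 300-dim vector
print(doc1[2].vector.shape)  # (300,)
print(doc1[2].has_vector)    # True if the token has a vector
```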
@@ -1859,9 +1859,8 @@ pruning the vectors will be taken care of automatically if you set the `--prune`
 flag. You can also do it manually in the following steps:
 
 1. Start with a **word vectors package** that covers a huge vocabulary. For
-   instance, the [`en_vectors_web_lg`](/models/en-starters#en_vectors_web_lg)
-   starter provides 300-dimensional GloVe vectors for over 1 million terms of
-   English.
+   instance, the [`en_core_web_lg`](/models/en#en_core_web_lg) package provides
+   300-dimensional GloVe vectors for 685k terms of English.
 2. If your vocabulary has values set for the `Lexeme.prob` attribute, the
    lexemes will be sorted by descending probability to determine which vectors
    to prune. Otherwise, lexemes will be sorted by their order in the `Vocab`.
@@ -1869,7 +1868,7 @@ flag. You can also do it manually in the following steps:
 vectors you want to keep.
 
 ```python
-nlp = spacy.load('en_vectors_web_lg')
+nlp = spacy.load("en_core_web_lg")
 n_vectors = 105000  # number of vectors to keep
 removed_words = nlp.vocab.prune_vectors(n_vectors)
 ```
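A usage note on the snippet above (not part of the commit): `Vocab.prune_vectors` returns a dictionary keyed by the removed words, each mapped to the remaining entry its vector was remapped to and the similarity score between the two. A minimal sketch, again assuming `en_core_web_lg` is installed:

```python
import spacy

nlp = spacy.load("en_core_web_lg")
n_vectors = 105000  # number of vectors to keep
removed_words = nlp.vocab.prune_vectors(n_vectors)

# Each removed word maps to (kept_word, similarity_score): lookups of a
# pruned word are redirected to the nearest remaining vector.
for word, (kept, score) in list(removed_words.items())[:5]:
    print(f"{word} -> {kept} ({score:.2f})")
```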
@@ -22,7 +22,7 @@ For more details and a behind-the-scenes look at the new release,
 >
 > ```bash
 > $ python -m spacy pretrain ./raw_text.jsonl
-> en_vectors_web_lg ./pretrained-model
+> en_core_web_lg ./pretrained-model
 > ```
 
 spaCy v2.1 introduces a new CLI command, `spacy pretrain`, that can make your
@@ -226,8 +226,6 @@ exports.createPages = ({ graphql, actions }) => {
 
         const langs = result.data.site.siteMetadata.languages
         const modelLangs = langs.filter(({ models }) => models && models.length)
-        const starterLangs = langs.filter(({ starters }) => starters && starters.length)
-
         modelLangs.forEach(({ code, name, models, example, has_examples }, i) => {
             const slug = `/models/${code}`
             const next = i < modelLangs.length - 1 ? modelLangs[i + 1] : null
@@ -247,28 +245,6 @@ exports.createPages = ({ graphql, actions }) => {
                 },
             })
         })
-
-        starterLangs.forEach(({ code, name, starters }, i) => {
-            const slug = `/models/${code}-starters`
-            const next = i < starterLangs.length - 1 ? starterLangs[i + 1] : null
-            createPage({
-                path: slug,
-                component: DEFAULT_TEMPLATE,
-                context: {
-                    id: `${code}-starters`,
-                    slug: slug,
-                    isIndex: false,
-                    title: name,
-                    section: 'models',
-                    sectionTitle: sections.models.title,
-                    theme: sections.models.theme,
-                    next: next
-                        ? { title: next.name, slug: `/models/${next.code}-starters` }
-                        : null,
-                    meta: { models: starters, isStarters: true },
-                },
-            })
-        })
         })
     )
 })
@@ -52,19 +52,6 @@ const Docs = ({ pageContext, children }) => (
                 id: model,
             })),
         }))
-        if (sidebar.items.length > 2) {
-            sidebar.items[2].items = languages
-                .filter(({ starters }) => starters && starters.length)
-                .map(lang => ({
-                    text: lang.name,
-                    url: `/models/${lang.code}-starters`,
-                    isActive: id === `${lang.code}-starters`,
-                    menu: lang.starters.map(model => ({
-                        text: model,
-                        id: model,
-                    })),
-                }))
-        }
     }
     const sourcePath = source ? github(source) : null
     const currentSource = getCurrentSource(slug, isIndex)
@@ -374,7 +374,7 @@ const Models = ({ pageContext, repo, children }) => {
     const [initialized, setInitialized] = useState(false)
     const [compatibility, setCompatibility] = useState({})
     const { id, title, meta } = pageContext
-    const { models, isStarters } = meta
+    const { models } = meta
     const baseUrl = `https://raw.githubusercontent.com/${repo}/master`
 
     useEffect(() => {
@@ -388,26 +388,9 @@ const Models = ({ pageContext, repo, children }) => {
         }
     }, [initialized, baseUrl])
 
-    const modelTitle = title
-    const modelTeaser = `Available trained pipelines for ${title}`
-    const starterTitle = `${title} starters`
-    const starterTeaser = `Available transfer learning starter packs for ${title}`
-
     return (
         <>
-            <Title
-                title={isStarters ? starterTitle : modelTitle}
-                teaser={isStarters ? starterTeaser : modelTeaser}
-            />
-            {isStarters && (
-                <Section>
-                    <p>
-                        Starter packs are pretrained weights you can initialize your models with to
-                        achieve better accuracy, like word vectors (which will be used as features
-                        during training).
-                    </p>
-                </Section>
-            )}
+            <Title title={title} teaser={`Available trained pipelines for ${title}`} />
             <StaticQuery
                 query={query}
                 render={({ site }) =>