diff --git a/netlify.toml b/netlify.toml index 3c17b876c..e860e4bf4 100644 --- a/netlify.toml +++ b/netlify.toml @@ -58,5 +58,7 @@ redirects = [ {from = "/universe", to = "/universe/project/:id", query = {id = ":id"}, force = true}, {from = "/universe", to = "/universe/category/:category", query = {category = ":category"}, force = true}, # Renamed universe projects - {from = "/universe/project/spacy-pytorch-transformers", to = "/universe/project/spacy-transformers", force = true} + {from = "/universe/project/spacy-pytorch-transformers", to = "/universe/project/spacy-transformers", force = true}, + # Old model pages + {from = "/models/en-starters", to = "/models/en", force = true}, ] diff --git a/website/docs/usage/101/_vectors-similarity.md b/website/docs/usage/101/_vectors-similarity.md index cf5b70af2..2a8733f41 100644 --- a/website/docs/usage/101/_vectors-similarity.md +++ b/website/docs/usage/101/_vectors-similarity.md @@ -68,8 +68,8 @@ representation consists of 300 dimensions of `0`, which means it's practically nonexistent. If your application will benefit from a **large vocabulary** with more vectors, you should consider using one of the larger pipeline packages or loading in a full vector package, for example, -[`en_vectors_web_lg`](/models/en-starters#en_vectors_web_lg), which includes -over **1 million unique vectors**. +[`en_core_web_lg`](/models/en#en_core_web_lg), which includes **685k unique +vectors**. spaCy is able to compare two objects, and make a prediction of **how similar they are**. Predicting similarity is useful for building recommendation systems diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md index 4077cf293..af07a438f 100644 --- a/website/docs/usage/linguistic-features.md +++ b/website/docs/usage/linguistic-features.md @@ -1859,9 +1859,8 @@ pruning the vectors will be taken care of automatically if you set the `--prune` flag. You can also do it manually in the following steps: 1. Start with a **word vectors package** that covers a huge vocabulary. For - instance, the [`en_vectors_web_lg`](/models/en-starters#en_vectors_web_lg) - starter provides 300-dimensional GloVe vectors for over 1 million terms of - English. + instance, the [`en_core_web_lg`](/models/en#en_core_web_lg) package provides + 300-dimensional GloVe vectors for 685k terms of English. 2. If your vocabulary has values set for the `Lexeme.prob` attribute, the lexemes will be sorted by descending probability to determine which vectors to prune. Otherwise, lexemes will be sorted by their order in the `Vocab`. @@ -1869,7 +1868,7 @@ flag. You can also do it manually in the following steps: vectors you want to keep. ```python -nlp = spacy.load('en_vectors_web_lg') +nlp = spacy.load("en_core_web_lg") n_vectors = 105000 # number of vectors to keep removed_words = nlp.vocab.prune_vectors(n_vectors) diff --git a/website/docs/usage/v2-1.md b/website/docs/usage/v2-1.md index 4a8ef5a37..8d310f1a4 100644 --- a/website/docs/usage/v2-1.md +++ b/website/docs/usage/v2-1.md @@ -22,7 +22,7 @@ For more details and a behind-the-scenes look at the new release, > > ```bash > $ python -m spacy pretrain ./raw_text.jsonl -> en_vectors_web_lg ./pretrained-model +> en_core_web_lg ./pretrained-model > ``` spaCy v2.1 introduces a new CLI command, `spacy pretrain`, that can make your diff --git a/website/gatsby-node.js b/website/gatsby-node.js index 56a65aeae..b5d8c22c3 100644 --- a/website/gatsby-node.js +++ b/website/gatsby-node.js @@ -226,8 +226,6 @@ exports.createPages = ({ graphql, actions }) => { const langs = result.data.site.siteMetadata.languages const modelLangs = langs.filter(({ models }) => models && models.length) - const starterLangs = langs.filter(({ starters }) => starters && starters.length) - modelLangs.forEach(({ code, name, models, example, has_examples }, i) => { const slug = `/models/${code}` const next = i < modelLangs.length - 1 ? modelLangs[i + 1] : null @@ -247,28 +245,6 @@ exports.createPages = ({ graphql, actions }) => { }, }) }) - - starterLangs.forEach(({ code, name, starters }, i) => { - const slug = `/models/${code}-starters` - const next = i < starterLangs.length - 1 ? starterLangs[i + 1] : null - createPage({ - path: slug, - component: DEFAULT_TEMPLATE, - context: { - id: `${code}-starters`, - slug: slug, - isIndex: false, - title: name, - section: 'models', - sectionTitle: sections.models.title, - theme: sections.models.theme, - next: next - ? { title: next.name, slug: `/models/${next.code}-starters` } - : null, - meta: { models: starters, isStarters: true }, - }, - }) - }) }) ) }) diff --git a/website/src/templates/docs.js b/website/src/templates/docs.js index 7bb62fd21..8343a16a8 100644 --- a/website/src/templates/docs.js +++ b/website/src/templates/docs.js @@ -52,19 +52,6 @@ const Docs = ({ pageContext, children }) => ( id: model, })), })) - if (sidebar.items.length > 2) { - sidebar.items[2].items = languages - .filter(({ starters }) => starters && starters.length) - .map(lang => ({ - text: lang.name, - url: `/models/${lang.code}-starters`, - isActive: id === `${lang.code}-starters`, - menu: lang.starters.map(model => ({ - text: model, - id: model, - })), - })) - } } const sourcePath = source ? github(source) : null const currentSource = getCurrentSource(slug, isIndex) diff --git a/website/src/templates/models.js b/website/src/templates/models.js index 9c6f595da..b9658dacd 100644 --- a/website/src/templates/models.js +++ b/website/src/templates/models.js @@ -374,7 +374,7 @@ const Models = ({ pageContext, repo, children }) => { const [initialized, setInitialized] = useState(false) const [compatibility, setCompatibility] = useState({}) const { id, title, meta } = pageContext - const { models, isStarters } = meta + const { models } = meta const baseUrl = `https://raw.githubusercontent.com/${repo}/master` useEffect(() => { @@ -388,26 +388,9 @@ const Models = ({ pageContext, repo, children }) => { } }, [initialized, baseUrl]) - const modelTitle = title - const modelTeaser = `Available trained pipelines for ${title}` - const starterTitle = `${title} starters` - const starterTeaser = `Available transfer learning starter packs for ${title}` - return ( <> - - {isStarters && ( - <Section> - <p> - Starter packs are pretrained weights you can initialize your models with to - achieve better accuracy, like word vectors (which will be used as features - during training). - </p> - </Section> - )} + <Title title={title} teaser={`Available trained pipelines for ${title}`} /> <StaticQuery query={query} render={({ site }) =>