diff --git a/website/docs/api/annotation.md b/website/docs/api/annotation.md index 048af5f4c..366e15980 100644 --- a/website/docs/api/annotation.md +++ b/website/docs/api/annotation.md @@ -78,7 +78,7 @@ assigned by spaCy's [models](/models). The individual mapping is specific to the training corpus and can be defined in the respective language data's [`tag_map.py`](/usage/adding-languages#tag-map). - + spaCy also maps all language-specific part-of-speech tags to a small, fixed set of word type tags following the @@ -269,7 +269,7 @@ This section lists the syntactic dependency labels assigned by spaCy's [models](/models). The individual labels are language-specific and depend on the training corpus. - + The [Universal Dependencies scheme](http://universaldependencies.org/u/dep/) is used in all languages trained on Universal Dependency Corpora. diff --git a/website/docs/usage/101/_pipelines.md b/website/docs/usage/101/_pipelines.md index c13f7f0fe..64c2f6c98 100644 --- a/website/docs/usage/101/_pipelines.md +++ b/website/docs/usage/101/_pipelines.md @@ -33,9 +33,22 @@ list containing the component names: import Accordion from 'components/accordion.js' - + -No +In spaCy v2.x, the statistical components like the tagger or parser are +independent and don't share any data between themselves. For example, the named +entity recognizer doesn't use any features set by the tagger and parser, and so +on. This means that you can swap them, or remove single components from the +pipeline without affecting the others. + +However, custom components may depend on annotations set by other components. +For example, a custom lemmatizer may need the part-of-speech tags assigned, so +it'll only work if it's added after the tagger. The parser will respect +pre-defined sentence boundaries, so if a previous component in the pipeline sets +them, its dependency predictions may be different. Similarly, it matters if you +add the [`EntityRuler`](/api/entityruler) before or after the statistical entity +recognizer: if it's added before, the entity recognizer will take the existing +entities into account when making predictions. diff --git a/website/docs/usage/adding-languages.md b/website/docs/usage/adding-languages.md index 236df6402..100d94976 100644 --- a/website/docs/usage/adding-languages.md +++ b/website/docs/usage/adding-languages.md @@ -39,7 +39,7 @@ and morphological analysis. - + - [Language data 101](#101) - [The Language subclass](#language-subclass) diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md index 925deff3e..db2d06e0f 100644 --- a/website/docs/usage/linguistic-features.md +++ b/website/docs/usage/linguistic-features.md @@ -298,9 +298,9 @@ different languages, see the The best way to understand spaCy's dependency parser is interactively. To make this easier, spaCy v2.0+ comes with a visualization module. You can pass a `Doc` or a list of `Doc` objects to displaCy and run -[`displacy.serve`](top-level#displacy.serve) to run the web server, or -[`displacy.render`](top-level#displacy.render) to generate the raw markup. If -you want to know how to write rules that hook into some type of syntactic +[`displacy.serve`](/api/top-level#displacy.serve) to run the web server, or +[`displacy.render`](/api/top-level#displacy.render) to generate the raw markup. +If you want to know how to write rules that hook into some type of syntactic construction, just plug the sentence into the visualizer and see how spaCy annotates it. @@ -621,7 +621,7 @@ For more details on the language-specific data, see the usage guide on - + Tokenization rules that are specific to one language, but can be **generalized across that language** should ideally live in the language data in diff --git a/website/docs/usage/models.md b/website/docs/usage/models.md index e38e3c374..420d14bfc 100644 --- a/website/docs/usage/models.md +++ b/website/docs/usage/models.md @@ -41,7 +41,7 @@ contribute to model development. > If a model is available for a language, you can download it using the > [`spacy download`](/api/cli#download) command. In order to use languages that > don't yet come with a model, you have to import them directly, or use -> [`spacy.blank`](api/top-level#spacy.blank): +> [`spacy.blank`](/api/top-level#spacy.blank): > > ```python > from spacy.lang.fi import Finnish diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md index 264774b7c..0400061c1 100644 --- a/website/docs/usage/processing-pipelines.md +++ b/website/docs/usage/processing-pipelines.md @@ -46,7 +46,8 @@ components. spaCy then does the following: 3. Add each pipeline component to the pipeline in order, using [`add_pipe`](/api/language#add_pipe). 4. Make the **model data** available to the `Language` class by calling - [`from_disk`](language#from_disk) with the path to the model data directory. + [`from_disk`](/api/language#from_disk) with the path to the model data + directory. So when you call this... @@ -426,7 +427,7 @@ spaCy, and implement your own models trained with other machine learning libraries. It also lets you take advantage of spaCy's data structures and the `Doc` object as the "single source of truth". - + Writing to a `._` attribute instead of to the `Doc` directly keeps a clearer separation and makes it easier to ensure backwards compatibility. For example, @@ -437,7 +438,7 @@ immediately know what's built-in and what's custom – for example, - + Extension definitions – the defaults, methods, getters and setters you pass in to `set_extension` – are stored in class attributes on the `Underscore` class. diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index 719644a57..4f58a4d3a 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -15,7 +15,7 @@ their relationships. This means you can easily access and analyze the surrounding tokens, merge spans into single tokens or add entries to the named entities in `doc.ents`. - + For complex tasks, it's usually better to train a statistical entity recognition model. However, statistical models require training data, so for many @@ -41,7 +41,7 @@ on [rule-based entity recognition](#entityruler). - + The `PhraseMatcher` is useful if you already have a large terminology list or gazetteer consisting of single or multi-token phrases that you want to find diff --git a/website/docs/usage/spacy-101.md b/website/docs/usage/spacy-101.md index d9be018d6..badda4a0b 100644 --- a/website/docs/usage/spacy-101.md +++ b/website/docs/usage/spacy-101.md @@ -50,7 +50,7 @@ systems, or to pre-process text for **deep learning**. - + - [Features](#features) - [Linguistic annotations](#annotations) diff --git a/website/docs/usage/v2.md b/website/docs/usage/v2.md index 9a9d5f7a0..9e54106c7 100644 --- a/website/docs/usage/v2.md +++ b/website/docs/usage/v2.md @@ -39,7 +39,7 @@ also add your own custom attributes, properties and methods to the `Doc`, - + - [Summary](#summary) - [New features](#features) diff --git a/website/docs/usage/visualizers.md b/website/docs/usage/visualizers.md index 6d53fc150..31cb17a9b 100644 --- a/website/docs/usage/visualizers.md +++ b/website/docs/usage/visualizers.md @@ -75,7 +75,7 @@ arcs. | `font` | unicode | Font name or font family for all text. | `"Arial"` | For a list of all available options, see the -[`displacy` API documentation](top-level#displacy_options). +[`displacy` API documentation](/api/top-level#displacy_options). > #### Options example > diff --git a/website/package.json b/website/package.json index 916ac4d6d..f43b9a6a0 100644 --- a/website/package.json +++ b/website/package.json @@ -12,7 +12,6 @@ "@mdx-js/tag": "^0.17.5", "@phosphor/widgets": "^1.6.0", "@rehooks/online-status": "^1.0.0", - "@sindresorhus/slugify": "^0.8.0", "@svgr/webpack": "^4.1.0", "autoprefixer": "^9.4.7", "classnames": "^2.2.6", @@ -62,7 +61,8 @@ "md-attr-parser": "^1.2.1", "prettier": "^1.16.4", "raw-loader": "^1.0.0", - "unist-util-visit": "^1.4.0" + "unist-util-visit": "^1.4.0", + "@sindresorhus/slugify": "^0.8.0" }, "repository": { "type": "git", diff --git a/website/src/components/accordion.js b/website/src/components/accordion.js index 75485e698..379c0e797 100644 --- a/website/src/components/accordion.js +++ b/website/src/components/accordion.js @@ -1,33 +1,38 @@ -import React, { useState } from 'react' +import React, { useState, useEffect } from 'react' import PropTypes from 'prop-types' import classNames from 'classnames' -import slugify from '@sindresorhus/slugify' import Link from './link' import classes from '../styles/accordion.module.sass' const Accordion = ({ title, id, expanded, children }) => { - const anchorId = id || slugify(title) - const [isExpanded, setIsExpanded] = useState(expanded) + const [isExpanded, setIsExpanded] = useState(true) const contentClassNames = classNames(classes.content, { [classes.hidden]: !isExpanded, }) const iconClassNames = classNames({ [classes.hidden]: isExpanded, }) + // Make sure accordion is expanded if JS is disabled + useEffect(() => setIsExpanded(expanded), []) return ( -
+
-

+

-

+
{children}
diff --git a/website/src/components/infobox.js b/website/src/components/infobox.js index fb23ecedc..6af24a6ca 100644 --- a/website/src/components/infobox.js +++ b/website/src/components/infobox.js @@ -5,13 +5,13 @@ import classNames from 'classnames' import Icon from './icon' import classes from '../styles/infobox.module.sass' -const Infobox = ({ title, variant, className, children }) => { +const Infobox = ({ title, id, variant, className, children }) => { const infoboxClassNames = classNames(classes.root, className, { [classes.warning]: variant === 'warning', [classes.danger]: variant === 'danger', }) return ( -