diff --git a/website/UNIVERSE.md b/website/UNIVERSE.md
index 770bbde13..60d439208 100644
--- a/website/UNIVERSE.md
+++ b/website/UNIVERSE.md
@@ -2,93 +2,103 @@
# spaCy Universe
-The [spaCy Universe](https://spacy.io/universe) collects the many great resources developed with or for spaCy. It
-includes standalone packages, plugins, extensions, educational materials,
-operational utilities and bindings for other languages.
+The [spaCy Universe](https://spacy.io/universe) collects the many great
+resources developed with or for spaCy. It includes standalone packages, plugins,
+extensions, educational materials, operational utilities and bindings for other
+languages.
If you have a project that you want the spaCy community to make use of, you can
suggest it by submitting a pull request to this repository. The Universe
database is open-source and collected in a simple JSON file.
Looking for inspiration for your own spaCy plugin or extension? Check out the
-[`project ideas`](https://github.com/explosion/spaCy/discussions?discussions_q=category%3A%22New+Features+%26+Project+Ideas%22)
+[`project ideas`](https://github.com/explosion/spaCy/discussions?discussions_q=category%3A%22New+Features+%26+Project+Ideas%22)
discussion forum.
## Checklist
### Projects
-✅ Libraries and packages should be **open-source** (with a user-friendly license) and at least somewhat **documented** (e.g. a simple `README` with usage instructions).
+✅ Libraries and packages should be **open-source** (with a user-friendly
+license) and at least somewhat **documented** (e.g. a simple `README` with usage
+instructions).
-✅ We're happy to include work in progress and prereleases, but we'd like to keep the emphasis on projects that should be useful to the community **right away**.
+✅ We're happy to include work in progress and prereleases, but we'd like to
+keep the emphasis on projects that should be useful to the community **right
+away**.
✅ Demos and visualizers should be available via a **public URL**.
### Educational Materials
-✅ Books should be **available for purchase or download** (not just pre-order). Ebooks and self-published books are fine, too, if they include enough substantial content.
+✅ Books should be **available for purchase or download** (not just pre-order).
+Ebooks and self-published books are fine, too, if they include enough
+substantial content.
-✅ The `"url"` of book entries should either point to the publisher's website or a reseller of your choice (ideally one that ships worldwide or as close as possible).
+✅ The `"url"` of book entries should either point to the publisher's website or
+a reseller of your choice (ideally one that ships worldwide or as close as
+possible).
-✅ If an online course is only available behind a paywall, it should at least have a **free excerpt** or chapter available, so users know what to expect.
+✅ If an online course is only available behind a paywall, it should at least
+have a **free excerpt** or chapter available, so users know what to expect.
## JSON format
-To add a project, fork this repository, edit the [`universe.json`](meta/universe.json)
-and add an object of the following format to the list of `"resources"`. Before
-you submit your pull request, make sure to use a linter to verify that your
-markup is correct.
+To add a project, fork this repository, edit the
+[`universe.json`](meta/universe.json) and add an object of the following format
+to the list of `"resources"`. Before you submit your pull request, make sure to
+use a linter to verify that your markup is correct.
```json
{
- "id": "unique-project-id",
- "title": "Project title",
- "slogan": "A short summary",
- "description": "A longer description – *Markdown allowed!*",
- "github": "user/repo",
- "pip": "package-name",
- "code_example": [
- "import spacy",
- "import package_name",
- "",
- "nlp = spacy.load('en')",
- "nlp.add_pipe(package_name)"
- ],
- "code_language": "python",
- "url": "https://example.com",
- "thumb": "https://example.com/thumb.jpg",
- "image": "https://example.com/image.jpg",
- "author": "Your Name",
- "author_links": {
- "twitter": "username",
- "github": "username",
- "website": "https://example.com"
- },
- "category": ["pipeline", "standalone"],
- "tags": ["some-tag", "etc"]
+ "id": "unique-project-id",
+ "title": "Project title",
+ "slogan": "A short summary",
+ "description": "A longer description – *Markdown allowed!*",
+ "github": "user/repo",
+ "pip": "package-name",
+ "code_example": [
+ "import spacy",
+ "import package_name",
+ "",
+ "nlp = spacy.load('en')",
+ "nlp.add_pipe(package_name)"
+ ],
+ "code_language": "python",
+ "url": "https://example.com",
+ "thumb": "https://example.com/thumb.jpg",
+ "image": "https://example.com/image.jpg",
+ "author": "Your Name",
+ "author_links": {
+ "twitter": "username",
+ "github": "username",
+ "website": "https://example.com"
+ },
+ "category": ["pipeline", "standalone"],
+ "tags": ["some-tag", "etc"]
}
```
-| Field | Type | Description |
-| --- | --- | --- |
-| `id` | string | Unique ID of the project. |
-| `title` | string | Project title. If not set, the `id` will be used as the display title. |
-| `slogan` | string | A short description of the project. Displayed in the overview and under the title. |
-| `description` | string | A longer description of the project. Markdown is allowed, but should be limited to basic formatting like bold, italics, code or links. |
-| `github` | string | Associated GitHub repo in the format `user/repo`. Will be displayed as a link and used for release, license and star badges. |
-| `pip` | string | Package name on pip. If available, the installation command will be displayed. |
-| `cran` | string | For R packages: package name on CRAN. If available, the installation command will be displayed. |
-| `code_example` | array | Short example that shows how to use the project. Formatted as an array with one string per line. |
-| `code_language` | string | Defaults to `'python'`. Optional code language used for syntax highlighting with [Prism](http://prismjs.com/). |
-| `url` | string | Optional project link to display as button. |
-| `thumb` | string | Optional URL to project thumbnail to display in overview and project header. Recommended size is 100x100px. |
-| `image` | string | Optional URL to project image to display with description. |
-| `author` | string | Name(s) of project author(s). |
-| `author_links` | object | Usernames and links to display as icons to author info. Currently supports `twitter` and `github` usernames, as well as `website` link. |
-| `category` | list | One or more categories to assign to project. Must be one of the available options. |
-| `tags` | list | Still experimental and not used for filtering: one or more tags to assign to project. |
+| Field | Type | Description |
+| --------------- | ------ | --------------------------------------------------------------------------------------------------------------------------------------- |
+| `id` | string | Unique ID of the project. |
+| `title` | string | Project title. If not set, the `id` will be used as the display title. |
+| `slogan` | string | A short description of the project. Displayed in the overview and under the title. |
+| `description` | string | A longer description of the project. Markdown is allowed, but should be limited to basic formatting like bold, italics, code or links. |
+| `github` | string | Associated GitHub repo in the format `user/repo`. Will be displayed as a link and used for release, license and star badges. |
+| `pip` | string | Package name on pip. If available, the installation command will be displayed. |
+| `cran` | string | For R packages: package name on CRAN. If available, the installation command will be displayed. |
+| `code_example` | array | Short example that shows how to use the project. Formatted as an array with one string per line. |
+| `code_language` | string | Defaults to `'python'`. Optional code language used for syntax highlighting with [Prism](http://prismjs.com/). |
+| `url` | string | Optional project link to display as button. |
+| `thumb` | string | Optional URL to project thumbnail to display in overview and project header. Recommended size is 100x100px. |
+| `image` | string | Optional URL to project image to display with description. |
+| `author` | string | Name(s) of project author(s). |
+| `author_links` | object | Usernames and links to display as icons to author info. Currently supports `twitter` and `github` usernames, as well as `website` link. |
+| `category` | list | One or more categories to assign to project. Must be one of the available options. |
+| `tags` | list | Still experimental and not used for filtering: one or more tags to assign to project. |
To separate them from the projects, educational materials also specify
-`"type": "education`. Books can also set a `"cover"` field containing a URL
-to a cover image. If available, it's used in the overview and displayed on
-the individual book page.
+`"type": "education"`. Books can also set a `"cover"` field containing a URL to a
+cover image. If available, it's used in the overview and displayed on the
+individual book page.
diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx
index 4c5447f75..04b96d39d 100644
--- a/website/docs/api/architectures.mdx
+++ b/website/docs/api/architectures.mdx
@@ -390,7 +390,7 @@ in other components, see
| | |
-Mixed-precision support is currently an experimental feature.
+ Mixed-precision support is currently an experimental feature.
@@ -467,7 +467,7 @@ one component.
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ |
-Mixed-precision support is currently an experimental feature.
+ Mixed-precision support is currently an experimental feature.
diff --git a/website/docs/api/attributes.mdx b/website/docs/api/attributes.mdx
index adacd3898..3142b741d 100644
--- a/website/docs/api/attributes.mdx
+++ b/website/docs/api/attributes.mdx
@@ -41,10 +41,9 @@ from string attribute names to internal attribute IDs is stored in
The corresponding [`Token` object attributes](/api/token#attributes) can be
accessed using the same names in lowercase, e.g. `token.orth` or `token.length`.
-For attributes that represent string values, the internal integer ID is
-accessed as `Token.attr`, e.g. `token.dep`, while the string value can be
-retrieved by appending `_` as in `token.dep_`.
-
+For attributes that represent string values, the internal integer ID is accessed
+as `Token.attr`, e.g. `token.dep`, while the string value can be retrieved by
+appending `_` as in `token.dep_`.
| Attribute | Description |
| ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx
index 8823a3bd8..92a123241 100644
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@@ -474,7 +474,7 @@ report span characteristics such as the average span length and the span (or
span boundary) distinctiveness. The distinctiveness measure shows how different
the tokens are with respect to the rest of the corpus using the KL-divergence of
the token distributions. To learn more, you can check out Papay et al.'s work on
-[*Dissecting Span Identification Tasks with Performance Prediction* (EMNLP 2020)](https://aclanthology.org/2020.emnlp-main.396/).
+[_Dissecting Span Identification Tasks with Performance Prediction_ (EMNLP 2020)](https://aclanthology.org/2020.emnlp-main.396/).
@@ -1187,7 +1187,6 @@ be provided.
> $ python -m spacy find-threshold my_nlp data.spacy spancat threshold spans_sc_f
> ```
-
| Name | Description |
| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `model` | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~ |
diff --git a/website/docs/api/dependencymatcher.mdx b/website/docs/api/dependencymatcher.mdx
index cae4221bf..0ed413340 100644
--- a/website/docs/api/dependencymatcher.mdx
+++ b/website/docs/api/dependencymatcher.mdx
@@ -87,7 +87,6 @@ come directly from
| `A <++ B` | `B` is a right parent of `A`, i.e. `A` is a child of `B` and `A.i < B.i` _(not in Semgrex)_. |
| `A <-- B` | `B` is a left parent of `A`, i.e. `A` is a child of `B` and `A.i > B.i` _(not in Semgrex)_. |
-
## DependencyMatcher.\_\_init\_\_ {#init tag="method"}
Create a `DependencyMatcher`.
diff --git a/website/docs/api/entityruler.mdx b/website/docs/api/entityruler.mdx
index c2ba33f01..909d4674b 100644
--- a/website/docs/api/entityruler.mdx
+++ b/website/docs/api/entityruler.mdx
@@ -99,9 +99,9 @@ be a token pattern (list) or a phrase pattern (string). For example:
## EntityRuler.initialize {#initialize tag="method" new="3"}
Initialize the component with data and used before training to load in rules
-from a [pattern file](/usage/rule-based-matching/#entityruler-files). This method
-is typically called by [`Language.initialize`](/api/language#initialize) and
-lets you customize arguments it receives via the
+from a [pattern file](/usage/rule-based-matching/#entityruler-files). This
+method is typically called by [`Language.initialize`](/api/language#initialize)
+and lets you customize arguments it receives via the
[`[initialize.components]`](/api/data-formats#config-initialize) block in the
config.
@@ -210,10 +210,10 @@ of dicts) or a phrase pattern (string). For more details, see the usage guide on
| ---------- | ---------------------------------------------------------------- |
| `patterns` | The patterns to add. ~~List[Dict[str, Union[str, List[dict]]]]~~ |
-
## EntityRuler.remove {#remove tag="method" new="3.2.1"}
-Remove a pattern by its ID from the entity ruler. A `ValueError` is raised if the ID does not exist.
+Remove a pattern by its ID from the entity ruler. A `ValueError` is raised if
+the ID does not exist.
> #### Example
>
@@ -224,9 +224,9 @@ Remove a pattern by its ID from the entity ruler. A `ValueError` is raised if th
> ruler.remove("apple")
> ```
-| Name | Description |
-| ---------- | ---------------------------------------------------------------- |
-| `id` | The ID of the pattern rule. ~~str~~ |
+| Name | Description |
+| ---- | ----------------------------------- |
+| `id` | The ID of the pattern rule. ~~str~~ |
## EntityRuler.to_disk {#to_disk tag="method"}
diff --git a/website/docs/api/example.mdx b/website/docs/api/example.mdx
index 63768d58f..f98a114a1 100644
--- a/website/docs/api/example.mdx
+++ b/website/docs/api/example.mdx
@@ -288,9 +288,9 @@ Calculate alignment tables between two tokenizations.
### Alignment attributes {#alignment-attributes"}
-Alignment attributes are managed using `AlignmentArray`, which is a
-simplified version of Thinc's [Ragged](https://thinc.ai/docs/api-types#ragged)
-type that only supports the `data` and `length` attributes.
+Alignment attributes are managed using `AlignmentArray`, which is a simplified
+version of Thinc's [Ragged](https://thinc.ai/docs/api-types#ragged) type that
+only supports the `data` and `length` attributes.
| Name | Description |
| ----- | ------------------------------------------------------------------------------------- |
diff --git a/website/docs/api/index.mdx b/website/docs/api/index.mdx
index ba1e0a29a..2d77ea6b3 100644
--- a/website/docs/api/index.mdx
+++ b/website/docs/api/index.mdx
@@ -3,6 +3,6 @@ title: Library Architecture
next: /api/architectures
---
-import Architecture101 from 'usage/101/\_architecture.mdx'
+import Architecture101 from 'usage/101/_architecture.mdx'
diff --git a/website/docs/api/kb.mdx b/website/docs/api/kb.mdx
index b217a1678..b140bb6c1 100644
--- a/website/docs/api/kb.mdx
+++ b/website/docs/api/kb.mdx
@@ -106,7 +106,7 @@ to you.
## KnowledgeBase.get_alias_candidates {#get_alias_candidates tag="method"}
-This method is _not_ available from spaCy 3.5 onwards.
+ This method is _not_ available from spaCy 3.5 onwards.
From spaCy 3.5 on `KnowledgeBase` is an abstract class (with
diff --git a/website/docs/api/morphology.mdx b/website/docs/api/morphology.mdx
index 20fcd1a40..565e520b5 100644
--- a/website/docs/api/morphology.mdx
+++ b/website/docs/api/morphology.mdx
@@ -105,11 +105,11 @@ representation.
## Attributes {#attributes}
-| Name | Description |
-| ------------- | ------------------------------------------------------------------------------------------------------------------------------ |
-| `FEATURE_SEP` | The [FEATS](https://universaldependencies.org/format.html#morphological-annotation) feature separator. Default is `|`. ~~str~~ |
-| `FIELD_SEP` | The [FEATS](https://universaldependencies.org/format.html#morphological-annotation) field separator. Default is `=`. ~~str~~ |
-| `VALUE_SEP` | The [FEATS](https://universaldependencies.org/format.html#morphological-annotation) value separator. Default is `,`. ~~str~~ |
+| Name          | Description                                                                                                                      |
+| ------------- | -------------------------------------------------------------------------------------------------------------------------------- |
+| `FEATURE_SEP` | The [FEATS](https://universaldependencies.org/format.html#morphological-annotation) feature separator. Default is `\|`. ~~str~~    |
+| `FIELD_SEP`   | The [FEATS](https://universaldependencies.org/format.html#morphological-annotation) field separator. Default is `=`. ~~str~~       |
+| `VALUE_SEP`   | The [FEATS](https://universaldependencies.org/format.html#morphological-annotation) value separator. Default is `,`. ~~str~~       |
## MorphAnalysis {#morphanalysis tag="class" source="spacy/tokens/morphanalysis.pyx"}
diff --git a/website/docs/api/sentencizer.mdx b/website/docs/api/sentencizer.mdx
index b75c7a2f1..f5017fbdb 100644
--- a/website/docs/api/sentencizer.mdx
+++ b/website/docs/api/sentencizer.mdx
@@ -38,7 +38,7 @@ how the component should be configured. You can override its settings via the
> ```
| Setting | Description |
-| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ |
| `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ | `None` |
| `overwrite` 3.2 | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~ |
| `scorer` 3.2 | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for the attribute `"sents"` ~~Optional[Callable]~~ |
diff --git a/website/docs/images/displacy-dep-founded.html b/website/docs/images/displacy-dep-founded.html
index e22984ee1..8e3c47522 100644
--- a/website/docs/images/displacy-dep-founded.html
+++ b/website/docs/images/displacy-dep-founded.html
@@ -1,58 +1,155 @@
-
diff --git a/website/docs/images/displacy-ent-custom.html b/website/docs/images/displacy-ent-custom.html
index 709c6f631..5da472fdb 100644
--- a/website/docs/images/displacy-ent-custom.html
+++ b/website/docs/images/displacy-ent-custom.html
@@ -1,31 +1,78 @@
But
Google
ORGis starting from behind. The company made a late push into hardware, and
Apple
ORG’s Siri, available on iPhones, and
Amazon
ORG’s Alexa software, which runs on its Echo and Dot devices, have clear leads in consumer
diff --git a/website/docs/images/displacy-ent-snek.html b/website/docs/images/displacy-ent-snek.html
index c8b416d8d..6604d9b78 100644
--- a/website/docs/images/displacy-ent-snek.html
+++ b/website/docs/images/displacy-ent-snek.html
@@ -1,24 +1,57 @@
diff --git a/website/docs/models/index.mdx b/website/docs/models/index.mdx
index 203555651..560c04675 100644
--- a/website/docs/models/index.mdx
+++ b/website/docs/models/index.mdx
@@ -189,8 +189,8 @@ than the rule-based `sentencizer`.
#### Switch from trainable lemmatizer to default lemmatizer
-Since v3.3, a number of pipelines use a trainable lemmatizer. You can check whether
-the lemmatizer is trainable:
+Since v3.3, a number of pipelines use a trainable lemmatizer. You can check
+whether the lemmatizer is trainable:
```python
nlp = spacy.load("de_core_web_sm")
diff --git a/website/docs/styleguide.mdx b/website/docs/styleguide.mdx
index f97ad7d3d..78b2c1d75 100644
--- a/website/docs/styleguide.mdx
+++ b/website/docs/styleguide.mdx
@@ -65,8 +65,16 @@ import { Colors, Patterns } from 'widgets/styleguide'
## Typography {#typography}
-import { H1, H2, H3, H4, H5, Label, InlineList, Comment } from
-'components/typography'
+import {
+ H1,
+ H2,
+ H3,
+ H4,
+ H5,
+ Label,
+ InlineList,
+ Comment,
+} from 'components/typography'
> #### Markdown
>
@@ -101,12 +109,12 @@ in the sidebar menu.
-
Headline 1
-
Headline 2
-
Headline 3
-
Headline 4
-
Headline 5
-
+
Headline 1
+
Headline 2
+
Headline 3
+
Headline 4
+
Headline 5
+
---
@@ -182,8 +190,9 @@ installed.
-method4tagger,
-parser
+method4
+ tagger, parser
+
@@ -200,13 +209,25 @@ Link buttons come in two variants, `primary` and `secondary` and two sizes, with
an optional `large` size modifier. Since they're mostly used as enhanced links,
the buttons are implemented as styled links instead of native button elements.
-
-
+
+
+
+
-
-
+
+
+
+
## Components
diff --git a/website/docs/usage/101/_named-entities.mdx b/website/docs/usage/101/_named-entities.mdx
index 2abc45cbd..1778ab0a9 100644
--- a/website/docs/usage/101/_named-entities.mdx
+++ b/website/docs/usage/101/_named-entities.mdx
@@ -1,9 +1,9 @@
A named entity is a "real-world object" that's assigned a name – for example, a
person, a country, a product or a book title. spaCy can **recognize various
-types of named entities in a document, by asking the model for a
-prediction**. Because models are statistical and strongly depend on the
-examples they were trained on, this doesn't always work _perfectly_ and might
-need some tuning later, depending on your use case.
+types of named entities in a document, by asking the model for a prediction**.
+Because models are statistical and strongly depend on the examples they were
+trained on, this doesn't always work _perfectly_ and might need some tuning
+later, depending on your use case.
Named entities are available as the `ents` property of a `Doc`:
@@ -32,7 +32,11 @@ for ent in doc.ents:
Using spaCy's built-in [displaCy visualizer](/usage/visualizers), here's what
our example sentence and its named entities look like:
-import DisplaCyEntHtml from 'images/displacy-ent1.html'; import { Iframe } from
-'components/embed'
+import DisplaCyEntHtml from 'images/displacy-ent1.html'
+import { Iframe } from 'components/embed'
-
+
diff --git a/website/docs/usage/101/_pos-deps.mdx b/website/docs/usage/101/_pos-deps.mdx
index 93ad0961a..f3c11f81c 100644
--- a/website/docs/usage/101/_pos-deps.mdx
+++ b/website/docs/usage/101/_pos-deps.mdx
@@ -57,7 +57,11 @@ for token in doc:
Using spaCy's built-in [displaCy visualizer](/usage/visualizers), here's what
our example sentence and its dependencies look like:
-import DisplaCyLongHtml from 'images/displacy-long.html'; import { Iframe } from
-'components/embed'
+import DisplaCyLongHtml from 'images/displacy-long.html'
+import { Iframe } from 'components/embed'
-
+
diff --git a/website/docs/usage/101/_training.mdx b/website/docs/usage/101/_training.mdx
index 4218c1b5a..d904c3631 100644
--- a/website/docs/usage/101/_training.mdx
+++ b/website/docs/usage/101/_training.mdx
@@ -10,9 +10,9 @@ any other information.
Training is an iterative process in which the model's predictions are compared
against the reference annotations in order to estimate the **gradient of the
loss**. The gradient of the loss is then used to calculate the gradient of the
-weights through [backpropagation](https://thinc.ai/docs/backprop101). The gradients
-indicate how the weight values should be changed so that the model's predictions
-become more similar to the reference labels over time.
+weights through [backpropagation](https://thinc.ai/docs/backprop101). The
+gradients indicate how the weight values should be changed so that the model's
+predictions become more similar to the reference labels over time.
> - **Training data:** Examples and their annotations.
> - **Text:** The input text the model should predict a label for.
diff --git a/website/docs/usage/_benchmarks-models.mdx b/website/docs/usage/_benchmarks-models.mdx
index 5bf9e63ca..4893316b5 100644
--- a/website/docs/usage/_benchmarks-models.mdx
+++ b/website/docs/usage/_benchmarks-models.mdx
@@ -1,4 +1,5 @@
-import { Help } from 'components/typography'; import Link from 'components/link'
+import { Help } from 'components/typography'
+import Link from 'components/link'
diff --git a/website/docs/usage/embeddings-transformers.mdx b/website/docs/usage/embeddings-transformers.mdx
index a487371de..4173a6c37 100644
--- a/website/docs/usage/embeddings-transformers.mdx
+++ b/website/docs/usage/embeddings-transformers.mdx
@@ -218,8 +218,8 @@ $ pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0+c
Next, install spaCy with the extras for your CUDA version and transformers. The
CUDA extra (e.g., `cuda102`, `cuda113`) installs the correct version of
-[`cupy`](https://docs.cupy.dev/en/stable/install.html#installing-cupy), which
-is just like `numpy`, but for GPU. You may also need to set the `CUDA_PATH`
+[`cupy`](https://docs.cupy.dev/en/stable/install.html#installing-cupy), which is
+just like `numpy`, but for GPU. You may also need to set the `CUDA_PATH`
environment variable if your CUDA runtime is installed in a non-standard
location. Putting it all together, if you had installed CUDA 11.3 in
`/opt/nvidia/cuda`, you would run:
@@ -531,8 +531,7 @@ Word vectors in spaCy are "static" in the sense that they are not learned
parameters of the statistical models, and spaCy itself does not feature any
algorithms for learning word vector tables. You can train a word vectors table
using tools such as [floret](https://github.com/explosion/floret),
-[Gensim](https://radimrehurek.com/gensim/),
-[FastText](https://fasttext.cc/) or
+[Gensim](https://radimrehurek.com/gensim/), [FastText](https://fasttext.cc/) or
[GloVe](https://nlp.stanford.edu/projects/glove/), or download existing
pretrained vectors. The [`init vectors`](/api/cli#init-vectors) command lets you
convert vectors for use with spaCy and will give you a directory you can load or
diff --git a/website/docs/usage/facts-figures.mdx b/website/docs/usage/facts-figures.mdx
index 4c9d7e644..90c140745 100644
--- a/website/docs/usage/facts-figures.mdx
+++ b/website/docs/usage/facts-figures.mdx
@@ -69,7 +69,7 @@ pipeline, which is less accurate but much cheaper to run.
> gold-standard segmentation and tokenization, from a pretty specific type of
> text (articles from a single newspaper, 1984-1989).
-import Benchmarks from 'usage/\_benchmarks-models.mdx'
+import Benchmarks from 'usage/_benchmarks-models.mdx'
diff --git a/website/docs/usage/index.mdx b/website/docs/usage/index.mdx
index dff5a16ba..f4993dd3b 100644
--- a/website/docs/usage/index.mdx
+++ b/website/docs/usage/index.mdx
@@ -181,7 +181,9 @@ $ pip install --no-build-isolation --editable .[lookups,cuda102]
How to install compilers and related build tools:
-
+
+
+
- **Ubuntu:** Install system-level dependencies via `apt-get`:
`sudo apt-get install build-essential python-dev git`
@@ -235,10 +237,10 @@ package to see what the oldest recommended versions of `numpy` are.
Some additional options may be useful for spaCy developers who are editing the
source code and recompiling frequently.
-- Install in editable mode. Changes to `.py` files will be reflected as soon
- as the files are saved, but edits to Cython files (`.pxd`, `.pyx`) will
- require the `pip install` command below to be run again. Before installing in
- editable mode, be sure you have removed any previous installs with
+- Install in editable mode. Changes to `.py` files will be reflected as soon as
+ the files are saved, but edits to Cython files (`.pxd`, `.pyx`) will require
+ the `pip install` command below to be run again. Before installing in editable
+ mode, be sure you have removed any previous installs with
`pip uninstall spacy`, which you may need to run multiple times to remove all
traces of earlier installs.
@@ -247,8 +249,8 @@ source code and recompiling frequently.
$ pip install --no-build-isolation --editable .
```
-- Build in parallel. Starting in v3.4.0, you can specify the number of
- build jobs with the environment variable `SPACY_NUM_BUILD_JOBS`:
+- Build in parallel. Starting in v3.4.0, you can specify the number of build
+ jobs with the environment variable `SPACY_NUM_BUILD_JOBS`:
```bash
$ pip install -r requirements.txt
diff --git a/website/docs/usage/layers-architectures.mdx b/website/docs/usage/layers-architectures.mdx
index 2e23b3684..9f8e4ed08 100644
--- a/website/docs/usage/layers-architectures.mdx
+++ b/website/docs/usage/layers-architectures.mdx
@@ -537,13 +537,13 @@ two major steps required:
pass through the `nlp` pipeline.
-Run this example use-case by using our project template. It includes all the
-code to create the ML model and the pipeline component from scratch.
-It also contains two config files to train the model:
-one to run on CPU with a Tok2Vec layer, and one for the GPU using a transformer.
-The project applies the relation extraction component to identify biomolecular
-interactions in a sample dataset, but you can easily swap in your own dataset
-for your experiments in any other domain.
+ Run this example use-case by using our project template. It includes all the
+ code to create the ML model and the pipeline component from scratch. It also
+ contains two config files to train the model: one to run on CPU with a Tok2Vec
+ layer, and one for the GPU using a transformer. The project applies the
+ relation extraction component to identify biomolecular interactions in a
+ sample dataset, but you can easily swap in your own dataset for your
+ experiments in any other domain.
@@ -1043,11 +1043,10 @@ def make_relation_extractor(nlp, name, model):
```
-Run this example use-case by using our project template. It includes all the
-code to create the ML model and the pipeline component from scratch.
-It contains two config files to train the model:
-one to run on CPU with a Tok2Vec layer, and one for the GPU using a transformer.
-The project applies the relation extraction component to identify biomolecular
-interactions, but you can easily swap in your own dataset for your experiments
-in any other domain.
+ Run this example use-case by using our project template. It includes all the
+ code to create the ML model and the pipeline component from scratch. It
+ contains two config files to train the model: one to run on CPU with a Tok2Vec
+ layer, and one for the GPU using a transformer. The project applies the
+ relation extraction component to identify biomolecular interactions, but you
+ can easily swap in your own dataset for your experiments in any other domain.
diff --git a/website/docs/usage/linguistic-features.mdx b/website/docs/usage/linguistic-features.mdx
index 8fe8b2c1a..d662ad447 100644
--- a/website/docs/usage/linguistic-features.mdx
+++ b/website/docs/usage/linguistic-features.mdx
@@ -28,7 +28,7 @@ annotations.
## Part-of-speech tagging {#pos-tagging model="tagger, parser"}
-import PosDeps101 from 'usage/101/\_pos-deps.mdx'
+import PosDeps101 from 'usage/101/_pos-deps.mdx'
@@ -300,7 +300,11 @@ for token in doc:
import DisplaCyLong2Html from 'images/displacy-long2.html'
-
+
Because the syntactic relations form a tree, every word has **exactly one
head**. You can therefore iterate over the arcs in the tree by iterating over
@@ -536,7 +540,7 @@ with new examples.
### Named Entity Recognition 101 {#named-entities-101}
-import NER101 from 'usage/101/\_named-entities.mdx'
+import NER101 from 'usage/101/_named-entities.mdx'
@@ -730,7 +734,11 @@ displacy.serve(doc, style="ent")
import DisplacyEntHtml from 'images/displacy-ent2.html'
-
+
## Entity Linking {#entity-linking}
@@ -785,7 +793,7 @@ during tokenization. This is kind of a core principle of spaCy's `Doc` object:
-import Tokenization101 from 'usage/101/\_tokenization.mdx'
+import Tokenization101 from 'usage/101/_tokenization.mdx'
@@ -1868,7 +1876,7 @@ initialized before training. See the
## Word vectors and semantic similarity {#vectors-similarity}
-import Vectors101 from 'usage/101/\_vectors-similarity.mdx'
+import Vectors101 from 'usage/101/_vectors-similarity.mdx'
@@ -1998,7 +2006,7 @@ for word, vector in vector_data.items():
## Language Data {#language-data}
-import LanguageData101 from 'usage/101/\_language-data.mdx'
+import LanguageData101 from 'usage/101/_language-data.mdx'
diff --git a/website/docs/usage/models.mdx b/website/docs/usage/models.mdx
index 6971ac8b4..9edff1f58 100644
--- a/website/docs/usage/models.mdx
+++ b/website/docs/usage/models.mdx
@@ -25,7 +25,11 @@ located anywhere on your file system.
import QuickstartModels from 'widgets/quickstart-models.js'
-
+
### Usage note
diff --git a/website/docs/usage/processing-pipelines.mdx b/website/docs/usage/processing-pipelines.mdx
index c31ad0a99..6638676c1 100644
--- a/website/docs/usage/processing-pipelines.mdx
+++ b/website/docs/usage/processing-pipelines.mdx
@@ -12,7 +12,7 @@ menu:
- ['Plugins & Wrappers', 'plugins']
---
-import Pipelines101 from 'usage/101/\_pipelines.mdx'
+import Pipelines101 from 'usage/101/_pipelines.mdx'
@@ -364,7 +364,8 @@ nlp.enable_pipe("tagger")
In addition to `disable`, `spacy.load()` also accepts `enable`. If `enable` is
set, all components except for those in `enable` are disabled. If `enable` and
-`disable` conflict (i.e. the same component is included in both), an error is raised.
+`disable` conflict (i.e. the same component is included in both), an error is
+raised.
```python
# Load the complete pipeline, but disable all components except for tok2vec and tagger
diff --git a/website/docs/usage/projects.mdx b/website/docs/usage/projects.mdx
index f57578049..c90a50924 100644
--- a/website/docs/usage/projects.mdx
+++ b/website/docs/usage/projects.mdx
@@ -43,13 +43,31 @@ and experiments, iterate on demos and prototypes and ship your models into
production.
-Manage and version your data
-Create labelled training data
-Visualize and demo your pipelines
-Serve your models and host APIs
-Distributed and parallel training
-Track your experiments and results
-Upload your pipelines to the Hugging Face Hub
+
+ Manage and version your data
+
+
+ Create labelled training data
+
+
+ Visualize and demo your pipelines
+
+
+ Serve your models and host APIs
+
+
+ Distributed and parallel training
+
+
+ Track your experiments and results
+
+
+ Upload your pipelines to the Hugging Face Hub
+
### 1. Clone a project template {#clone}
diff --git a/website/docs/usage/rule-based-matching.mdx b/website/docs/usage/rule-based-matching.mdx
index ad8ea27f3..55ba058dd 100644
--- a/website/docs/usage/rule-based-matching.mdx
+++ b/website/docs/usage/rule-based-matching.mdx
@@ -1107,7 +1107,11 @@ relations and tokens we want to match:
import DisplaCyDepFoundedHtml from 'images/displacy-dep-founded.html'
-
+
The relations we're interested in are:
diff --git a/website/docs/usage/saving-loading.mdx b/website/docs/usage/saving-loading.mdx
index 0ee933177..285ea562d 100644
--- a/website/docs/usage/saving-loading.mdx
+++ b/website/docs/usage/saving-loading.mdx
@@ -10,7 +10,7 @@ menu:
## Basics {#basics hidden="true"}
-import Serialization101 from 'usage/101/\_serialization.mdx'
+import Serialization101 from 'usage/101/_serialization.mdx'
@@ -582,7 +582,11 @@ displayed in `#3dff74`.
import DisplaCyEntSnekHtml from 'images/displacy-ent-snek.html'
-
+
## Saving, loading and distributing trained pipelines {#models}
diff --git a/website/docs/usage/spacy-101.mdx b/website/docs/usage/spacy-101.mdx
index 47d0954ae..eaff7bb3d 100644
--- a/website/docs/usage/spacy-101.mdx
+++ b/website/docs/usage/spacy-101.mdx
@@ -37,7 +37,11 @@ understanding systems, using both rule-based and machine learning approaches. It
includes 55 exercises featuring interactive coding practice, multiple-choice
questions and slide decks.
-
+
+
+
@@ -191,7 +195,7 @@ text with spaCy.
### Tokenization {#annotations-token}
-import Tokenization101 from 'usage/101/\_tokenization.mdx'
+import Tokenization101 from 'usage/101/_tokenization.mdx'
@@ -207,7 +211,7 @@ language-specific data**, see the usage guides on
### Part-of-speech tags and dependencies {#annotations-pos-deps model="parser"}
-import PosDeps101 from 'usage/101/\_pos-deps.mdx'
+import PosDeps101 from 'usage/101/_pos-deps.mdx'
@@ -222,7 +226,7 @@ how to **navigate and use the parse tree** effectively, see the usage guides on
### Named Entities {#annotations-ner model="ner"}
-import NER101 from 'usage/101/\_named-entities.mdx'
+import NER101 from 'usage/101/_named-entities.mdx'
@@ -238,7 +242,7 @@ of a model, see the usage guides on
### Word vectors and similarity {#vectors-similarity model="vectors"}
-import Vectors101 from 'usage/101/\_vectors-similarity.mdx'
+import Vectors101 from 'usage/101/_vectors-similarity.mdx'
@@ -252,7 +256,7 @@ To learn more about word vectors, how to **customize them** and how to load
## Pipelines {#pipelines}
-import Pipelines101 from 'usage/101/\_pipelines.mdx'
+import Pipelines101 from 'usage/101/_pipelines.mdx'
@@ -266,7 +270,7 @@ guide on [language processing pipelines](/usage/processing-pipelines).
## Architecture {#architecture}
-import Architecture101 from 'usage/101/\_architecture.mdx'
+import Architecture101 from 'usage/101/_architecture.mdx'
@@ -384,7 +388,7 @@ it.
## Serialization {#serialization}
-import Serialization101 from 'usage/101/\_serialization.mdx'
+import Serialization101 from 'usage/101/_serialization.mdx'
@@ -397,7 +401,7 @@ guide on [saving and loading](/usage/saving-loading#models).
## Training {#training}
-import Training101 from 'usage/101/\_training.mdx'
+import Training101 from 'usage/101/_training.mdx'
@@ -476,7 +480,7 @@ for trainable components.
## Language data {#language-data}
-import LanguageData101 from 'usage/101/\_language-data.mdx'
+import LanguageData101 from 'usage/101/_language-data.mdx'
@@ -513,10 +517,11 @@ via the following platforms:
questions** and everything related to problems with your specific code. The
Stack Overflow community is much larger than ours, so if your problem can be
solved by others, you'll receive help much quicker.
-- [GitHub discussions](https://github.com/explosion/spaCy/discussions): **General
- discussion**, **project ideas** and **usage questions**. Meet other community
- members to get help with a specific code implementation, discuss ideas for new
- projects/plugins, support more languages, and share best practices.
+- [GitHub discussions](https://github.com/explosion/spaCy/discussions):
+ **General discussion**, **project ideas** and **usage questions**. Meet other
+ community members to get help with a specific code implementation, discuss
+ ideas for new projects/plugins, support more languages, and share best
+ practices.
- [GitHub issue tracker](https://github.com/explosion/spaCy/issues): **Bug
reports** and **improvement suggestions**, i.e. everything that's likely
spaCy's fault. This also includes problems with the trained pipelines beyond
@@ -588,7 +593,8 @@ project is using spaCy, you can grab one of our **spaCy badges** here:
[![Built with spaCy](https://img.shields.io/badge/built%20with-spaCy-09a3d5.svg)](https://spacy.io)
```
-
```markdown
diff --git a/website/docs/usage/training.mdx b/website/docs/usage/training.mdx
index 0b471c359..3366600bb 100644
--- a/website/docs/usage/training.mdx
+++ b/website/docs/usage/training.mdx
@@ -17,7 +17,7 @@ menu:
## Introduction to training {#basics hidden="true"}
-import Training101 from 'usage/101/\_training.mdx'
+import Training101 from 'usage/101/_training.mdx'
diff --git a/website/docs/usage/v2-1.mdx b/website/docs/usage/v2-1.mdx
index 500e43803..6f2c784d8 100644
--- a/website/docs/usage/v2-1.mdx
+++ b/website/docs/usage/v2-1.mdx
@@ -180,9 +180,9 @@ entirely **in Markdown**, without having to compromise on easy-to-use custom UI
components. We're hoping that the Markdown source will make it even easier to
contribute to the documentation. For more details, check out the
[styleguide](/styleguide) and
-[source](https://github.com/explosion/spacy/tree/v2.x/website). While
-converting the pages to Markdown, we've also fixed a bunch of typos, improved
-the existing pages and added some new content:
+[source](https://github.com/explosion/spacy/tree/v2.x/website). While converting
+the pages to Markdown, we've also fixed a bunch of typos, improved the existing
+pages and added some new content:
- **Usage Guide:** [Rule-based Matching](/usage/rule-based-matching) How to
use the `Matcher`, `PhraseMatcher` and the new `EntityRuler`, and write
diff --git a/website/docs/usage/v3-3.mdx b/website/docs/usage/v3-3.mdx
index 739e2a2f9..f6e8c03ad 100644
--- a/website/docs/usage/v3-3.mdx
+++ b/website/docs/usage/v3-3.mdx
@@ -79,7 +79,11 @@ displacy.serve(doc, style="span", options={"spans_key": "custom"})
import DisplacySpanHtml from 'images/displacy-span.html'
-
+
## Additional features and improvements
diff --git a/website/docs/usage/v3-4.mdx b/website/docs/usage/v3-4.mdx
index e10110b71..e6987e7a2 100644
--- a/website/docs/usage/v3-4.mdx
+++ b/website/docs/usage/v3-4.mdx
@@ -63,8 +63,8 @@ All CNN pipelines have been extended with whitespace augmentation.
The English CNN pipelines have new word vectors:
-| Package | Model Version | TAG | Parser LAS | NER F |
-| ----------------------------------------------- | ------------- | ---: | ---------: | ----: |
+| Package | Model Version | TAG | Parser LAS | NER F |
+| --------------------------------------------- | ------------- | ---: | ---------: | ----: |
| [`en_core_web_md`](/models/en#en_core_web_md) | v3.3.0 | 97.3 | 90.1 | 84.6 |
| [`en_core_web_md`](/models/en#en_core_web_md) | v3.4.0 | 97.2 | 90.3 | 85.5 |
| [`en_core_web_lg`](/models/en#en_core_web_lg) | v3.3.0 | 97.4 | 90.1 | 85.3 |
diff --git a/website/docs/usage/v3.mdx b/website/docs/usage/v3.mdx
index e8d62efe5..b4053a9de 100644
--- a/website/docs/usage/v3.mdx
+++ b/website/docs/usage/v3.mdx
@@ -88,7 +88,7 @@ giving you access to thousands of pretrained models for your pipelines.
![Pipeline components listening to shared embedding component](../images/tok2vec-listener.svg)
-import Benchmarks from 'usage/\_benchmarks-models.mdx'
+import Benchmarks from 'usage/_benchmarks-models.mdx'
diff --git a/website/docs/usage/visualizers.mdx b/website/docs/usage/visualizers.mdx
index da847d939..b0c02db60 100644
--- a/website/docs/usage/visualizers.mdx
+++ b/website/docs/usage/visualizers.mdx
@@ -118,12 +118,16 @@ displacy.serve(doc, style="ent")
import DisplacyEntHtml from 'images/displacy-ent2.html'
-
+
The entity visualizer lets you customize the following `options`:
| Argument | Description |
-| -------- | ------------------------------------------------------------------------------------------------------------- |
+| -------- | ------------------------------------------------------------------------------------------------------------- | ------ |
| `ents` | Entity types to highlight (`None` for all types). Defaults to `None`. ~~Optional[List[str]]~~ | `None` |
| `colors` | Color overrides. Entity types should be mapped to color names or values. Defaults to `{}`. ~~Dict[str, str]~~ |
@@ -145,7 +149,11 @@ use the `colors` setting to add your own colors for them.
import DisplacyEntCustomHtml from 'images/displacy-ent-custom.html'
-
+
The above example uses a little trick: Since the background color values are
added as the `background` style attribute, you can use any
@@ -184,7 +192,7 @@ nlp = spacy.blank("en")
doc = nlp(text)
doc.spans["sc"] = [
- Span(doc, 3, 6, "ORG"),
+ Span(doc, 3, 6, "ORG"),
Span(doc, 5, 6, "GPE"),
]
@@ -193,8 +201,11 @@ displacy.serve(doc, style="span")
import DisplacySpanHtml from 'images/displacy-span.html'
-
-
+
The span visualizer lets you customize the following `options`:
@@ -205,8 +216,8 @@ The span visualizer lets you customize the following `options`:
| `kb_url_template` | Optional template to construct the KB url for the entity to link to. Expects a python f-string format with single field to fill in ~~Optional[str]~~ |
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
-Because spans can be stored across different keys in `doc.spans`, you need to specify
-which one displaCy should use with `spans_key` (`sc` is the default).
+Because spans can be stored across different keys in `doc.spans`, you need to
+specify which one displaCy should use with `spans_key` (`sc` is the default).
> #### Options example
>
@@ -214,12 +225,15 @@ which one displaCy should use with `spans_key` (`sc` is the default).
> doc.spans["custom"] = [Span(doc, 3, 6, "BANK")]
> options = {"spans_key": "custom"}
> displacy.serve(doc, style="span", options=options)
+> ```
import DisplacySpanCustomHtml from 'images/displacy-span-custom.html'
-
-
-
+
## Using displaCy in Jupyter notebooks {#jupyter}
@@ -343,11 +357,11 @@ want to visualize output from other libraries, like [NLTK](http://www.nltk.org)
or
[SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet).
If you set `manual=True` on either `render()` or `serve()`, you can pass in data
-in displaCy's format as a dictionary (instead of `Doc` objects). There are helper
-functions for converting `Doc` objects to displaCy's format for use with `manual=True`:
-[`displacy.parse_deps`](/api/top-level#displacy.parse_deps),
-[`displacy.parse_ents`](/api/top-level#displacy.parse_ents),
-and [`displacy.parse_spans`](/api/top-level#displacy.parse_spans).
+in displaCy's format as a dictionary (instead of `Doc` objects). There are
+helper functions for converting `Doc` objects to displaCy's format for use with
+`manual=True`: [`displacy.parse_deps`](/api/top-level#displacy.parse_deps),
+[`displacy.parse_ents`](/api/top-level#displacy.parse_ents), and
+[`displacy.parse_spans`](/api/top-level#displacy.parse_spans).
> #### Example with parse function
>
@@ -366,7 +380,6 @@ and [`displacy.parse_spans`](/api/top-level#displacy.parse_spans).
> html = displacy.render(ex, style="ent", manual=True)
> ```
-
```python
### DEP input
{
diff --git a/website/gatsby-node.js b/website/gatsby-node.js
index b5d8c22c3..4a580eb03 100644
--- a/website/gatsby-node.js
+++ b/website/gatsby-node.js
@@ -101,19 +101,19 @@ exports.createPages = ({ graphql, actions }) => {
}
}
`
- ).then(result => {
+ ).then((result) => {
if (result.errors) {
console.log(result.errors)
reject(result.errors)
}
const sectionData = result.data.site.siteMetadata.sections
- const sections = Object.assign({}, ...sectionData.map(s => ({ [s.id]: s })))
+ const sections = Object.assign({}, ...sectionData.map((s) => ({ [s.id]: s })))
/* Regular pages */
const pages = result.data.allFile.edges
- pages.forEach(page => {
+ pages.forEach((page) => {
const { name } = path.parse(page.node.absolutePath)
if (!name.startsWith('_')) {
const mdx = page.node.childMdx || {}
@@ -184,7 +184,7 @@ exports.createPages = ({ graphql, actions }) => {
})
const universe = result.data.site.siteMetadata.universe.resources
- universe.forEach(page => {
+ universe.forEach((page) => {
const slug = `/universe/project/${page.id}`
createPage({
@@ -203,8 +203,11 @@ exports.createPages = ({ graphql, actions }) => {
})
const universeCategories = result.data.site.siteMetadata.universe.categories
- const categories = [].concat.apply([], universeCategories.map(cat => cat.items))
- categories.forEach(page => {
+ const categories = [].concat.apply(
+ [],
+ universeCategories.map((cat) => cat.items)
+ )
+ categories.forEach((page) => {
const slug = `/universe/category/${page.id}`
createPage({
diff --git a/website/meta/languages.json b/website/meta/languages.json
index 15158df79..46c0d3adb 100644
--- a/website/meta/languages.json
+++ b/website/meta/languages.json
@@ -36,12 +36,7 @@
"name": "Catalan",
"example": "Això és una frase.",
"has_examples": true,
- "models": [
- "ca_core_news_sm",
- "ca_core_news_md",
- "ca_core_news_lg",
- "ca_core_news_trf"
- ]
+ "models": ["ca_core_news_sm", "ca_core_news_md", "ca_core_news_lg", "ca_core_news_trf"]
},
{
"code": "cs",
@@ -53,22 +48,12 @@
"name": "Danish",
"example": "Dette er en sætning.",
"has_examples": true,
- "models": [
- "da_core_news_sm",
- "da_core_news_md",
- "da_core_news_lg",
- "da_core_news_trf"
- ]
+ "models": ["da_core_news_sm", "da_core_news_md", "da_core_news_lg", "da_core_news_trf"]
},
{
"code": "de",
"name": "German",
- "models": [
- "de_core_news_sm",
- "de_core_news_md",
- "de_core_news_lg",
- "de_dep_news_trf"
- ],
+ "models": ["de_core_news_sm", "de_core_news_md", "de_core_news_lg", "de_dep_news_trf"],
"example": "Dies ist ein Satz.",
"has_examples": true
},
@@ -80,35 +65,21 @@
{
"code": "el",
"name": "Greek",
- "models": [
- "el_core_news_sm",
- "el_core_news_md",
- "el_core_news_lg"
- ],
+ "models": ["el_core_news_sm", "el_core_news_md", "el_core_news_lg"],
"example": "Αυτή είναι μια πρόταση.",
"has_examples": true
},
{
"code": "en",
"name": "English",
- "models": [
- "en_core_web_sm",
- "en_core_web_md",
- "en_core_web_lg",
- "en_core_web_trf"
- ],
+ "models": ["en_core_web_sm", "en_core_web_md", "en_core_web_lg", "en_core_web_trf"],
"example": "This is a sentence.",
"has_examples": true
},
{
"code": "es",
"name": "Spanish",
- "models": [
- "es_core_news_sm",
- "es_core_news_md",
- "es_core_news_lg",
- "es_dep_news_trf"
- ],
+ "models": ["es_core_news_sm", "es_core_news_md", "es_core_news_lg", "es_dep_news_trf"],
"example": "Esto es una frase.",
"has_examples": true
},
@@ -130,21 +101,12 @@
"code": "fi",
"name": "Finnish",
"has_examples": true,
- "models": [
- "fi_core_news_sm",
- "fi_core_news_md",
- "fi_core_news_lg"
- ]
+ "models": ["fi_core_news_sm", "fi_core_news_md", "fi_core_news_lg"]
},
{
"code": "fr",
"name": "French",
- "models": [
- "fr_core_news_sm",
- "fr_core_news_md",
- "fr_core_news_lg",
- "fr_dep_news_trf"
- ],
+ "models": ["fr_core_news_sm", "fr_core_news_md", "fr_core_news_lg", "fr_dep_news_trf"],
"example": "C'est une phrase.",
"has_examples": true
},
@@ -178,11 +140,7 @@
"code": "hr",
"name": "Croatian",
"has_examples": true,
- "models": [
- "hr_core_news_sm",
- "hr_core_news_md",
- "hr_core_news_lg"
- ]
+ "models": ["hr_core_news_sm", "hr_core_news_md", "hr_core_news_lg"]
},
{
"code": "hsb",
@@ -213,23 +171,14 @@
{
"code": "it",
"name": "Italian",
- "models": [
- "it_core_news_sm",
- "it_core_news_md",
- "it_core_news_lg"
- ],
+ "models": ["it_core_news_sm", "it_core_news_md", "it_core_news_lg"],
"example": "Questa è una frase.",
"has_examples": true
},
{
"code": "ja",
"name": "Japanese",
- "models": [
- "ja_core_news_sm",
- "ja_core_news_md",
- "ja_core_news_lg",
- "ja_core_news_trf"
- ],
+ "models": ["ja_core_news_sm", "ja_core_news_md", "ja_core_news_lg", "ja_core_news_trf"],
"dependencies": [
{
"name": "SudachiPy",
@@ -263,11 +212,7 @@
],
"example": "이것은 문장입니다.",
"has_examples": true,
- "models": [
- "ko_core_news_sm",
- "ko_core_news_md",
- "ko_core_news_lg"
- ]
+ "models": ["ko_core_news_sm", "ko_core_news_md", "ko_core_news_lg"]
},
{
"code": "ky",
@@ -299,11 +244,7 @@
"code": "lt",
"name": "Lithuanian",
"has_examples": true,
- "models": [
- "lt_core_news_sm",
- "lt_core_news_md",
- "lt_core_news_lg"
- ]
+ "models": ["lt_core_news_sm", "lt_core_news_md", "lt_core_news_lg"]
},
{
"code": "lv",
@@ -312,11 +253,7 @@
{
"code": "mk",
"name": "Macedonian",
- "models": [
- "mk_core_news_sm",
- "mk_core_news_md",
- "mk_core_news_lg"
- ]
+ "models": ["mk_core_news_sm", "mk_core_news_md", "mk_core_news_lg"]
},
{
"code": "ml",
@@ -332,11 +269,7 @@
"name": "Norwegian Bokmål",
"example": "Dette er en setning.",
"has_examples": true,
- "models": [
- "nb_core_news_sm",
- "nb_core_news_md",
- "nb_core_news_lg"
- ]
+ "models": ["nb_core_news_sm", "nb_core_news_md", "nb_core_news_lg"]
},
{
"code": "ne",
@@ -346,11 +279,7 @@
{
"code": "nl",
"name": "Dutch",
- "models": [
- "nl_core_news_sm",
- "nl_core_news_md",
- "nl_core_news_lg"
- ],
+ "models": ["nl_core_news_sm", "nl_core_news_md", "nl_core_news_lg"],
"example": "Dit is een zin.",
"has_examples": true
},
@@ -359,20 +288,12 @@
"name": "Polish",
"example": "To jest zdanie.",
"has_examples": true,
- "models": [
- "pl_core_news_sm",
- "pl_core_news_md",
- "pl_core_news_lg"
- ]
+ "models": ["pl_core_news_sm", "pl_core_news_md", "pl_core_news_lg"]
},
{
"code": "pt",
"name": "Portuguese",
- "models": [
- "pt_core_news_sm",
- "pt_core_news_md",
- "pt_core_news_lg"
- ],
+ "models": ["pt_core_news_sm", "pt_core_news_md", "pt_core_news_lg"],
"example": "Esta é uma frase.",
"has_examples": true
},
@@ -381,11 +302,7 @@
"name": "Romanian",
"example": "Aceasta este o propoziție.",
"has_examples": true,
- "models": [
- "ro_core_news_sm",
- "ro_core_news_md",
- "ro_core_news_lg"
- ]
+ "models": ["ro_core_news_sm", "ro_core_news_md", "ro_core_news_lg"]
},
{
"code": "ru",
@@ -397,11 +314,7 @@
"url": "https://github.com/no-plagiarism/pymorphy3"
}
],
- "models": [
- "ru_core_news_sm",
- "ru_core_news_md",
- "ru_core_news_lg"
- ]
+ "models": ["ru_core_news_sm", "ru_core_news_md", "ru_core_news_lg"]
},
{
"code": "sa",
@@ -438,11 +351,7 @@
"code": "sv",
"name": "Swedish",
"has_examples": true,
- "models": [
- "sv_core_news_sm",
- "sv_core_news_md",
- "sv_core_news_lg"
- ]
+ "models": ["sv_core_news_sm", "sv_core_news_md", "sv_core_news_lg"]
},
{
"code": "ta",
@@ -496,12 +405,7 @@
"code": "uk",
"name": "Ukrainian",
"has_examples": true,
- "models": [
- "uk_core_news_sm",
- "uk_core_news_md",
- "uk_core_news_lg",
- "uk_core_news_trf"
- ],
+ "models": ["uk_core_news_sm", "uk_core_news_md", "uk_core_news_lg", "uk_core_news_trf"],
"dependencies": [
{
"name": "pymorphy3",
@@ -532,10 +436,7 @@
{
"code": "xx",
"name": "Multi-language",
- "models": [
- "xx_ent_wiki_sm",
- "xx_sent_ud_sm"
- ],
+ "models": ["xx_ent_wiki_sm", "xx_sent_ud_sm"],
"example": "This is a sentence about Facebook."
},
{
@@ -546,12 +447,7 @@
{
"code": "zh",
"name": "Chinese",
- "models": [
- "zh_core_web_sm",
- "zh_core_web_md",
- "zh_core_web_lg",
- "zh_core_web_trf"
- ],
+ "models": ["zh_core_web_sm", "zh_core_web_md", "zh_core_web_lg", "zh_core_web_trf"],
"dependencies": [
{
"name": "Jieba",
diff --git a/website/meta/universe.json b/website/meta/universe.json
index db533c3b2..7c7ce0c55 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1,6 +1,6 @@
{
"resources": [
- {
+ {
"id": "grecy",
"title": "greCy",
"slogan": "Ancient Greek pipelines for spaCy",
@@ -60,12 +60,8 @@
"github": "Ce11an",
"website": "https://www.linkedin.com/in/cellan-hall/"
},
- "category": [
- "extension"
- ],
- "tags": [
- "text-processing"
- ]
+ "category": ["extension"],
+ "tags": ["text-processing"]
},
{
"id": "Zshot",
@@ -195,9 +191,7 @@
"description": "Aim-spaCy helps to easily collect, store and explore training logs for spaCy, including: hyper-parameters, metrics and displaCy visualizations",
"github": "aimhubio/aim-spacy",
"pip": "aim-spacy",
- "code_example": [
- "https://github.com/aimhubio/aim-spacy/tree/master/examples"
- ],
+ "code_example": ["https://github.com/aimhubio/aim-spacy/tree/master/examples"],
"code_language": "python",
"url": "https://aimstack.io/spacy",
"thumb": "https://user-images.githubusercontent.com/13848158/172912427-ee9327ea-3cd8-47fa-8427-6c0d36cd831f.png",
@@ -266,7 +260,7 @@
"code_language": "python",
"author": "Peter Baumgartner",
"author_links": {
- "twitter" : "pmbaumgartner",
+ "twitter": "pmbaumgartner",
"github": "pmbaumgartner",
"website": "https://www.peterbaumgartner.com/"
},
@@ -286,7 +280,7 @@
"code_language": "python",
"author": "Explosion",
"author_links": {
- "twitter" : "explosion_ai",
+ "twitter": "explosion_ai",
"github": "explosion",
"website": "https://explosion.ai/"
},
@@ -748,7 +742,7 @@
"code_language": "python",
"author": "Keith Rozario",
"author_links": {
- "twitter" : "keithrozario",
+ "twitter": "keithrozario",
"github": "keithrozario",
"website": "https://www.keithrozario.com"
},
@@ -2664,10 +2658,7 @@
"github": "davidberenstein1957",
"website": "https://www.linkedin.com/in/david-berenstein-1bab11105/"
},
- "category": [
- "pipeline",
- "standalone"
- ],
+ "category": ["pipeline", "standalone"],
"tags": [
"classification",
"zero-shot",
@@ -2720,14 +2711,8 @@
"github": "davidberenstein1957",
"website": "https://www.linkedin.com/in/david-berenstein-1bab11105/"
},
- "category": [
- "pipeline"
- ],
- "tags": [
- "ner",
- "few-shot",
- "gensim"
- ],
+ "category": ["pipeline"],
+ "tags": ["ner", "few-shot", "gensim"],
"spacy_version": 3
},
{
@@ -2778,16 +2763,8 @@
"github": "davidberenstein1957",
"website": "https://www.linkedin.com/in/david-berenstein-1bab11105/"
},
- "category": [
- "pipeline",
- "standalone"
- ],
- "tags": [
- "coreference",
- "multi-lingual",
- "cross-lingual",
- "allennlp"
- ],
+ "category": ["pipeline", "standalone"],
+ "tags": ["coreference", "multi-lingual", "cross-lingual", "allennlp"],
"spacy_version": 3
},
{
@@ -2952,7 +2929,7 @@
"description": "Collection of NLP visualizations for NER and syntax tree markup. Similar to [displaCy](https://explosion.ai/demos/displacy) and [displaCy ENT](https://explosion.ai/demos/displacy-ent).",
"github": "natasha/ipymarkup",
"image": "https://github.com/natasha/ipymarkup/blob/master/table.png?raw=true",
- "pip":"pip install ipymarkup",
+ "pip": "pip install ipymarkup",
"code_example": [
"from ipymarkup import show_span_ascii_markup, show_dep_ascii_markup",
"",
@@ -3715,7 +3692,15 @@
"website": "https://brucewlee.github.io/"
},
"category": ["research", "scientific"],
- "tags": ["Readability", "Simplification", "Feature Extraction", "Syntax", "Discourse", "Semantics", "Lexical"]
+ "tags": [
+ "Readability",
+ "Simplification",
+ "Feature Extraction",
+ "Syntax",
+ "Discourse",
+ "Semantics",
+ "Lexical"
+ ]
},
{
"id": "hmrb",
@@ -4063,7 +4048,6 @@
"github": "yasufumy"
}
}
-
],
"categories": [
diff --git a/website/src/components/accordion.js b/website/src/components/accordion.js
index efe2477a3..00596326f 100644
--- a/website/src/components/accordion.js
+++ b/website/src/components/accordion.js
@@ -34,7 +34,7 @@ export default function Accordion({ title, id, expanded = false, spaced = false,
to={`#${id}`}
className={classes.anchor}
hidden
- onClick={event => event.stopPropagation()}
+ onClick={(event) => event.stopPropagation()}
>
¶
diff --git a/website/src/components/code.js b/website/src/components/code.js
index 6e9f0c22e..c4a3640f6 100644
--- a/website/src/components/code.js
+++ b/website/src/components/code.js
@@ -16,13 +16,13 @@ import classes from '../styles/code.module.sass'
const WRAP_THRESHOLD = 30
const CLI_GROUPS = ['init', 'debug', 'project', 'ray', 'huggingface-hub']
-export default props => (
+export default (props) => (
)
-export const Pre = props => {
+export const Pre = (props) => {
return