Update docs [ci skip]

This commit is contained in:
Ines Montani 2020-09-12 17:05:10 +02:00
parent eedaaaec75
commit 8b0dabe987
76 changed files with 722 additions and 790 deletions

View File

@ -609,7 +609,6 @@ In addition to the native markdown elements, you can use the components
├── docs # the actual markdown content ├── docs # the actual markdown content
├── meta # JSON-formatted site metadata ├── meta # JSON-formatted site metadata
| ├── languages.json # supported languages and statistical models | ├── languages.json # supported languages and statistical models
| ├── logos.json # logos and links for landing page
| ├── sidebars.json # sidebar navigations for different sections | ├── sidebars.json # sidebar navigations for different sections
| ├── site.json # general site metadata | ├── site.json # general site metadata
| └── universe.json # data for the spaCy universe section | └── universe.json # data for the spaCy universe section

View File

@ -38,7 +38,7 @@ how the component should be configured. You can override its settings via the
| `validate` | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. ~~bool~~ | | `validate` | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. ~~bool~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/attributeruler.py %%GITHUB_SPACY/spacy/pipeline/attributeruler.py
``` ```
## AttributeRuler.\_\_init\_\_ {#init tag="method"} ## AttributeRuler.\_\_init\_\_ {#init tag="method"}

View File

@ -230,12 +230,12 @@ $ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type]
### Converters {#converters} ### Converters {#converters}
| ID | Description | | ID | Description |
| ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `auto` | Automatically pick converter based on file extension and file content (default). | | `auto` | Automatically pick converter based on file extension and file content (default). |
| `json` | JSON-formatted training data used in spaCy v2.x. | | `json` | JSON-formatted training data used in spaCy v2.x. |
| `conll` | Universal Dependencies `.conllu` or `.conll` format. | | `conll` | Universal Dependencies `.conllu` or `.conll` format. |
| `ner` | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). | | `ner` | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](%%GITHUB_SPACY/extra/example_data/ner_example_data). |
| `iob` | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). | | `iob` | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](%%GITHUB_SPACY/extra/example_data/ner_example_data). |
## debug {#debug new="3"} ## debug {#debug new="3"}
@ -358,37 +358,37 @@ File /path/to/spacy/ml/models/tok2vec.py (line 207)
Registry @loggers Registry @loggers
Name spacy.ConsoleLogger.v1 Name spacy.ConsoleLogger.v1
Module spacy.training.loggers Module spacy.training.loggers
File /path/to/spacy/gold/loggers.py (line 8) File /path/to/spacy/training/loggers.py (line 8)
[training.batcher] [training.batcher]
Registry @batchers Registry @batchers
Name spacy.batch_by_words.v1 Name spacy.batch_by_words.v1
Module spacy.training.batchers Module spacy.training.batchers
File /path/to/spacy/gold/batchers.py (line 49) File /path/to/spacy/training/batchers.py (line 49)
[training.batcher.size] [training.batcher.size]
Registry @schedules Registry @schedules
Name compounding.v1 Name compounding.v1
Module thinc.schedules Module thinc.schedules
File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 43) File /path/to/thinc/thinc/schedules.py (line 43)
[training.dev_corpus] [training.dev_corpus]
Registry @readers Registry @readers
Name spacy.Corpus.v1 Name spacy.Corpus.v1
Module spacy.training.corpus Module spacy.training.corpus
File /path/to/spacy/gold/corpus.py (line 18) File /path/to/spacy/training/corpus.py (line 18)
[training.optimizer] [training.optimizer]
Registry @optimizers Registry @optimizers
Name Adam.v1 Name Adam.v1
Module thinc.optimizers Module thinc.optimizers
File /Users/ines/Repos/explosion/thinc/thinc/optimizers.py (line 58) File /path/to/thinc/thinc/optimizers.py (line 58)
[training.optimizer.learn_rate] [training.optimizer.learn_rate]
Registry @schedules Registry @schedules
Name warmup_linear.v1 Name warmup_linear.v1
Module thinc.schedules Module thinc.schedules
File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 91) File /path/to/thinc/thinc/schedules.py (line 91)
[training.train_corpus] [training.train_corpus]
Registry @readers Registry @readers
Name spacy.Corpus.v1 Name spacy.Corpus.v1
Module spacy.training.corpus Module spacy.training.corpus
File /path/to/spacy/gold/corpus.py (line 18) File /path/to/spacy/training/corpus.py (line 18)
``` ```
</Accordion> </Accordion>

View File

@ -2,7 +2,7 @@
title: Corpus title: Corpus
teaser: An annotated corpus teaser: An annotated corpus
tag: class tag: class
source: spacy/gold/corpus.py source: spacy/training/corpus.py
new: 3 new: 3
--- ---
@ -42,7 +42,7 @@ streaming.
| `limit` | Limit corpus to a subset of examples, e.g. for debugging. Defaults to `0` for no limit. ~~int~~ | | `limit` | Limit corpus to a subset of examples, e.g. for debugging. Defaults to `0` for no limit. ~~int~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/gold/corpus.py %%GITHUB_SPACY/spacy/training/corpus.py
``` ```
## Corpus.\_\_init\_\_ {#init tag="method"} ## Corpus.\_\_init\_\_ {#init tag="method"}

View File

@ -24,11 +24,11 @@ With Cython there are four ways of declaring complex data types. Unfortunately
we use all four in different places, as they all have different utility: we use all four in different places, as they all have different utility:
| Declaration | Description | Example | | Declaration | Description | Example |
| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | | --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
| `class` | A normal Python class. | [`Language`](/api/language) | | `class` | A normal Python class. | [`Language`](/api/language) |
| `cdef class` | A Python extension type. Differs from a normal Python class in that its attributes can be defined on the underlying struct. Can have C-level objects as attributes (notably structs and pointers), and can have methods which have C-level objects as arguments or return types. | [`Lexeme`](/api/cython-classes#lexeme) | | `cdef class` | A Python extension type. Differs from a normal Python class in that its attributes can be defined on the underlying struct. Can have C-level objects as attributes (notably structs and pointers), and can have methods which have C-level objects as arguments or return types. | [`Lexeme`](/api/cython-classes#lexeme) |
| `cdef struct` | A struct is just a collection of variables, sort of like a named tuple, except the memory is contiguous. Structs can't have methods, only attributes. | [`LexemeC`](/api/cython-structs#lexemec) | | `cdef struct` | A struct is just a collection of variables, sort of like a named tuple, except the memory is contiguous. Structs can't have methods, only attributes. | [`LexemeC`](/api/cython-structs#lexemec) |
| `cdef cppclass` | A C++ class. Like a struct, this can be allocated on the stack, but can have methods, a constructor and a destructor. Differs from `cdef class` in that it can be created and destroyed without acquiring the Python global interpreter lock. This style is the most obscure. | [`StateC`](https://github.com/explosion/spaCy/tree/master/spacy/syntax/_state.pxd) | | `cdef cppclass` | A C++ class. Like a struct, this can be allocated on the stack, but can have methods, a constructor and a destructor. Differs from `cdef class` in that it can be created and destroyed without acquiring the Python global interpreter lock. This style is the most obscure. | [`StateC`](%%GITHUB_SPACY/spacy/pipeline/_parser_internals/_state.pxd) |
The most important classes in spaCy are defined as `cdef class` objects. The The most important classes in spaCy are defined as `cdef class` objects. The
underlying data for these objects is usually gathered into a struct, which is underlying data for these objects is usually gathered into a struct, which is

View File

@ -37,7 +37,7 @@ recommended settings for your use case, check out the
> guide on [registered functions](/usage/training#config-functions) for details. > guide on [registered functions](/usage/training#config-functions) for details.
```ini ```ini
https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg %%GITHUB_SPACY/spacy/default_config.cfg
``` ```
<Infobox title="Notes on data validation" emoji="💡"> <Infobox title="Notes on data validation" emoji="💡">
@ -45,8 +45,7 @@ https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg
Under the hood, spaCy's configs are powered by our machine learning library Under the hood, spaCy's configs are powered by our machine learning library
[Thinc's config system](https://thinc.ai/docs/usage-config), which uses [Thinc's config system](https://thinc.ai/docs/usage-config), which uses
[`pydantic`](https://github.com/samuelcolvin/pydantic/) for data validation [`pydantic`](https://github.com/samuelcolvin/pydantic/) for data validation
based on type hints. See based on type hints. See [`spacy/schemas.py`](%%GITHUB_SPACY/spacy/schemas.py)
[`spacy/schemas.py`](https://github.com/explosion/spaCy/blob/develop/spacy/schemas.py)
for the schemas used to validate the default config. Arguments of registered for the schemas used to validate the default config. Arguments of registered
functions are validated against their type annotations, if available. To debug functions are validated against their type annotations, if available. To debug
your config and check that it's valid, you can run the your config and check that it's valid, you can run the
@ -456,7 +455,7 @@ lexical data.
Here's an example of the 20 most frequent lexemes in the English training data: Here's an example of the 20 most frequent lexemes in the English training data:
```json ```json
https://github.com/explosion/spaCy/tree/master/examples/training/vocab-data.jsonl %%GITHUB_SPACY/extra/example_data/vocab-data.jsonl
``` ```
## Pipeline meta {#meta} ## Pipeline meta {#meta}

View File

@ -57,7 +57,7 @@ architectures and their arguments and hyperparameters.
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/dep_parser.pyx %%GITHUB_SPACY/spacy/pipeline/dep_parser.pyx
``` ```
## DependencyParser.\_\_init\_\_ {#init tag="method"} ## DependencyParser.\_\_init\_\_ {#init tag="method"}

View File

@ -50,7 +50,7 @@ architectures and their arguments and hyperparameters.
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ | | `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entity_linker.py %%GITHUB_SPACY/spacy/pipeline/entity_linker.py
``` ```
## EntityLinker.\_\_init\_\_ {#init tag="method"} ## EntityLinker.\_\_init\_\_ {#init tag="method"}

View File

@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters.
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/ner.pyx %%GITHUB_SPACY/spacy/pipeline/ner.pyx
``` ```
## EntityRecognizer.\_\_init\_\_ {#init tag="method"} ## EntityRecognizer.\_\_init\_\_ {#init tag="method"}

View File

@ -42,7 +42,7 @@ how the component should be configured. You can override its settings via the
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"||"`. ~~str~~ | | `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"||"`. ~~str~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entityruler.py %%GITHUB_SPACY/spacy/pipeline/entityruler.py
``` ```
## EntityRuler.\_\_init\_\_ {#init tag="method"} ## EntityRuler.\_\_init\_\_ {#init tag="method"}

View File

@ -2,7 +2,7 @@
title: Example title: Example
teaser: A training instance teaser: A training instance
tag: class tag: class
source: spacy/gold/example.pyx source: spacy/training/example.pyx
new: 3.0 new: 3.0
--- ---

View File

@ -945,10 +945,10 @@ available to the loaded object.
## Class attributes {#class-attributes} ## Class attributes {#class-attributes}
| Name | Description | | Name | Description |
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `Defaults` | Settings, data and factory methods for creating the `nlp` object and processing pipeline. ~~Defaults~~ | | `Defaults` | Settings, data and factory methods for creating the `nlp` object and processing pipeline. ~~Defaults~~ |
| `lang` | Two-letter language ID, i.e. [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). ~~str~~ | | `lang` | Two-letter language ID, i.e. [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). ~~str~~ |
| `default_config` | Base [config](/usage/training#config) to use for [Language.config](/api/language#config). Defaults to [`default_config.cfg`](https://github.com/explosion/spaCy/tree/develop/spacy/default_config.cfg). ~~Config~~ | | `default_config` | Base [config](/usage/training#config) to use for [Language.config](/api/language#config). Defaults to [`default_config.cfg`](%%GITHUB_SPACY/spacy/default_config.cfg). ~~Config~~ |
## Defaults {#defaults} ## Defaults {#defaults}
@ -982,33 +982,16 @@ customize the default language data:
> ``` > ```
| Name | Description | | Name | Description |
| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `stop_words` | List of stop words, used for `Token.is_stop`.<br />**Example:** [`stop_words.py`][stop_words.py] ~~Set[str]~~ | | `stop_words` | List of stop words, used for `Token.is_stop`.<br />**Example:** [`stop_words.py`](%%GITHUB_SPACY/spacy/lang/en/stop_words.py) ~~Set[str]~~ |
| `tokenizer_exceptions` | Tokenizer exception rules, string mapped to list of token attributes.<br />**Example:** [`de/tokenizer_exceptions.py`][de/tokenizer_exceptions.py] ~~Dict[str, List[dict]]~~ | | `tokenizer_exceptions` | Tokenizer exception rules, string mapped to list of token attributes.<br />**Example:** [`de/tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/de/tokenizer_exceptions.py) ~~Dict[str, List[dict]]~~ |
| `prefixes`, `suffixes`, `infixes` | Prefix, suffix and infix rules for the default tokenizer.<br />**Example:** [`punctuation.py`][punctuation.py] ~~Optional[List[Union[str, Pattern]]]~~ | | `prefixes`, `suffixes`, `infixes` | Prefix, suffix and infix rules for the default tokenizer.<br />**Example:** [`punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) ~~Optional[List[Union[str, Pattern]]]~~ |
| `token_match` | Optional regex for matching strings that should never be split, overriding the infix rules.<br />**Example:** [`fr/tokenizer_exceptions.py`][fr/tokenizer_exceptions.py] ~~Optional[Pattern]~~ | | `token_match` | Optional regex for matching strings that should never be split, overriding the infix rules.<br />**Example:** [`fr/tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/fr/tokenizer_exceptions.py) ~~Optional[Pattern]~~ |
| `url_match` | Regular expression for matching URLs. Prefixes and suffixes are removed before applying the match.<br />**Example:** [`tokenizer_exceptions.py`][tokenizer_exceptions.py] ~~Optional[Pattern]~~ | | `url_match` | Regular expression for matching URLs. Prefixes and suffixes are removed before applying the match.<br />**Example:** [`tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/tokenizer_exceptions.py) ~~Optional[Pattern]~~ |
| `lex_attr_getters` | Custom functions for setting lexical attributes on tokens, e.g. `like_num`.<br />**Example:** [`lex_attrs.py`][lex_attrs.py] ~~Dict[int, Callable[[str], Any]]~~ | | `lex_attr_getters` | Custom functions for setting lexical attributes on tokens, e.g. `like_num`.<br />**Example:** [`lex_attrs.py`](%%GITHUB_SPACY/spacy/lang/en/lex_attrs.py) ~~Dict[int, Callable[[str], Any]]~~ |
| `syntax_iterators` | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks).<br />**Example:** [`syntax_iterators.py`][syntax_iterators.py]. ~~Dict[str, Callable[[Union[Doc, Span]], Iterator[Span]]]~~ | | `syntax_iterators` | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks).<br />**Example:** [`syntax_iterators.py`](%%GITHUB_SPACY/spacy/lang/en/syntax_iterators.py). ~~Dict[str, Callable[[Union[Doc, Span]], Iterator[Span]]]~~ |
| `writing_system` | Information about the language's writing system, available via `Vocab.writing_system`. Defaults to: `{"direction": "ltr", "has_case": True, "has_letters": True}`.<br />**Example:** [`zh/__init__.py`][zh/__init__.py] ~~Dict[str, Any]~~ | | `writing_system` | Information about the language's writing system, available via `Vocab.writing_system`. Defaults to: `{"direction": "ltr", "has_case": True, "has_letters": True}`.<br />**Example:** [`zh/__init__.py`](%%GITHUB_SPACY/spacy/lang/zh/__init__.py) ~~Dict[str, Any]~~ |
| `config` | Default [config](/usage/training#config) added to `nlp.config`. This can include references to custom tokenizers or lemmatizers.<br />**Example:** [`zh/__init__.py`][zh/__init__.py] ~~Config~~ | | `config` | Default [config](/usage/training#config) added to `nlp.config`. This can include references to custom tokenizers or lemmatizers.<br />**Example:** [`zh/__init__.py`](%%GITHUB_SPACY/spacy/lang/zh/__init__.py) ~~Config~~ |
[stop_words.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/stop_words.py
[tokenizer_exceptions.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/tokenizer_exceptions.py
[de/tokenizer_exceptions.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/de/tokenizer_exceptions.py
[fr/tokenizer_exceptions.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/fr/tokenizer_exceptions.py
[punctuation.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py
[lex_attrs.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/lex_attrs.py
[syntax_iterators.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/syntax_iterators.py
[zh/__init__.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/zh/__init__.py
## Serialization fields {#serialization-fields} ## Serialization fields {#serialization-fields}

View File

@ -56,7 +56,7 @@ data formats used by the lookup and rule-based lemmatizers, see
| `model` | **Not yet implemented:** the model to use. ~~Model~~ | | `model` | **Not yet implemented:** the model to use. ~~Model~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/lemmatizer.py %%GITHUB_SPACY/spacy/pipeline/lemmatizer.py
``` ```
## Lemmatizer.\_\_init\_\_ {#init tag="method"} ## Lemmatizer.\_\_init\_\_ {#init tag="method"}

View File

@ -37,7 +37,7 @@ architectures and their arguments and hyperparameters.
| `model` | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/morphologizer.pyx %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
``` ```
## Morphologizer.\_\_init\_\_ {#init tag="method"} ## Morphologizer.\_\_init\_\_ {#init tag="method"}

View File

@ -22,7 +22,7 @@ for how to use the `Pipe` base class to implement custom components.
> inherit from `Pipe`. > inherit from `Pipe`.
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/pipe.pyx %%GITHUB_SPACY/spacy/pipeline/pipe.pyx
``` ```
## Pipe.\_\_init\_\_ {#init tag="method"} ## Pipe.\_\_init\_\_ {#init tag="method"}

View File

@ -34,7 +34,7 @@ architectures and their arguments and hyperparameters.
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/senter.pyx %%GITHUB_SPACY/spacy/pipeline/senter.pyx
``` ```
## SentenceRecognizer.\_\_init\_\_ {#init tag="method"} ## SentenceRecognizer.\_\_init\_\_ {#init tag="method"}

View File

@ -33,7 +33,7 @@ how the component should be configured. You can override its settings via the
| `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ | `None` | | `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ | `None` |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/sentencizer.pyx %%GITHUB_SPACY/spacy/pipeline/sentencizer.pyx
``` ```
## Sentencizer.\_\_init\_\_ {#init tag="method"} ## Sentencizer.\_\_init\_\_ {#init tag="method"}

View File

@ -34,7 +34,7 @@ architectures and their arguments and hyperparameters.
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/tagger.pyx %%GITHUB_SPACY/spacy/pipeline/tagger.pyx
``` ```
## Tagger.\_\_init\_\_ {#init tag="method"} ## Tagger.\_\_init\_\_ {#init tag="method"}

View File

@ -41,7 +41,7 @@ architectures and their arguments and hyperparameters.
| `model` | A model instance that predicts scores for each category. Defaults to [TextCatEnsemble](/api/architectures#TextCatEnsemble). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | A model instance that predicts scores for each category. Defaults to [TextCatEnsemble](/api/architectures#TextCatEnsemble). ~~Model[List[Doc], List[Floats2d]]~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/textcat.py %%GITHUB_SPACY/spacy/pipeline/textcat.py
``` ```
## TextCategorizer.\_\_init\_\_ {#init tag="method"} ## TextCategorizer.\_\_init\_\_ {#init tag="method"}

View File

@ -45,7 +45,7 @@ architectures and their arguments and hyperparameters.
| `model` | The model to use. Defaults to [HashEmbedCNN](/api/architectures#HashEmbedCNN). ~~Model[List[Doc], List[Floats2d]]~~ | | `model` | The model to use. Defaults to [HashEmbedCNN](/api/architectures#HashEmbedCNN). ~~Model[List[Doc], List[Floats2d]]~~ |
```python ```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/tok2vec.py %%GITHUB_SPACY/spacy/pipeline/tok2vec.py
``` ```
## Tok2Vec.\_\_init\_\_ {#init tag="method"} ## Tok2Vec.\_\_init\_\_ {#init tag="method"}

View File

@ -105,8 +105,7 @@ your installation, installed pipelines and local setup from within spaCy.
### spacy.explain {#spacy.explain tag="function"} ### spacy.explain {#spacy.explain tag="function"}
Get a description for a given POS tag, dependency label or entity type. For a Get a description for a given POS tag, dependency label or entity type. For a
list of available terms, see list of available terms, see [`glossary.py`](%%GITHUB_SPACY/spacy/glossary.py).
[`glossary.py`](https://github.com/explosion/spaCy/tree/master/spacy/glossary.py).
> #### Example > #### Example
> >
@ -263,10 +262,10 @@ If a setting is not present in the options, the default value will be used.
> ``` > ```
| Name | Description | | Name | Description |
| --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | --------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ | | `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ | | `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](https://github.com/explosion/spaCy/blob/master/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ | | `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
By default, displaCy comes with colors for all entity types used by By default, displaCy comes with colors for all entity types used by
[spaCy's trained pipelines](/models). If you're using custom entity types, you [spaCy's trained pipelines](/models). If you're using custom entity types, you
@ -348,7 +347,7 @@ See the [`Transformer`](/api/transformer) API reference and
| [`span_getters`](/api/transformer#span_getters) | Registry for functions that take a batch of `Doc` objects and return a list of `Span` objects to process by the transformer, e.g. sentences. | | [`span_getters`](/api/transformer#span_getters) | Registry for functions that take a batch of `Doc` objects and return a list of `Span` objects to process by the transformer, e.g. sentences. |
| [`annotation_setters`](/api/transformer#annotation_setters) | Registry for functions that create annotation setters. Annotation setters are functions that take a batch of `Doc` objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) and can set additional annotations on the `Doc`. | | [`annotation_setters`](/api/transformer#annotation_setters) | Registry for functions that create annotation setters. Annotation setters are functions that take a batch of `Doc` objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) and can set additional annotations on the `Doc`. |
## Loggers {#loggers source="spacy/gold/loggers.py" new="3"} ## Loggers {#loggers source="spacy/training/loggers.py" new="3"}
A logger records the training results. When a logger is created, two functions A logger records the training results. When a logger is created, two functions
are returned: one for logging the information for each training step, and a are returned: one for logging the information for each training step, and a
@ -452,7 +451,7 @@ remain in the config file stored on your local system.
| `project_name` | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~ | | `project_name` | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~ |
| `remove_config_values` | A list of values to exclude from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ | | `remove_config_values` | A list of values to exclude from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ |
## Batchers {#batchers source="spacy/gold/batchers.py" new="3"} ## Batchers {#batchers source="spacy/training/batchers.py" new="3"}
A data batcher implements a batching strategy that essentially turns a stream of A data batcher implements a batching strategy that essentially turns a stream of
items into a stream of batches, with each batch consisting of one item or a list items into a stream of batches, with each batch consisting of one item or a list
@ -536,7 +535,7 @@ sequences in the batch.
| `discard_oversize` | Whether to discard sequences that are by themselves longer than the largest padded batch size. ~~bool~~ | | `discard_oversize` | Whether to discard sequences that are by themselves longer than the largest padded batch size. ~~bool~~ |
| `get_length` | Optional function that receives a sequence item and returns its length. Defaults to the built-in `len()` if not set. ~~Optional[Callable[[Any], int]]~~ | | `get_length` | Optional function that receives a sequence item and returns its length. Defaults to the built-in `len()` if not set. ~~Optional[Callable[[Any], int]]~~ |
## Training data and alignment {#gold source="spacy/gold"} ## Training data and alignment {#gold source="spacy/training"}
### training.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"} ### training.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"}
@ -616,12 +615,12 @@ token-based tags, e.g. to overwrite the `doc.ents`.
## Utility functions {#util source="spacy/util.py"} ## Utility functions {#util source="spacy/util.py"}
spaCy comes with a small collection of utility functions located in spaCy comes with a small collection of utility functions located in
[`spacy/util.py`](https://github.com/explosion/spaCy/tree/master/spacy/util.py). [`spacy/util.py`](%%GITHUB_SPACY/spacy/util.py). Because utility functions are
Because utility functions are mostly intended for **internal use within spaCy**, mostly intended for **internal use within spaCy**, their behavior may change
their behavior may change with future releases. The functions documented on this with future releases. The functions documented on this page should be safe to
page should be safe to use and we'll try to ensure backwards compatibility. use and we'll try to ensure backwards compatibility. However, we recommend
However, we recommend having additional tests in place if your application having additional tests in place if your application depends on any of spaCy's
depends on any of spaCy's utilities. utilities.
### util.get_lang_class {#util.get_lang_class tag="function"} ### util.get_lang_class {#util.get_lang_class tag="function"}
@ -833,8 +832,8 @@ Compile a sequence of prefix rules into a regex object.
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
| `entries` | The prefix rules, e.g. [`lang.punctuation.TOKENIZER_PREFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ | | `entries` | The prefix rules, e.g. [`lang.punctuation.TOKENIZER_PREFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
| **RETURNS** | The regex object to be used for [`Tokenizer.prefix_search`](/api/tokenizer#attributes). ~~Pattern~~ | | **RETURNS** | The regex object to be used for [`Tokenizer.prefix_search`](/api/tokenizer#attributes). ~~Pattern~~ |
### util.compile_suffix_regex {#util.compile_suffix_regex tag="function"} ### util.compile_suffix_regex {#util.compile_suffix_regex tag="function"}
@ -850,8 +849,8 @@ Compile a sequence of suffix rules into a regex object.
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
| `entries` | The suffix rules, e.g. [`lang.punctuation.TOKENIZER_SUFFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ | | `entries` | The suffix rules, e.g. [`lang.punctuation.TOKENIZER_SUFFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
| **RETURNS** | The regex object to be used for [`Tokenizer.suffix_search`](/api/tokenizer#attributes). ~~Pattern~~ | | **RETURNS** | The regex object to be used for [`Tokenizer.suffix_search`](/api/tokenizer#attributes). ~~Pattern~~ |
### util.compile_infix_regex {#util.compile_infix_regex tag="function"} ### util.compile_infix_regex {#util.compile_infix_regex tag="function"}
@ -867,8 +866,8 @@ Compile a sequence of infix rules into a regex object.
> ``` > ```
| Name | Description | | Name | Description |
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
| `entries` | The infix rules, e.g. [`lang.punctuation.TOKENIZER_INFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ | | `entries` | The infix rules, e.g. [`lang.punctuation.TOKENIZER_INFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
| **RETURNS** | The regex object to be used for [`Tokenizer.infix_finditer`](/api/tokenizer#attributes). ~~Pattern~~ | | **RETURNS** | The regex object to be used for [`Tokenizer.infix_finditer`](/api/tokenizer#attributes). ~~Pattern~~ |
### util.minibatch {#util.minibatch tag="function" new="2"} ### util.minibatch {#util.minibatch tag="function" new="2"}

View File

@ -31,7 +31,7 @@ supports all models that are available via the
Usually you will connect subsequent components to the shared transformer using Usually you will connect subsequent components to the shared transformer using
the [TransformerListener](/api/architectures#TransformerListener) layer. This the [TransformerListener](/api/architectures#TransformerListener) layer. This
works similarly to spaCy's [Tok2Vec](/api/tok2vec) component and works similarly to spaCy's [Tok2Vec](/api/tok2vec) component and
[Tok2VecListener](/api/architectures/Tok2VecListener) sublayer. [Tok2VecListener](/api/architectures/#Tok2VecListener) sublayer.
The component assigns the output of the transformer to the `Doc`'s extension The component assigns the output of the transformer to the `Doc`'s extension
attributes. We also calculate an alignment between the word-piece tokens and the attributes. We also calculate an alignment between the word-piece tokens and the

Binary file not shown.

After

Width:  |  Height:  |  Size: 281 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 151 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 146 KiB

View File

@ -2,9 +2,8 @@ Every language is different and usually full of **exceptions and special
cases**, especially amongst the most common words. Some of these exceptions are cases**, especially amongst the most common words. Some of these exceptions are
shared across languages, while others are **entirely specific** – usually so shared across languages, while others are **entirely specific** – usually so
specific that they need to be hard-coded. The specific that they need to be hard-coded. The
[`lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang) module [`lang`](%%GITHUB_SPACY/spacy/lang) module contains all language-specific data,
contains all language-specific data, organized in simple Python files. This organized in simple Python files. This makes the data easy to update and extend.
makes the data easy to update and extend.
The **shared language data** in the directory root includes rules that can be The **shared language data** in the directory root includes rules that can be
generalized across languages – for example, rules for basic punctuation, emoji, generalized across languages – for example, rules for basic punctuation, emoji,
@ -23,27 +22,11 @@ values are defined in the [`Language.Defaults`](/api/language#defaults).
> ``` > ```
| Name | Description | | Name | Description |
| ----------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **Stop words**<br />[`stop_words.py`][stop_words.py] | List of most common words of a language that are often useful to filter out, for example "and" or "I". Matching tokens will return `True` for `is_stop`. | | **Stop words**<br />[`stop_words.py`](%%GITHUB_SPACY/spacy/lang/en/stop_words.py) | List of most common words of a language that are often useful to filter out, for example "and" or "I". Matching tokens will return `True` for `is_stop`. |
| **Tokenizer exceptions**<br />[`tokenizer_exceptions.py`][tokenizer_exceptions.py] | Special-case rules for the tokenizer, for example, contractions like "can't" and abbreviations with punctuation, like "U.K.". | | **Tokenizer exceptions**<br />[`tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/de/tokenizer_exceptions.py) | Special-case rules for the tokenizer, for example, contractions like "can't" and abbreviations with punctuation, like "U.K.". |
| **Punctuation rules**<br />[`punctuation.py`][punctuation.py] | Regular expressions for splitting tokens, e.g. on punctuation or special characters like emoji. Includes rules for prefixes, suffixes and infixes. | | **Punctuation rules**<br />[`punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) | Regular expressions for splitting tokens, e.g. on punctuation or special characters like emoji. Includes rules for prefixes, suffixes and infixes. |
| **Character classes**<br />[`char_classes.py`][char_classes.py] | Character classes to be used in regular expressions, for example, Latin characters, quotes, hyphens or icons. | | **Character classes**<br />[`char_classes.py`](%%GITHUB_SPACY/spacy/lang/char_classes.py) | Character classes to be used in regular expressions, for example, Latin characters, quotes, hyphens or icons. |
| **Lexical attributes**<br />[`lex_attrs.py`][lex_attrs.py] | Custom functions for setting lexical attributes on tokens, e.g. `like_num`, which includes language-specific words like "ten" or "hundred". | | **Lexical attributes**<br />[`lex_attrs.py`](%%GITHUB_SPACY/spacy/lang/en/lex_attrs.py) | Custom functions for setting lexical attributes on tokens, e.g. `like_num`, which includes language-specific words like "ten" or "hundred". |
| **Syntax iterators**<br />[`syntax_iterators.py`][syntax_iterators.py] | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks). | | **Syntax iterators**<br />[`syntax_iterators.py`](%%GITHUB_SPACY/spacy/lang/en/syntax_iterators.py) | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks). |
| **Lemmatizer**<br />[`lemmatizer.py`][lemmatizer.py] [`spacy-lookups-data`][spacy-lookups-data] | Custom lemmatizer implementation and lemmatization tables. | | **Lemmatizer**<br />[`lemmatizer.py`](%%GITHUB_SPACY/spacy/lang/fr/lemmatizer.py) [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) | Custom lemmatizer implementation and lemmatization tables. |
[stop_words.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/stop_words.py
[tokenizer_exceptions.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/de/tokenizer_exceptions.py
[punctuation.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py
[char_classes.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/char_classes.py
[lex_attrs.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/lex_attrs.py
[syntax_iterators.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/syntax_iterators.py
[lemmatizer.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/fr/lemmatizer.py
[spacy-lookups-data]: https://github.com/explosion/spacy-lookups-data

View File

@ -1,10 +0,0 @@
import { Help } from 'components/typography'
| System | Year | Language | Accuracy | Speed (wps) |
| -------------- | ---- | --------------- | -------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| **spaCy v2.x** | 2017 | Python / Cython | **92.6** | _n/a_ <Help>This table shows speed as benchmarked by Choi et al. We therefore can't provide comparable figures, as we'd be running the benchmark on different hardware.</Help> |
| **spaCy v1.x** | 2015 | Python / Cython | 91.8 | 13,963 |
| ClearNLP | 2015 | Java | 91.7 | 10,271 |
| CoreNLP | 2015 | Java | 89.6 | 8,602 |
| MATE | 2015 | Java | 92.5 | 550 |
| Turbo | 2015 | C++ | 92.4 | 349 |

View File

@ -0,0 +1,44 @@
import { Help } from 'components/typography'; import Link from 'components/link'
<!-- TODO: update, add project template -->
<figure>
| System | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
| ------------------------------------------------------------------------- | ----------------: | ----------------: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | | | | | 6k |
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | | | | | |
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.9 | 10k | |
| [Stanza](https://stanfordnlp.github.io/stanza/) (StanfordNLP)<sup>1</sup> | _n/a_<sup>2</sup> | _n/a_<sup>2</sup> | 88.8 | 234 | 2k |
| <Link to="https://github.com/flairNLP/flair" hideIcon>Flair</Link> | - | 97.9 | 89.3 | | |
<figcaption class="caption">
**Accuracy and speed on the
[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.**<br />**1.**
[Qi et al. (2020)](https://arxiv.org/pdf/2003.07082.pdf). **2.** _Coming soon_:
Qi et al. don't report parsing and tagging results on OntoNotes. We're working
on training Stanza on this corpus to allow direct comparison.
</figcaption>
</figure>
<figure>
| System | POS | UAS | LAS |
| ------------------------------------------------------------------------------ | ---: | ---: | ---: |
| spaCy RoBERTa (2020) | | | |
| spaCy CNN (2020) | | | |
| [Mrini et al.](https://khalilmrini.github.io/Label_Attention_Layer.pdf) (2019) | 97.3 | 97.4 | 96.3 |
| [Zhou and Zhao](https://www.aclweb.org/anthology/P19-1230/) (2019) | 97.3 | 97.2 | 95.7 |
<figcaption class="caption">
**Accuracy on the Penn Treebank.** See
[NLP-progress](http://nlpprogress.com/english/dependency_parsing.html) for more
results.
</figcaption>
</figure>

View File

@ -579,12 +579,17 @@ def MyCustomVectors(
## Pretraining {#pretraining} ## Pretraining {#pretraining}
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
</Infobox>
<!--
- explain general concept and idea (short!) - explain general concept and idea (short!)
- present it as a separate lightweight mechanism for pretraining the tok2vec - present it as a separate lightweight mechanism for pretraining the tok2vec
layer layer
- advantages (could also be pros/cons table) - advantages (could also be pros/cons table)
- explain how it generates a separate file (!) and how it depends on the same - explain how it generates a separate file (!) and how it depends on the same
vectors vectors
-->
> #### Raw text format > #### Raw text format
> >

View File

@ -5,254 +5,55 @@ next: /usage/spacy-101
menu: menu:
- ['Feature Comparison', 'comparison'] - ['Feature Comparison', 'comparison']
- ['Benchmarks', 'benchmarks'] - ['Benchmarks', 'benchmarks']
# TODO: - ['Citing spaCy', 'citation']
--- ---
## Feature comparison {#comparison} ## Comparison {#comparison hidden="true"}
Here's a quick comparison of the functionalities offered by spaCy, ### When should I use spaCy? {#comparison-usage}
[NLTK](http://www.nltk.org/py-modindex.html) and
[CoreNLP](http://stanfordnlp.github.io/CoreNLP/).
| | spaCy | NLTK | CoreNLP | <!-- TODO: update -->
| ----------------------- | :----: | :----: | :-----------: |
| Programming language | Python | Python | Java / Python |
| Neural network models | ✅ | ❌ | ✅ |
| Integrated word vectors | ✅ | ❌ | ❌ |
| Multi-language support | ✅ | ✅ | ✅ |
| Tokenization | ✅ | ✅ | ✅ |
| Part-of-speech tagging | ✅ | ✅ | ✅ |
| Sentence segmentation | ✅ | ✅ | ✅ |
| Dependency parsing | ✅ | ❌ | ✅ |
| Entity recognition | ✅ | ✅ | ✅ |
| Entity linking | ✅ | ❌ | ❌ |
| Coreference resolution | ❌ | ❌ | ✅ |
### When should I use what? {#comparison-usage} | Use Cases |
| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
Natural Language Understanding is an active area of research and development, so | ✅ **I'm a beginner and just getting started with NLP.**<br />spaCy makes it easy to get started and comes with extensive documentation, including a beginner-friendly [101 guide](/usage/spacy-101) and a free interactive [online course](https://course.spacy.io). |
there are many different tools or technologies catering to different use-cases. | ✅ **I want to build an end-to-end production application.** |
The table below summarizes a few libraries (spaCy, | ✅ **I want my application to be efficient on CPU.**<br />While spaCy lets you train modern NLP models that are best run on GPU, it also offers CPU-optimized pipelines, which may be less accurate but much cheaper to run. |
[NLTK](http://www.nltk.org/py-modindex.html), [AllenNLP](https://allennlp.org/), | ✅ **I want to try out different neural network architectures for NLP.** |
[StanfordNLP](https://stanfordnlp.github.io/stanfordnlp/) and | ❌ **I want to build a language generation application.**<br />spaCy's focus is natural language _processing_ and extracting information from large volumes of text. While you can use it to help you re-write existing text, it doesn't include any specific functionality for language generation tasks. |
[TensorFlow](https://www.tensorflow.org/)) to help you get a feel for how things fit | ❌ **I want to research machine learning algorithms.** |
together.
| | spaCy | NLTK | Allen-<br />NLP | Stanford-<br />NLP | Tensor-<br />Flow |
| ----------------------------------------------------------------- | :---: | :--: | :-------------: | :----------------: | :---------------: |
| I'm a beginner and just getting started with NLP. | ✅ | ✅ | ❌ | ✅ | ❌ |
| I want to build an end-to-end production application. | ✅ | ❌ | ❌ | ❌ | ✅ |
| I want to try out different neural network architectures for NLP. | ❌ | ❌ | ✅ | ❌ | ✅ |
| I want to try the latest models with state-of-the-art accuracy. | ❌ | ❌ | ✅ | ✅ | ✅ |
| I want to train models from my own data. | ✅ | ✅ | ✅ | ✅ | ✅ |
| I want my application to be efficient on CPU. | ✅ | ✅ | ❌ | ❌ | ❌ |
## Benchmarks {#benchmarks} ## Benchmarks {#benchmarks}
Two peer-reviewed papers in 2015 confirmed that spaCy offers the **fastest spaCy v3.0 introduces transformer-based pipelines that bring spaCy's accuracy
syntactic parser in the world** and that **its accuracy is within 1% of the right up to **current state-of-the-art**. You can also use a CPU-optimized
best** available. The few systems that are more accurate are 20× slower or more. pipeline, which is less accurate but much cheaper to run.
> #### About the evaluation <!-- TODO: -->
> #### Evaluation details
> >
> The first of the evaluations was published by **Yahoo! Labs** and **Emory > - **OntoNotes 5.0:** spaCy's English models are trained on this corpus, as
> University**, as part of a survey of current parsing technologies > it's several times larger than other English treebanks. However, most
> ([Choi et al., 2015](https://aclweb.org/anthology/P/P15/P15-1038.pdf)). Their > systems do not report accuracies on it.
> results and subsequent discussions helped us develop a novel > - **Penn Treebank:** The "classic" parsing evaluation for research. However,
> psychologically-motivated technique to improve spaCy's accuracy, which we > it's quite far removed from actual usage: it uses sentences with
> published in joint work with Macquarie University > gold-standard segmentation and tokenization, from a pretty specific type of
> ([Honnibal and Johnson, 2015](https://www.aclweb.org/anthology/D/D15/D15-1162.pdf)). > text (articles from a single newspaper, 1984-1989).
import BenchmarksChoi from 'usage/\_benchmarks-choi.md' import Benchmarks from 'usage/\_benchmarks-models.md'
<BenchmarksChoi /> <Benchmarks />
### Algorithm comparison {#algorithm} <!-- TODO: update -->
In this section, we compare spaCy's algorithms to recently published systems, <Project id="benchmarks/penn_treebank">
using some of the most popular benchmarks. These benchmarks are designed to help
isolate the contributions of specific algorithmic decisions, so they promote
slightly "idealized" conditions. Specifically, the text comes pre-processed with
"gold standard" token and sentence boundaries. The data sets also tend to be
fairly small, to help researchers iterate quickly. These conditions mean the
models trained on these data sets are not always useful for practical purposes.
#### Parse accuracy (Penn Treebank / Wall Street Journal) {#parse-accuracy-penn} The easiest way to reproduce spaCy's benchmarks on the Penn Treebank is to clone
our project template.
This is the "classic" evaluation, so it's the number parsing researchers are </Project>
most easily able to put in context. However, it's quite far removed from actual
usage: it uses sentences with gold-standard segmentation and tokenization, from
a pretty specific type of text (articles from a single newspaper, 1984-1989).
> #### Methodology <!-- ## Citing spaCy {#citation}
>
> [Andor et al. (2016)](http://arxiv.org/abs/1603.06042) chose slightly
> different experimental conditions from
> [Choi et al. (2015)](https://aclweb.org/anthology/P/P15/P15-1038.pdf), so the
> two accuracy tables here do not present directly comparable figures.
| System | Year | Type | Accuracy | <!-- TODO: update -->
| ------------------------------------------------------------ | ---- | ------ | --------: |
| spaCy v2.0.0 | 2017 | neural | 94.48 |
| spaCy v1.1.0 | 2016 | linear | 92.80 |
| [Dozat and Manning][dozat and manning] | 2017 | neural | **95.75** |
| [Andor et al.][andor et al.] | 2016 | neural | 94.44 |
| [SyntaxNet Parsey McParseface][syntaxnet parsey mcparseface] | 2016 | neural | 94.15 |
| [Weiss et al.][weiss et al.] | 2015 | neural | 93.91 |
| [Zhang and McDonald][zhang and mcdonald] | 2014 | linear | 93.32 |
| [Martins et al.][martins et al.] | 2013 | linear | 93.10 |
[dozat and manning]: https://arxiv.org/pdf/1611.01734.pdf
[andor et al.]: http://arxiv.org/abs/1603.06042
[syntaxnet parsey mcparseface]:
https://github.com/tensorflow/models/tree/master/research/syntaxnet
[weiss et al.]:
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43800.pdf
[zhang and mcdonald]: http://research.google.com/pubs/archive/38148.pdf
[martins et al.]: http://www.cs.cmu.edu/~ark/TurboParser/
#### NER accuracy (OntoNotes 5, no pre-process) {#ner-accuracy-ontonotes5}
This is the evaluation we use to tune spaCy's parameters to decide which
algorithms are better than the others. It's reasonably close to actual usage,
because it requires the parses to be produced from raw text, without any
pre-processing.
| System | Year | Type | Accuracy |
| -------------------------------------------------- | ---- | ------ | --------: |
| spaCy [`en_core_web_lg`][en_core_web_lg] v2.0.0a3  | 2017 | neural | 85.85 |
| [Strubell et al.][strubell et al.]  | 2017 | neural | **86.81** |
| [Chiu and Nichols][chiu and nichols]  | 2016 | neural | 86.19 |
| [Durrett and Klein][durrett and klein]  | 2014 | neural | 84.04 |
| [Ratinov and Roth][ratinov and roth]  | 2009 | linear | 83.45 |
[en_core_web_lg]: /models/en#en_core_web_lg
[strubell et al.]: https://arxiv.org/pdf/1702.02098.pdf
[chiu and nichols]:
https://www.semanticscholar.org/paper/Named-Entity-Recognition-with-Bidirectional-LSTM-C-Chiu-Nichols/10a4db59e81d26b2e0e896d3186ef81b4458b93f
[durrett and klein]:
https://www.semanticscholar.org/paper/A-Joint-Model-for-Entity-Analysis-Coreference-Typi-Durrett-Klein/28eb033eee5f51c5e5389cbb6b777779203a6778
[ratinov and roth]: http://www.aclweb.org/anthology/W09-1119
### Model comparison {#spacy-models}
In this section, we provide benchmark accuracies for the pretrained model
pipelines we distribute with spaCy. Evaluations are conducted end-to-end from
raw text, with no "gold standard" pre-processing, over text from a mix of genres
where possible.
> #### Methodology
>
> The evaluation was conducted on raw text with no gold standard information.
> The parser, tagger and entity recognizer were trained on the
> [OntoNotes 5](https://www.gabormelli.com/RKB/OntoNotes_Corpus) corpus, the
> word vectors on [Common Crawl](http://commoncrawl.org).
#### English {#benchmarks-models-english}
| Model | spaCy | Type | UAS | NER F | POS | WPS | Size |
| --------------------------------------------------- | ----- | ------ | -------: | -------: | -------: | --------: | -------: |
| [`en_core_web_sm`](/models/en#en_core_web_sm) 2.0.0 | 2.x | neural | 91.7 | 85.3 | 97.0 | 10.1k | **35MB** |
| [`en_core_web_md`](/models/en#en_core_web_md) 2.0.0 | 2.x | neural | 91.7 | **85.9** | 97.1 | 10.0k | 115MB |
| [`en_core_web_lg`](/models/en#en_core_web_lg) 2.0.0 | 2.x | neural | **91.9** | **85.9** | **97.2** | 10.0k | 812MB |
| `en_core_web_sm` 1.2.0 | 1.x | linear | 86.6 | 78.5 | 96.6 | **25.7k** | 50MB |
| `en_core_web_md` 1.2.1 | 1.x | linear | 90.6 | 81.4 | 96.7 | 18.8k | 1GB |
#### Spanish {#benchmarks-models-spanish}
> #### Evaluation note
>
> The NER accuracy refers to the "silver standard" annotations in the WikiNER
> corpus. Accuracy on these annotations tends to be higher than correct human
> annotations.
| Model | spaCy | Type | UAS | NER F | POS | WPS | Size |
| ----------------------------------------------------- | ----- | ------ | -------: | -------: | -------: | ----: | -------: |
| [`es_core_news_sm`](/models/es#es_core_news_sm) 2.0.0 | 2.x | neural | 89.8 | 88.7 | **96.9** | _n/a_ | **35MB** |
| [`es_core_news_md`](/models/es#es_core_news_md) 2.0.0 | 2.x | neural | **90.2** | 89.0 | 97.8 | _n/a_ | 93MB |
| `es_core_web_md` 1.1.0 | 1.x | linear | 87.5 | **94.2** | 96.7 | _n/a_ | 377MB |
### Detailed speed comparison {#speed-comparison}
Here we compare the per-document processing time of various spaCy
functionalities against other NLP libraries. We show both absolute timings (in
ms) and relative performance (normalized to spaCy). Lower is better.
<Infobox title="Important note" variant="warning">
This evaluation was conducted in 2015. We're working on benchmarks on current
CPU and GPU hardware. In the meantime, we're grateful to the Stanford folks for
drawing our attention to what seems to be
[a long-standing error](https://nlp.stanford.edu/software/tokenizer.html#Speed)
in our CoreNLP benchmarks, especially for their tokenizer. Until we run
corrected experiments, we have updated the table using their figures.
</Infobox>
> #### Methodology
>
> - **Set up:** 100,000 plain-text documents were streamed from an SQLite3
> database, and processed with an NLP library, to one of three levels of
> detail — tokenization, tagging, or parsing. The tasks are additive: to parse
> the text you have to tokenize and tag it. The pre-processing was not
> subtracted from the times — we report the time required for the pipeline to
> complete. We report mean times per document, in milliseconds.
> - **Hardware**: Intel i7-3770 (2012)
> - **Implementation**:
> [`spacy-benchmarks`](https://github.com/explosion/spacy-benchmarks)
<Table>
<thead>
<Tr>
<Th></Th>
<Th colSpan="3">Absolute (ms per doc)</Th>
<Th colSpan="3">Relative (to spaCy)</Th>
</Tr>
<Tr>
<Th>System</Th>
<Th>Tokenize</Th>
<Th>Tag</Th>
<Th>Parse</Th>
<Th>Tokenize</Th>
<Th>Tag</Th>
<Th>Parse</Th>
</Tr>
</thead>
<tbody style="text-align: right">
<Tr>
<Td style="text-align: left"><strong>spaCy</strong></Td>
<Td>0.2ms</Td>
<Td>1ms</Td>
<Td>19ms</Td>
<Td>1x</Td>
<Td>1x</Td>
<Td>1x</Td>
</Tr>
<Tr>
<Td style="text-align: left">CoreNLP</Td>
<Td>0.18ms</Td>
<Td>10ms</Td>
<Td>49ms</Td>
<Td>0.9x</Td>
<Td>10x</Td>
<Td>2.6x</Td>
</Tr>
<Tr>
<Td style="text-align: left">ZPar</Td>
<Td>1ms</Td>
<Td>8ms</Td>
<Td>850ms</Td>
<Td>5x</Td>
<Td>8x</Td>
<Td>44.7x</Td>
</Tr>
<Tr>
<Td style="text-align: left">NLTK</Td>
<Td>4ms</Td>
<Td>443ms</Td>
<Td><em>n/a</em></Td>
<Td>20x</Td>
<Td>443x</Td>
<Td><em>n/a</em></Td>
</Tr>
</tbody>
</Table>

View File

@ -166,10 +166,9 @@ $ python setup.py build_ext --inplace # compile spaCy
``` ```
Compared to regular install via pip, the Compared to regular install via pip, the
[`requirements.txt`](https://github.com/explosion/spaCy/tree/master/requirements.txt) [`requirements.txt`](%%GITHUB_SPACY/requirements.txt) additionally installs
additionally installs developer dependencies such as Cython. See the developer dependencies such as Cython. See the [quickstart widget](#quickstart)
[quickstart widget](#quickstart) to get the right commands for your platform and to get the right commands for your platform and Python version.
Python version.
#### Ubuntu {#source-ubuntu} #### Ubuntu {#source-ubuntu}
@ -195,16 +194,14 @@ that matches the version that was used to compile your Python interpreter.
### Run tests {#run-tests} ### Run tests {#run-tests}
spaCy comes with an spaCy comes with an [extensive test suite](%%GITHUB_SPACY/spacy/tests). In order
[extensive test suite](https://github.com/explosion/spaCy/tree/master/spacy/tests). to run the tests, you'll usually want to clone the [repository](%%GITHUB_SPACY)
In order to run the tests, you'll usually want to clone the and [build spaCy from source](#source). This will also install the required
[repository](https://github.com/explosion/spaCy/tree/master/) and
[build spaCy from source](#source). This will also install the required
development dependencies and test utilities defined in the `requirements.txt`. development dependencies and test utilities defined in the `requirements.txt`.
Alternatively, you can find out where spaCy is installed and run `pytest` on Alternatively, you can find out where spaCy is installed and run `pytest` on
that directory. Don't forget to also install the test utilities via spaCy's that directory. Don't forget to also install the test utilities via spaCy's
[`requirements.txt`](https://github.com/explosion/spaCy/tree/master/requirements.txt): [`requirements.txt`](%%GITHUB_SPACY/requirements.txt):
```bash ```bash
$ python -c "import os; import spacy; print(os.path.dirname(spacy.__file__))" $ python -c "import os; import spacy; print(os.path.dirname(spacy.__file__))"

View File

@ -28,9 +28,9 @@ A **model architecture** is a function that wires up a
neural network that is run internally as part of a component in a spaCy neural network that is run internally as part of a component in a spaCy
pipeline. To define the actual architecture, you can implement your logic in pipeline. To define the actual architecture, you can implement your logic in
Thinc directly, or you can use Thinc as a thin wrapper around frameworks such as Thinc directly, or you can use Thinc as a thin wrapper around frameworks such as
PyTorch, TensorFlow and MXNet. Each Model can also be used as a sublayer of a PyTorch, TensorFlow and MXNet. Each `Model` can also be used as a sublayer of a
larger network, allowing you to freely combine implementations from different larger network, allowing you to freely combine implementations from different
frameworks into one `Thinc` Model. frameworks into a single model.
spaCy's built-in components require a `Model` instance to be passed to them via spaCy's built-in components require a `Model` instance to be passed to them via
the config system. To change the model architecture of an existing component, the config system. To change the model architecture of an existing component,
@ -264,9 +264,10 @@ larger network. This effectively means that you can easily wrap different
components from different frameworks, and "glue" them together with Thinc: components from different frameworks, and "glue" them together with Thinc:
```python ```python
from thinc.api import chain, with_array from thinc.api import chain, with_array, PyTorchWrapper
from spacy.ml import CharacterEmbed from spacy.ml import CharacterEmbed
wrapped_pt_model = PyTorchWrapper(torch_model)
char_embed = CharacterEmbed(width, embed_size, nM, nC) char_embed = CharacterEmbed(width, embed_size, nM, nC)
model = chain(char_embed, with_array(wrapped_pt_model)) model = chain(char_embed, with_array(wrapped_pt_model))
``` ```
@ -473,18 +474,17 @@ with Model.define_operators({">>": chain}):
## Create new trainable components {#components} ## Create new trainable components {#components}
<!-- TODO: <Infobox title="This section is still under construction" emoji="🚧" variant="warning">
</Infobox>
<!-- TODO:
- Interaction with `predict`, `get_loss` and `set_annotations` - Interaction with `predict`, `get_loss` and `set_annotations`
- Initialization life-cycle with `begin_training`, correlation with add_label - Initialization life-cycle with `begin_training`, correlation with add_label
Example: relation extraction component (implemented as project template) Example: relation extraction component (implemented as project template)
Avoid duplication with usage/processing-pipelines#trainable-components ? Avoid duplication with usage/processing-pipelines#trainable-components ?
--> -->
![Diagram of a pipeline component with its model](../images/layers-architectures.svg) <!-- ![Diagram of a pipeline component with its model](../images/layers-architectures.svg)
```python ```python
def update(self, examples): def update(self, examples):
@ -498,3 +498,4 @@ def __call__(self, doc):
predictions = self.model([doc]) predictions = self.model([doc])
self.set_annotations(predictions) self.set_annotations(predictions)
``` ```
-->

View File

@ -854,24 +854,22 @@ The algorithm can be summarized as follows:
</Accordion> </Accordion>
**Global** and **language-specific** tokenizer data is supplied via the language **Global** and **language-specific** tokenizer data is supplied via the language
data in data in [`spacy/lang`](%%GITHUB_SPACY/spacy/lang). The tokenizer exceptions
[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang). The define special cases like "don't" in English, which needs to be split into two
tokenizer exceptions define special cases like "don't" in English, which needs tokens: `{ORTH: "do"}` and `{ORTH: "n't", NORM: "not"}`. The prefixes, suffixes
to be split into two tokens: `{ORTH: "do"}` and `{ORTH: "n't", NORM: "not"}`. and infixes mostly define punctuation rules — for example, when to split off
The prefixes, suffixes and infixes mostly define punctuation rules — for periods (at the end of a sentence), and when to leave tokens containing periods
example, when to split off periods (at the end of a sentence), and when to leave intact (abbreviations like "U.S.").
tokens containing periods intact (abbreviations like "U.S.").
<Accordion title="Should I change the language data or add custom tokenizer rules?" id="lang-data-vs-tokenizer"> <Accordion title="Should I change the language data or add custom tokenizer rules?" id="lang-data-vs-tokenizer">
Tokenization rules that are specific to one language, but can be **generalized Tokenization rules that are specific to one language, but can be **generalized
across that language** should ideally live in the language data in across that language** should ideally live in the language data in
[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang) — we [`spacy/lang`](%%GITHUB_SPACY/spacy/lang) — we always appreciate pull requests!
always appreciate pull requests! Anything that's specific to a domain or text Anything that's specific to a domain or text type — like financial trading
type — like financial trading abbreviations, or Bavarian youth slang — should be abbreviations, or Bavarian youth slang — should be added as a special case rule
added as a special case rule to your tokenizer instance. If you're dealing with to your tokenizer instance. If you're dealing with a lot of customizations, it
a lot of customizations, it might make sense to create an entirely custom might make sense to create an entirely custom subclass.
subclass.
</Accordion> </Accordion>
@ -1059,7 +1057,7 @@ but also detailed regular expressions that take the surrounding context into
account. For example, there is a regular expression that treats a hyphen between account. For example, there is a regular expression that treats a hyphen between
letters as an infix. If you do not want the tokenizer to split on hyphens letters as an infix. If you do not want the tokenizer to split on hyphens
between letters, you can modify the existing infix definition from between letters, you can modify the existing infix definition from
[`lang/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/punctuation.py): [`lang/punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py):
```python ```python
### {executable="true"} ### {executable="true"}
@ -1096,10 +1094,10 @@ print([t.text for t in doc]) # ['mother-in-law']
``` ```
For an overview of the default regular expressions, see For an overview of the default regular expressions, see
[`lang/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/punctuation.py) [`lang/punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) and
and language-specific definitions such as language-specific definitions such as
[`lang/de/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/de/punctuation.py) [`lang/de/punctuation.py`](%%GITHUB_SPACY/spacy/lang/de/punctuation.py) for
for German. German.
### Hooking a custom tokenizer into the pipeline {#custom-tokenizer} ### Hooking a custom tokenizer into the pipeline {#custom-tokenizer}

View File

@ -76,7 +76,7 @@ spaCy also supports pipelines trained on more than one language. This is
especially useful for named entity recognition. The language ID used for especially useful for named entity recognition. The language ID used for
multi-language or language-neutral pipelines is `xx`. The language class, a multi-language or language-neutral pipelines is `xx`. The language class, a
generic subclass containing only the base language data, can be found in generic subclass containing only the base language data, can be found in
[`lang/xx`](https://github.com/explosion/spaCy/tree/master/spacy/lang/xx). [`lang/xx`](%%GITHUB_SPACY/spacy/lang/xx).
To train a pipeline using the neutral multi-language class, you can set To train a pipeline using the neutral multi-language class, you can set
`lang = "xx"` in your [training config](/usage/training#config). You can also `lang = "xx"` in your [training config](/usage/training#config). You can also

View File

@ -728,18 +728,21 @@ workflows, but only one can be tracked by DVC.
</Infobox> </Infobox>
<Project id="integrations/dvc"> <!-- TODO: <Project id="integrations/dvc">
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum </Project> -->
sodales lectus, ut sodales orci ullamcorper id. Sed condimentum neque ut erat
mattis pretium.
</Project>
--- ---
### Prodigy {#prodigy} <IntegrationLogo name="prodigy" width={100} height="auto" align="right" /> ### Prodigy {#prodigy} <IntegrationLogo name="prodigy" width={100} height="auto" align="right" />
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
The Prodigy integration will require a nightly version of Prodigy that supports
spaCy v3+.
</Infobox>
[Prodigy](https://prodi.gy) is a modern annotation tool for creating training [Prodigy](https://prodi.gy) is a modern annotation tool for creating training
data for machine learning models, developed by us. It integrates with spaCy data for machine learning models, developed by us. It integrates with spaCy
out-of-the-box and provides many different out-of-the-box and provides many different
@ -795,9 +798,7 @@ results.
<Project id="integrations/prodigy"> <Project id="integrations/prodigy">
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum <!-- TODO: -->
sodales lectus, ut sodales orci ullamcorper id. Sed condimentum neque ut erat
mattis pretium.
</Project> </Project>
@ -805,10 +806,6 @@ mattis pretium.
### Streamlit {#streamlit} <IntegrationLogo name="streamlit" width={150} height="auto" align="right" /> ### Streamlit {#streamlit} <IntegrationLogo name="streamlit" width={150} height="auto" align="right" />
<Grid cols={2} gutterBottom={false}>
<div>
[Streamlit](https://streamlit.io) is a Python framework for building interactive [Streamlit](https://streamlit.io) is a Python framework for building interactive
data apps. The [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit) data apps. The [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit)
package helps you integrate spaCy visualizations into your Streamlit apps and package helps you integrate spaCy visualizations into your Streamlit apps and
@ -817,16 +814,14 @@ full embedded visualizer, as well as individual components.
<!-- TODO: update once version is stable --> <!-- TODO: update once version is stable -->
```bash > #### Installation
$ pip install "spacy_streamlit>=1.0.0a0" >
``` > ```bash
> $ pip install "spacy_streamlit>=1.0.0a0"
</div> > ```
![](../images/spacy-streamlit.png) ![](../images/spacy-streamlit.png)
</Grid>
Using [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit), your Using [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit), your
projects can easily define their own scripts that spin up an interactive projects can easily define their own scripts that spin up an interactive
visualizer, using the latest pipeline you trained, or a selection of pipelines visualizer, using the latest pipeline you trained, or a selection of pipelines
@ -917,10 +912,43 @@ https://github.com/explosion/projects/blob/v3/integrations/fastapi/scripts/main.
### Ray {#ray} <IntegrationLogo name="ray" width={100} height="auto" align="right" /> ### Ray {#ray} <IntegrationLogo name="ray" width={100} height="auto" align="right" />
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
</Infobox>
<!-- TODO: document --> <!-- TODO: document -->
--- ---
### Weights & Biases {#wandb} <IntegrationLogo name="wandb" width={175} height="auto" align="right" /> ### Weights & Biases {#wandb} <IntegrationLogo name="wandb" width={175} height="auto" align="right" />
<!-- TODO: link to WandB logger, explain that it's built-in but that you can also do other cool stuff with WandB? And then include example project (still need to decide what we want to do here) --> [Weights & Biases](https://www.wandb.com/) is a popular platform for experiment
tracking. spaCy integrates with it out-of-the-box via the
[`WandbLogger`](/api/top-level#WandbLogger), which you can add as the
`[training.logger]` block of your training [config](/usage/training#config). The
results of each step are then logged in your project, together with the full
**training config**. This means that _every_ hyperparameter, registered function
name and argument will be tracked and you'll be able to see the impact it has on
your results.
> #### Example config
>
> ```ini
> [training.logger]
> @loggers = "spacy.WandbLogger.v1"
> project_name = "monitor_spacy_training"
> remove_config_values = ["paths.train", "paths.dev", "training.dev_corpus.path", "training.train_corpus.path"]
> ```
![Screenshot: Visualized training results](../images/wandb1.jpg)
![Screenshot: Parameter importance using config values](../images/wandb2.jpg 'Parameter importance using config values')
<Project id="integrations/wandb">
Get started with tracking your spaCy training runs in Weights & Biases using our
project template. It includes a simple config using the `WandbLogger`, as well
as a custom logger implementation you can adjust for your specific use case.
<!-- TODO: -->
</Project>

View File

@ -192,12 +192,11 @@ of [`Token`](/api/token). This means that all of the attributes that refer to
computed properties can't be accessed. computed properties can't be accessed.
The uppercase attribute names like `LOWER` or `IS_PUNCT` refer to symbols from The uppercase attribute names like `LOWER` or `IS_PUNCT` refer to symbols from
the the [`spacy.attrs`](%%GITHUB_SPACY/spacy/attrs.pyx) enum table. They're passed
[`spacy.attrs`](https://github.com/explosion/spaCy/tree/master/spacy/attrs.pyx) into a function that essentially is a big case/switch statement, to figure out
enum table. They're passed into a function that essentially is a big case/switch which struct field to return. The same attribute identifiers are used in
statement, to figure out which struct field to return. The same attribute [`Doc.to_array`](/api/doc#to_array), and a few other places in the code where
identifiers are used in [`Doc.to_array`](/api/doc#to_array), and a few other you need to describe fields like this.
places in the code where you need to describe fields like this.
</Accordion> </Accordion>

View File

@ -187,11 +187,11 @@ add to that data and saves and loads the data to and from a JSON file.
> >
> To see custom serialization methods in action, check out the new > To see custom serialization methods in action, check out the new
> [`EntityRuler`](/api/entityruler) component and its > [`EntityRuler`](/api/entityruler) component and its
> [source](https://github.com/explosion/spaCy/tree/master/spacy/pipeline/entityruler.py). > [source](%%GITHUB_SPACY/spacy/pipeline/entityruler.py). Patterns added to the
> Patterns added to the component will be saved to a `.jsonl` file if the > component will be saved to a `.jsonl` file if the pipeline is serialized to
> pipeline is serialized to disk, and to a bytestring if the pipeline is > disk, and to a bytestring if the pipeline is serialized to bytes. This allows
> serialized to bytes. This allows saving out a pipeline with a rule-based > saving out a pipeline with a rule-based entity recognizer and including all
> entity recognizer and including all rules _with_ the component data. > rules _with_ the component data.
```python ```python
### {highlight="14-18,20-25"} ### {highlight="14-18,20-25"}

View File

@ -494,7 +494,7 @@ regressions to the parts of the library that you care about the most.
**For more details on the types of contributions we're looking for, the code **For more details on the types of contributions we're looking for, the code
conventions and other useful tips, make sure to check out the conventions and other useful tips, make sure to check out the
[contributing guidelines](https://github.com/explosion/spaCy/tree/master/CONTRIBUTING.md).** [contributing guidelines](%%GITHUB_SPACY/CONTRIBUTING.md).**
<Infobox title="Code of Conduct" variant="warning"> <Infobox title="Code of Conduct" variant="warning">

View File

@ -59,7 +59,7 @@ specific use case. It's also available in spaCy as the
import QuickstartTraining from 'widgets/quickstart-training.js' import QuickstartTraining from 'widgets/quickstart-training.js'
<QuickstartTraining download="base_config.cfg" /> <QuickstartTraining />
After you've saved the starter config to a file `base_config.cfg`, you can use After you've saved the starter config to a file `base_config.cfg`, you can use
the [`init fill-config`](/api/cli#init-fill-config) command to fill in the the [`init fill-config`](/api/cli#init-fill-config) command to fill in the
@ -127,7 +127,7 @@ Some of the main advantages and features of spaCy's training config are:
config which types of data to expect. config which types of data to expect.
```ini ```ini
https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg %%GITHUB_SPACY/spacy/default_config.cfg
``` ```
Under the hood, the config is parsed into a dictionary. It's divided into Under the hood, the config is parsed into a dictionary. It's divided into

View File

@ -76,9 +76,7 @@ noise contrastive estimation or reinforcement learning.
## New features {#features} ## New features {#features}
This section contains an overview of the most important **new features and This section contains an overview of the most important **new features and
improvements**. The [API docs](/api) include additional deprecation notes. New improvements**. The [API docs](/api) include additional deprecation notes.
methods and functions that were introduced in this version are marked with the
tag <Tag variant="new">2</Tag>.
### Convolutional neural network models {#features-models} ### Convolutional neural network models {#features-models}

View File

@ -8,20 +8,30 @@ menu:
- ['Migrating from v2.x', 'migrating'] - ['Migrating from v2.x', 'migrating']
--- ---
## Summary {#summary} ## Summary {#summary hidden="true"}
<Grid cols={2}> <Grid cols={2} gutterBottom={false}>
<div> <div>
spaCy v3.0 features all new **transformer-based pipelines** that bring spaCy's
accuracy right up to the current **state-of-the-art**. You can use any
pretrained transformer to train your own pipelines, and even share one
transformer between multiple components with **multi-task learning**. Training
is now fully configurable and extensible, and you can define your own custom
models using **PyTorch**, **TensorFlow** and other frameworks. The new spaCy
projects system lets you describe whole **end-to-end workflows** in a single
file, giving you an easy path from prototype to production, and making it easy
to clone and adapt best-practice projects for your own use cases.
</div> </div>
<Infobox title="Table of Contents" id="toc"> <Infobox title="Table of Contents" id="toc">
- [Summary](#summary) - [Summary](#summary)
- [New features](#features) - [New features](#features)
- [Training & config system](#features-training)
- [Transformer-based pipelines](#features-transformers) - [Transformer-based pipelines](#features-transformers)
- [Training & config system](#features-training)
- [Custom models](#features-custom-models) - [Custom models](#features-custom-models)
- [End-to-end project workflows](#features-projects) - [End-to-end project workflows](#features-projects)
- [New built-in components](#features-pipeline-components) - [New built-in components](#features-pipeline-components)
@ -39,47 +49,126 @@ menu:
## New Features {#features} ## New Features {#features}
### New training workflow and config system {#features-training} This section contains an overview of the most important **new features and
improvements**. The [API docs](/api) include additional deprecation notes. New
<Infobox title="Details & Documentation" emoji="📖" list> methods and functions that were introduced in this version are marked with the
tag <Tag variant="new">3</Tag>.
- **Usage:** [Training pipelines and models](/usage/training)
- **Thinc:** [Thinc's config system](https://thinc.ai/docs/usage-config),
[`Config`](https://thinc.ai/docs/api-config#config)
- **CLI:** [`train`](/api/cli#train), [`pretrain`](/api/cli#pretrain),
[`evaluate`](/api/cli#evaluate)
- **API:** [Config format](/api/data-formats#config),
[`registry`](/api/top-level#registry)
</Infobox>
### Transformer-based pipelines {#features-transformers} ### Transformer-based pipelines {#features-transformers}
> #### Example
>
> ```cli
> $ python -m spacy download en_core_web_trf
> ```
spaCy v3.0 features all new transformer-based pipelines that bring spaCy's
accuracy right up to the current **state-of-the-art**. You can use any
pretrained transformer to train your own pipelines, and even share one
transformer between multiple components with **multi-task learning**. spaCy's
transformer support interoperates with [PyTorch](https://pytorch.org) and the
[HuggingFace `transformers`](https://huggingface.co/transformers/) library,
giving you access to thousands of pretrained models for your pipelines.
![Pipeline components listening to shared embedding component](../images/tok2vec-listener.svg) ![Pipeline components listening to shared embedding component](../images/tok2vec-listener.svg)
import Benchmarks from 'usage/\_benchmarks-models.md'
<Benchmarks />
<Infobox title="Details & Documentation" emoji="📖" list> <Infobox title="Details & Documentation" emoji="📖" list>
- **Usage:** [Embeddings & Transformers](/usage/embeddings-transformers), - **Usage:** [Embeddings & Transformers](/usage/embeddings-transformers),
[Training pipelines and models](/usage/training) [Training pipelines and models](/usage/training),
[Benchmarks](/usage/facts-figures#benchmarks)
- **API:** [`Transformer`](/api/transformer), - **API:** [`Transformer`](/api/transformer),
[`TransformerData`](/api/transformer#transformerdata), [`TransformerData`](/api/transformer#transformerdata),
[`FullTransformerBatch`](/api/transformer#fulltransformerbatch) [`FullTransformerBatch`](/api/transformer#fulltransformerbatch)
- **Architectures:** [TransformerModel](/api/architectures#TransformerModel), - **Architectures:** [TransformerModel](/api/architectures#TransformerModel),
[TransformerListener](/api/architectures#TransformerListener), [TransformerListener](/api/architectures#TransformerListener),
[Tok2VecTransformer](/api/architectures#Tok2VecTransformer) [Tok2VecTransformer](/api/architectures#Tok2VecTransformer)
- **Trained Pipelines:** [`en_core_trf_lg_sm`](/models/en) - **Trained Pipelines:** [`en_core_web_trf`](/models/en#en_core_web_trf)
- **Implementation:** - **Implementation:**
[`spacy-transformers`](https://github.com/explosion/spacy-transformers) [`spacy-transformers`](https://github.com/explosion/spacy-transformers)
</Infobox> </Infobox>
### New training workflow and config system {#features-training}
> #### Example
>
> ```ini
> [training]
> vectors = null
> accumulate_gradient = 3
>
> [training.optimizer]
> @optimizers = "Adam.v1"
>
> [training.optimizer.learn_rate]
> @schedules = "warmup_linear.v1"
> warmup_steps = 250
> total_steps = 20000
> initial_rate = 0.01
> ```
spaCy v3.0 introduces a comprehensive and extensible system for **configuring
your training runs**. A single configuration file describes every detail of your
training run, with no hidden defaults, making it easy to rerun your experiments
and track changes. You can use the
[quickstart widget](/usage/training#quickstart) or the `init config` command to
get started. Instead of providing lots of arguments on the command line, you
only need to pass your `config.cfg` file to `spacy train`.
Training config files include all **settings and hyperparameters** for training
your pipeline. Some settings can also be registered **functions** that you can
swap out and customize, making it easy to implement your own custom models and
architectures.
<Infobox title="Details & Documentation" emoji="📖" list>
- **Usage:** [Training pipelines and models](/usage/training)
- **Thinc:** [Thinc's config system](https://thinc.ai/docs/usage-config),
[`Config`](https://thinc.ai/docs/api-config#config)
- **CLI:** [`init config`](/api/cli#init-config),
[`init fill-config`](/api/cli#init-fill-config), [`train`](/api/cli#train),
[`pretrain`](/api/cli#pretrain), [`evaluate`](/api/cli#evaluate)
- **API:** [Config format](/api/data-formats#config),
[`registry`](/api/top-level#registry)
</Infobox>
### Custom models using any framework {#features-custom-models} ### Custom models using any framework {#features-custom-models}
> #### Example
>
> ```python
> from torch import nn
> from thinc.api import PyTorchWrapper
>
> torch_model = nn.Sequential(
> nn.Linear(32, 32),
> nn.ReLU(),
> nn.Softmax(dim=1)
> )
> model = PyTorchWrapper(torch_model)
> ```
spaCy's new configuration system makes it easy to customize the neural network
models used by the different pipeline components. You can also implement your
own architectures via spaCy's machine learning library [Thinc](https://thinc.ai)
that provides various layers and utilities, as well as thin wrappers around
frameworks like **PyTorch**, **TensorFlow** and **MXNet**. Component models all
follow the same unified [`Model`](https://thinc.ai/docs/api-model) API and each
`Model` can also be used as a sublayer of a larger network, allowing you to
freely combine implementations from different frameworks into a single model.
<Infobox title="Details & Documentation" emoji="📖" list> <Infobox title="Details & Documentation" emoji="📖" list>
- **Usage:** [Layers and architectures](/usage/layers-architectures) - **Usage:** [Layers and architectures](/usage/layers-architectures)
- **Thinc:** - **Thinc:**
[Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks) [Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks),
[`Model` API](https://thinc.ai/docs/api-model)
- **API:** [Model architectures](/api/architectures), [`Pipe`](/api/pipe) - **API:** [Model architectures](/api/architectures), [`Pipe`](/api/pipe)
</Infobox> </Infobox>
@ -159,8 +248,7 @@ add to your pipeline and customize for your use case:
- **Usage:** [Processing pipelines](/usage/processing-pipelines) - **Usage:** [Processing pipelines](/usage/processing-pipelines)
- **API:** [Built-in pipeline components](/api#architecture-pipeline) - **API:** [Built-in pipeline components](/api#architecture-pipeline)
- **Implementation:** - **Implementation:** [`spacy/pipeline`](%%GITHUB_SPACY/spacy/pipeline)
[`spacy/pipeline`](https://github.com/explosion/spaCy/tree/develop/spacy/pipeline)
</Infobox> </Infobox>
@ -197,15 +285,12 @@ aren't set.
[`@Language.factory`](/api/language#factory), [`@Language.factory`](/api/language#factory),
[`Language.add_pipe`](/api/language#add_pipe), [`Language.add_pipe`](/api/language#add_pipe),
[`Language.analyze_pipes`](/api/language#analyze_pipes) [`Language.analyze_pipes`](/api/language#analyze_pipes)
- **Implementation:** - **Implementation:** [`spacy/language.py`](%%GITHUB_SPACY/spacy/language.py)
[`spacy/language.py`](https://github.com/explosion/spaCy/tree/develop/spacy/language.py)
</Infobox> </Infobox>
### Dependency matching {#features-dep-matcher} ### Dependency matching {#features-dep-matcher}
<!-- TODO: improve summary -->
> #### Example > #### Example
> >
> ```python > ```python
@ -233,7 +318,7 @@ dictionaries**, with each dictionary describing a **token to match** and its
[Dependency matching](/usage/rule-based-matching#dependencymatcher) [Dependency matching](/usage/rule-based-matching#dependencymatcher)
- **API:** [`DependencyMatcher`](/api/dependencymatcher) - **API:** [`DependencyMatcher`](/api/dependencymatcher)
- **Implementation:** - **Implementation:**
[`spacy/matcher/dependencymatcher.pyx`](https://github.com/explosion/spaCy/tree/develop/spacy/matcher/dependencymatcher.pyx) [`spacy/matcher/dependencymatcher.pyx`](%%GITHUB_SPACY/spacy/matcher/dependencymatcher.pyx)
</Infobox> </Infobox>
@ -404,11 +489,12 @@ Note that spaCy v3.0 now requires **Python 3.6+**.
[`Pipe.begin_training`](/api/pipe#begin_training) now take a function that [`Pipe.begin_training`](/api/pipe#begin_training) now take a function that
returns a sequence of `Example` objects to initialize the model instead of a returns a sequence of `Example` objects to initialize the model instead of a
list of tuples. list of tuples.
- [`Matcher.add`](/api/matcher#add), - [`Matcher.add`](/api/matcher#add) and
[`PhraseMatcher.add`](/api/phrasematcher#add) and [`PhraseMatcher.add`](/api/phrasematcher#add) now only accept a list of
[`DependencyMatcher.add`](/api/dependencymatcher#add) now only accept a list patterns as the second argument (instead of a variable number of arguments).
of patterns as the second argument (instead of a variable number of The `on_match` callback becomes an optional keyword argument.
arguments). The `on_match` callback becomes an optional keyword argument. - The `spacy.gold` module has been renamed to
[`spacy.training`](%%GITHUB_SPACY/spacy/training).
- The `PRON_LEMMA` symbol and `-PRON-` as an indicator for pronoun lemmas have - The `PRON_LEMMA` symbol and `-PRON-` as an indicator for pronoun lemmas have
been removed. been removed.
- The `TAG_MAP` and `MORPH_RULES` in the language data have been replaced by the - The `TAG_MAP` and `MORPH_RULES` in the language data have been replaced by the
@ -779,6 +865,20 @@ python -m spacy package ./output ./packages
- python setup.py sdist - python setup.py sdist
``` ```
#### Data utilities and gold module {#migrating-gold}
The `spacy.gold` module has been renamed to `spacy.training`. This mostly
affects internals, but if you've been using the span offset conversion utilities
[`biluo_tags_from_offsets`](/api/top-level#biluo_tags_from_offsets),
[`offsets_from_biluo_tags`](/api/top-level#offsets_from_biluo_tags) or
[`spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags), you'll have to
change your imports:
```diff
- from spacy.gold import biluo_tags_from_offsets, spans_from_biluo_tags
+ from spacy.training import biluo_tags_from_offsets, spans_from_biluo_tags
```
#### Migration notes for plugin maintainers {#migrating-plugins} #### Migration notes for plugin maintainers {#migrating-plugins}
Thanks to everyone who's been contributing to the spaCy ecosystem by developing Thanks to everyone who's been contributing to the spaCy ecosystem by developing

View File

@ -8,7 +8,6 @@ const codeBlocksPlugin = require('./src/plugins/remark-code-blocks.js')
// Import metadata // Import metadata
const site = require('./meta/site.json') const site = require('./meta/site.json')
const logos = require('./meta/logos.json')
const sidebars = require('./meta/sidebars.json') const sidebars = require('./meta/sidebars.json')
const models = require('./meta/languages.json') const models = require('./meta/languages.json')
const universe = require('./meta/universe.json') const universe = require('./meta/universe.json')
@ -20,11 +19,16 @@ const favicon = isNightly ? `src/images/icon_nightly.png` : `src/images/icon.png
const binderBranch = isNightly ? 'nightly' : site.binderBranch const binderBranch = isNightly ? 'nightly' : site.binderBranch
const siteUrl = isNightly ? site.siteUrlNightly : site.siteUrl const siteUrl = isNightly ? site.siteUrlNightly : site.siteUrl
const domain = isNightly ? site.domainNightly : site.domain const domain = isNightly ? site.domainNightly : site.domain
// Branch used to build the GitHub URL below: `develop` for nightly builds, `master` otherwise
const branch = isNightly ? 'develop' : 'master'
// Placeholders replaced in the Markdown by gatsby-remark-find-replace. They are
// written with the `%%` prefix configured in the plugin options, e.g. %%GITHUB_SPACY
const replacements = {
GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`,
}
module.exports = { module.exports = {
siteMetadata: { siteMetadata: {
...site, ...site,
...logos,
sidebars, sidebars,
...models, ...models,
universe, universe,
@ -121,6 +125,13 @@ module.exports = {
{ {
resolve: `gatsby-remark-copy-linked-files`, resolve: `gatsby-remark-copy-linked-files`,
}, },
{
resolve: 'gatsby-remark-find-replace',
options: {
replacements,
prefix: '%%',
},
},
], ],
}, },
}, },

View File

@ -1,37 +0,0 @@
{
"logosUsers": [
{ "id": "airbnb", "url": "https://www.airbnb.com" },
{ "id": "uber", "url": "https://www.uber.com" },
{ "id": "quora", "url": "https://www.quora.com" },
{ "id": "retriever", "url": "https://www.retriever.no" },
{ "id": "stitchfix", "url": "https://www.stitchfix.com" },
{ "id": "chartbeat", "url": "https://chartbeat.com" },
{ "id": "allenai", "url": "https://allenai.org" }
],
"logosPublications": [
{
"id": "recode",
"url": "https://www.recode.net/2017/6/22/15855492/ai-artificial-intelligence-nonprofit-good-human-chatbots-machine-learning"
},
{
"id": "wapo",
"url": "https://www.washingtonpost.com/news/wonk/wp/2016/05/18/googles-new-artificial-intelligence-cant-understand-these-sentences-can-you/"
},
{
"id": "bbc",
"url": "http://www.bbc.co.uk/rd/blog/2017-08-irfs-weeknotes-number-250"
},
{
"id": "microsoft",
"url": "https://www.microsoft.com/developerblog/2016/09/13/training-a-classifier-for-relation-extraction-from-medical-literature/"
},
{
"id": "venturebeat",
"url": "https://venturebeat.com/2017/01/27/4-ai-startups-that-analyze-customer-reviews/"
},
{
"id": "thoughtworks",
"url": "https://www.thoughtworks.com/radar/tools"
}
]
}

View File

@ -28,7 +28,7 @@
}, },
"binderUrl": "explosion/spacy-io-binder", "binderUrl": "explosion/spacy-io-binder",
"binderBranch": "live", "binderBranch": "live",
"binderVersion": "2.3.0", "binderVersion": "3.0.0",
"sections": [ "sections": [
{ "id": "usage", "title": "Usage Documentation", "theme": "blue" }, { "id": "usage", "title": "Usage Documentation", "theme": "blue" },
{ "id": "models", "title": "Models Documentation", "theme": "blue" }, { "id": "models", "title": "Models Documentation", "theme": "blue" },
@ -47,20 +47,19 @@
"items": [ "items": [
{ "text": "Usage", "url": "/usage" }, { "text": "Usage", "url": "/usage" },
{ "text": "Models", "url": "/models" }, { "text": "Models", "url": "/models" },
{ "text": "API", "url": "/api" }, { "text": "API Reference", "url": "/api" },
{ "text": "Universe", "url": "/universe" } { "text": "Online Course", "url": "https://course.spacy.io" }
] ]
}, },
{ {
"label": "Support", "label": "Community",
"items": [ "items": [
{ "text": "Universe", "url": "/universe" },
{ "text": "Issue Tracker", "url": "https://github.com/explosion/spaCy/issues" }, { "text": "Issue Tracker", "url": "https://github.com/explosion/spaCy/issues" },
{ {
"text": "Stack Overflow", "text": "Stack Overflow",
"url": "http://stackoverflow.com/questions/tagged/spacy" "url": "http://stackoverflow.com/questions/tagged/spacy"
}, }
{ "text": "Reddit User Group", "url": "https://www.reddit.com/r/spacynlp/" },
{ "text": "Gitter Chat", "url": "https://gitter.im/explosion/spaCy" }
] ]
}, },
{ {

View File

@ -14238,6 +14238,46 @@
} }
} }
}, },
"gatsby-remark-find-replace": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/gatsby-remark-find-replace/-/gatsby-remark-find-replace-0.3.0.tgz",
"integrity": "sha512-tTXt+ZxD+7hEVtZVbZVrifcQUk2mt4uJNUHhc9cje+93sDa4PrrFBbny9IWgXLj9QH9xDxWOZrI768ApMtbPUQ==",
"requires": {
"escape-string-regexp": "^2.0.0",
"unist-util-visit": "^2.0.1"
},
"dependencies": {
"escape-string-regexp": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz",
"integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w=="
},
"unist-util-is": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-4.0.2.tgz",
"integrity": "sha512-Ofx8uf6haexJwI1gxWMGg6I/dLnF2yE+KibhD3/diOqY2TinLcqHXCV6OI5gFVn3xQqDH+u0M625pfKwIwgBKQ=="
},
"unist-util-visit": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-2.0.3.tgz",
"integrity": "sha512-iJ4/RczbJMkD0712mGktuGpm/U4By4FfDonL7N/9tATGIF4imikjOuagyMY53tnZq3NP6BcmlrHhEKAfGWjh7Q==",
"requires": {
"@types/unist": "^2.0.0",
"unist-util-is": "^4.0.0",
"unist-util-visit-parents": "^3.0.0"
}
},
"unist-util-visit-parents": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-3.1.0.tgz",
"integrity": "sha512-0g4wbluTF93npyPrp/ymd3tCDTMnP0yo2akFD2FIBAYXq/Sga3lwaU1D8OYKbtpioaI6CkDcQ6fsMnmtzt7htw==",
"requires": {
"@types/unist": "^2.0.0",
"unist-util-is": "^4.0.0"
}
}
}
},
"gatsby-remark-images": { "gatsby-remark-images": {
"version": "3.0.4", "version": "3.0.4",
"resolved": "https://registry.npmjs.org/gatsby-remark-images/-/gatsby-remark-images-3.0.4.tgz", "resolved": "https://registry.npmjs.org/gatsby-remark-images/-/gatsby-remark-images-3.0.4.tgz",
@ -22152,6 +22192,14 @@
"clipboard": "^2.0.0" "clipboard": "^2.0.0"
} }
}, },
"prismjs-bibtex": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/prismjs-bibtex/-/prismjs-bibtex-1.1.0.tgz",
"integrity": "sha512-IjZUJP3iTkV1DZ8qcjUF7p7Ji/LPns56jw+INUBPtnBaX4Q/VhtzlRGHM0lxSvdfqUvqgTGF3oM8aogWqzZz2g==",
"requires": {
"prismjs": "^1.15"
}
},
"private": { "private": {
"version": "0.1.8", "version": "0.1.8",
"resolved": "https://registry.npmjs.org/private/-/private-0.1.8.tgz", "resolved": "https://registry.npmjs.org/private/-/private-0.1.8.tgz",

View File

@ -31,6 +31,7 @@
"gatsby-plugin-sitemap": "^2.0.5", "gatsby-plugin-sitemap": "^2.0.5",
"gatsby-plugin-svgr": "^2.0.1", "gatsby-plugin-svgr": "^2.0.1",
"gatsby-remark-copy-linked-files": "^2.0.9", "gatsby-remark-copy-linked-files": "^2.0.9",
"gatsby-remark-find-replace": "^0.3.0",
"gatsby-remark-images": "^3.0.4", "gatsby-remark-images": "^3.0.4",
"gatsby-remark-prismjs": "^3.2.4", "gatsby-remark-prismjs": "^3.2.4",
"gatsby-remark-smartypants": "^2.0.8", "gatsby-remark-smartypants": "^2.0.8",
@ -44,6 +45,7 @@
"node-sass": "^4.11.0", "node-sass": "^4.11.0",
"parse-numeric-range": "0.0.2", "parse-numeric-range": "0.0.2",
"prismjs": "^1.15.0", "prismjs": "^1.15.0",
"prismjs-bibtex": "^1.1.0",
"prop-types": "^15.7.2", "prop-types": "^15.7.2",
"react": "^16.8.2", "react": "^16.8.2",
"react-dom": "^16.8.2", "react-dom": "^16.8.2",

View File

@ -2,6 +2,7 @@ import React, { Fragment } from 'react'
import PropTypes from 'prop-types' import PropTypes from 'prop-types'
import classNames from 'classnames' import classNames from 'classnames'
import highlightCode from 'gatsby-remark-prismjs/highlight-code.js' import highlightCode from 'gatsby-remark-prismjs/highlight-code.js'
import 'prismjs-bibtex'
import rangeParser from 'parse-numeric-range' import rangeParser from 'parse-numeric-range'
import { StaticQuery, graphql } from 'gatsby' import { StaticQuery, graphql } from 'gatsby'
import { window } from 'browser-monads' import { window } from 'browser-monads'

View File

@ -9,6 +9,7 @@ export default function Grid({
narrow = false, narrow = false,
gutterBottom = true, gutterBottom = true,
className, className,
style,
children, children,
}) { }) {
const gridClassNames = classNames(classes.root, className, { const gridClassNames = classNames(classes.root, className, {
@ -18,7 +19,11 @@ export default function Grid({
[classes.third]: cols === 3, [classes.third]: cols === 3,
[classes.quarter]: cols === 4, [classes.quarter]: cols === 4,
}) })
return <div className={gridClassNames}>{children}</div> return (
<div className={gridClassNames} style={style}>
{children}
</div>
)
} }
Grid.propTypes = { Grid.propTypes = {

View File

@ -23,7 +23,7 @@ export default function Infobox({
<aside className={infoboxClassNames} id={id}> <aside className={infoboxClassNames} id={id}>
{title && ( {title && (
<h4 className={classes.title}> <h4 className={classes.title}>
{variant !== 'default' && ( {variant !== 'default' && !emoji && (
<Icon width={18} name={variant} inline className={classes.icon} /> <Icon width={18} name={variant} inline className={classes.icon} />
)} )}
<span className={classes.titleText}> <span className={classes.titleText}>

View File

@ -1,19 +1,17 @@
import React, { Fragment } from 'react' import React from 'react'
import classNames from 'classnames' import classNames from 'classnames'
import pattern from '../images/pattern_blue.jpg' import pattern from '../images/pattern_blue.jpg'
import patternNightly from '../images/pattern_nightly.jpg' import patternNightly from '../images/pattern_nightly.jpg'
import patternOverlay from '../images/pattern_landing.jpg' import patternOverlay from '../images/pattern_landing.jpg'
import patternOverlayNightly from '../images/pattern_landing_nightly.jpg' import patternOverlayNightly from '../images/pattern_landing_nightly.jpg'
import logoSvgs from '../images/logos'
import Grid from './grid' import Grid from './grid'
import { Content } from './main' import { Content } from './main'
import Button from './button' import Button from './button'
import CodeBlock from './code' import CodeBlock from './code'
import { H1, H2, H3, Label, InlineList } from './typography' import { H1, H2, H3 } from './typography'
import Link from './link' import Link from './link'
import { chunkArray } from './util'
import classes from '../styles/landing.module.sass' import classes from '../styles/landing.module.sass'
export const LandingHeader = ({ nightly, style = {}, children }) => { export const LandingHeader = ({ nightly, style = {}, children }) => {
@ -39,9 +37,9 @@ export const LandingSubtitle = ({ children }) => (
</h2> </h2>
) )
export const LandingGrid = ({ cols = 3, blocks = false, children }) => ( export const LandingGrid = ({ cols = 3, blocks = false, style, children }) => (
<Content className={classNames(classes.grid, { [classes.blocks]: blocks })}> <Content className={classNames(classes.grid, { [classes.blocks]: blocks })}>
<Grid cols={cols} narrow={blocks}> <Grid cols={cols} narrow={blocks} style={style}>
{children} {children}
</Grid> </Grid>
</Content> </Content>
@ -142,33 +140,3 @@ export const LandingBannerButton = ({ to, small, children }) => (
</Button> </Button>
</div> </div>
) )
export const LandingLogos = ({ logos = [], title, maxRow = 4, children }) => {
const rows = chunkArray(logos, maxRow)
return (
<Content className={classes.logos}>
{title && <Label>{title}</Label>}
{rows.map((logos, i) => (
<Fragment key={i}>
<InlineList className={classes.logosContent}>
{logos.map(({ id, url }, j) => {
const Component = logoSvgs[id]
return !Component ? null : (
<Link
to={url}
key={j}
aria-label={id}
hidden
className={classes.logo}
>
<Component />
</Link>
)
})}
{i === rows.length - 1 && children}
</InlineList>
</Fragment>
))}
</Content>
)
}

View File

@ -1,7 +1,8 @@
import React from 'react' import React, { Fragment } from 'react'
import classNames from 'classnames' import classNames from 'classnames'
import Icon from './icon' import Icon from './icon'
import { Help } from './typography'
import { isString } from './util' import { isString } from './util'
import classes from '../styles/table.module.sass' import classes from '../styles/table.module.sass'
@ -16,14 +17,26 @@ function getCellContent(cellChildren) {
'✅': { name: 'yes', variant: 'success', 'aria-label': 'positive' }, '✅': { name: 'yes', variant: 'success', 'aria-label': 'positive' },
'❌': { name: 'no', variant: 'error', 'aria-label': 'negative' }, '❌': { name: 'no', variant: 'error', 'aria-label': 'negative' },
} }
// Matches a known icon emoji at the very start of a string.
// Deliberately NOT global: a `g` regex keeps `lastIndex` between `.test()` calls,
// so after one successful match the next child would be tested from a non-zero
// offset, where the `^` anchor can never match. `split` ignores the flag anyway.
const iconRe = new RegExp(`^(${Object.keys(icons).join('|')})`)
let children = isString(cellChildren) ? [cellChildren] : cellChildren let children = isString(cellChildren) ? [cellChildren] : cellChildren
if (Array.isArray(children)) { if (Array.isArray(children)) {
return children.map((child, i) => { return children.map((child, i) => {
if (isString(child)) { if (isString(child)) {
const icon = icons[child.trim()] const icon = icons[child.trim()]
const props = {
inline: i < children.length,
'aria-hidden': undefined,
}
if (icon) { if (icon) {
const props = { ...icon, inline: i < children.length, 'aria-hidden': undefined } return <Icon {...icon} {...props} key={i} />
return <Icon {...props} key={i} /> } else if (iconRe.test(child)) {
const [, iconName, text] = child.split(iconRe)
return (
<Fragment key={i}>
<Icon {...icons[iconName]} {...props} />
{text.trim()}
</Fragment>
)
} }
// Work around prettier auto-escape // Work around prettier auto-escape
if (child.startsWith('\\')) return child.slice(1) if (child.startsWith('\\')) return child.slice(1)
@ -66,7 +79,22 @@ export const Table = ({ fixed, className, ...props }) => {
return <table className={tableClassNames} {...props} /> return <table className={tableClassNames} {...props} />
} }
export const Th = props => <th className={classes.th} {...props} /> export const Th = ({ children, ...props }) => {
const isRotated = children && !isString(children) && children.type && children.type.name == 'Tx'
const thClassNames = classNames(classes.th, { [classes.thRotated]: isRotated })
return (
<th className={thClassNames} {...props}>
{children}
</th>
)
}
// Rotated head, child of Th
export const Tx = ({ children, ...props }) => (
<div className={classes.tx} {...props}>
<span>{children}</span>
</div>
)
export const Tr = ({ evenodd = true, children, ...props }) => { export const Tr = ({ evenodd = true, children, ...props }) => {
const foot = isFootRow(children) const foot = isFootRow(children)

View File

@ -64,8 +64,8 @@ export const InlineList = ({ Component = 'p', gutterBottom = true, className, ch
return <Component className={listClassNames}>{children}</Component> return <Component className={listClassNames}>{children}</Component>
} }
export const Help = ({ children, size = 16 }) => ( export const Help = ({ children, className, size = 16 }) => (
<span className={classes.help} data-tooltip={children}> <span className={classNames(classes.help, className)} data-tooltip={children}>
<Icon name="help2" width={size} /> <Icon name="help2" width={size} />
</span> </span>
) )

View File

@ -6,6 +6,8 @@ import siteMetadata from '../../meta/site.json'
const htmlToReactParser = new HtmlToReactParser() const htmlToReactParser = new HtmlToReactParser()
// Branch used for GitHub links when github() / getCurrentSource() are called
// without an explicit branch argument.
// TODO: update this (presumably back to 'master' once the docs stop tracking `develop` - confirm)
const DEFAULT_BRANCH = 'develop'
export const repo = siteMetadata.repo export const repo = siteMetadata.repo
export const modelsRepo = siteMetadata.modelsRepo export const modelsRepo = siteMetadata.modelsRepo
@ -21,7 +23,7 @@ export const headingTextClassName = 'heading-text'
* @param {string} [branch] - Optional branch. Defaults to master. * @param {string} [branch] - Optional branch. Defaults to DEFAULT_BRANCH.
* @returns {string} - URL to the file on GitHub. * @returns {string} - URL to the file on GitHub.
*/ */
export function github(filepath, branch = 'master') { export function github(filepath, branch = DEFAULT_BRANCH) {
if (filepath && filepath.startsWith('github.com')) return `https://${filepath}` if (filepath && filepath.startsWith('github.com')) return `https://${filepath}`
const path = filepath ? '/tree/' + (branch || 'master') + '/' + filepath : '' const path = filepath ? '/tree/' + (branch || 'master') + '/' + filepath : ''
return `https://github.com/${repo}${path}` return `https://github.com/${repo}${path}`
@ -33,7 +35,7 @@ export function github(filepath, branch = 'master') {
* @param {boolean} [isIndex] - Whether the page is an index, e.g. /api/index.md * @param {boolean} [isIndex] - Whether the page is an index, e.g. /api/index.md
* @param {string} [branch] - Optional branch on GitHub. Defaults to master. * @param {string} [branch] - Optional branch on GitHub. Defaults to DEFAULT_BRANCH.
*/ */
export function getCurrentSource(slug, isIndex = false, branch = 'master') { export function getCurrentSource(slug, isIndex = false, branch = DEFAULT_BRANCH) {
const ext = isIndex ? '/index.md' : '.md' const ext = isIndex ? '/index.md' : '.md'
return github(`website/docs${slug}${ext}`, branch) return github(`website/docs${slug}${ext}`, branch)
} }

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="45" viewBox="0 0 320 100">
<path fill="#FF5A5F" d="M168.7 25.1c0 3.6-2.9 6.5-6.5 6.5s-6.5-2.9-6.5-6.5 2.8-6.5 6.5-6.5c3.7.1 6.5 3 6.5 6.5zm-26.8 13.1v1.6s-3.1-4-9.7-4c-10.9 0-19.4 8.3-19.4 19.8 0 11.4 8.4 19.8 19.4 19.8 6.7 0 9.7-4.1 9.7-4.1V73c0 .8.6 1.4 1.4 1.4h8.1V36.8h-8.1c-.8 0-1.4.7-1.4 1.4zm0 24.1c-1.5 2.2-4.5 4.1-8.1 4.1-6.4 0-11.3-4-11.3-10.8s4.9-10.8 11.3-10.8c3.5 0 6.7 2 8.1 4.1v13.4zm15.5-25.5h9.6v37.6h-9.6V36.8zm143.4-1c-6.6 0-9.7 4-9.7 4V18.7h-9.6v55.7h8.1c.8 0 1.4-.7 1.4-1.4v-1.7s3.1 4.1 9.7 4.1c10.9 0 19.4-8.4 19.4-19.8s-8.5-19.8-19.3-19.8zm-1.6 30.5c-3.7 0-6.6-1.9-8.1-4.1V48.8c1.5-2 4.7-4.1 8.1-4.1 6.4 0 11.3 4 11.3 10.8s-4.9 10.8-11.3 10.8zm-22.7-14.2v22.4h-9.6V53.2c0-6.2-2-8.7-7.4-8.7-2.9 0-5.9 1.5-7.8 3.7v26.2h-9.6V36.8h7.6c.8 0 1.4.7 1.4 1.4v1.6c2.8-2.9 6.5-4 10.2-4 4.2 0 7.7 1.2 10.5 3.6 3.4 2.8 4.7 6.4 4.7 12.7zm-57.7-16.3c-6.6 0-9.7 4-9.7 4V18.7h-9.6v55.7h8.1c.8 0 1.4-.7 1.4-1.4v-1.7s3.1 4.1 9.7 4.1c10.9 0 19.4-8.4 19.4-19.8.1-11.4-8.4-19.8-19.3-19.8zm-1.6 30.5c-3.7 0-6.6-1.9-8.1-4.1V48.8c1.5-2 4.7-4.1 8.1-4.1 6.4 0 11.3 4 11.3 10.8s-4.9 10.8-11.3 10.8zm-26-30.5c2.9 0 4.4.5 4.4.5v8.9s-8-2.7-13 3v26.3H173V36.8h8.1c.8 0 1.4.7 1.4 1.4v1.6c1.8-2.1 5.7-4 8.7-4zM91.5 71c-.5-1.2-1-2.5-1.5-3.6-.8-1.8-1.6-3.5-2.3-5.1l-.1-.1C80.7 47.2 73.3 32 65.5 17l-.3-.6c-.8-1.5-1.6-3.1-2.4-4.7-1-1.8-2-3.7-3.6-5.5C56 2.2 51.4 0 46.5 0c-5 0-9.5 2.2-12.8 6-1.5 1.8-2.6 3.7-3.6 5.5-.8 1.6-1.6 3.2-2.4 4.7l-.3.6C19.7 31.8 12.2 47 5.3 62l-.1.2c-.7 1.6-1.5 3.3-2.3 5.1-.5 1.1-1 2.3-1.5 3.6C.1 74.6-.3 78.1.2 81.7c1.1 7.5 6.1 13.8 13 16.6 2.6 1.1 5.3 1.6 8.1 1.6.8 0 1.8-.1 2.6-.2 3.3-.4 6.7-1.5 10-3.4 4.1-2.3 8-5.6 12.4-10.4 4.4 4.8 8.4 8.1 12.4 10.4 3.3 1.9 6.7 3 10 3.4.8.1 1.8.2 2.6.2 2.8 0 5.6-.5 8.1-1.6 7-2.8 11.9-9.2 13-16.6.8-3.5.4-7-.9-10.7zm-45.1 5.2C41 69.4 37.5 63 36.3 57.6c-.5-2.3-.6-4.3-.3-6.1.2-1.6.8-3 1.6-4.2 1.9-2.7 5.1-4.4 8.8-4.4 3.7 0 7 1.6 8.8 4.4.8 1.2 1.4 2.6 1.6 4.2.3 1.8.2 3.9-.3 6.1-1.2 5.3-4.7 11.7-10.1 18.6zm39.9 4.7c-.7 5.2-4.2 9.7-9.1 11.7-2.4 1-5 1.3-7.6 
1-2.5-.3-5-1.1-7.6-2.6-3.6-2-7.2-5.1-11.4-9.7 6.6-8.1 10.6-15.5 12.1-22.1.7-3.1.8-5.9.5-8.5-.4-2.5-1.3-4.8-2.7-6.8-3.1-4.5-8.3-7.1-14.1-7.1s-11 2.7-14.1 7.1c-1.4 2-2.3 4.3-2.7 6.8-.4 2.6-.3 5.5.5 8.5 1.5 6.6 5.6 14.1 12.1 22.2-4.1 4.6-7.8 7.7-11.4 9.7-2.6 1.5-5.1 2.3-7.6 2.6-2.7.3-5.3-.1-7.6-1-4.9-2-8.4-6.5-9.1-11.7-.3-2.5-.1-5 .9-7.8.3-1 .8-2 1.3-3.2.7-1.6 1.5-3.3 2.3-5l.1-.2c6.9-14.9 14.3-30.1 22-44.9l.3-.6c.8-1.5 1.6-3.1 2.4-4.6.8-1.6 1.7-3.1 2.8-4.4 2.1-2.4 4.9-3.7 8-3.7 3.1 0 5.9 1.3 8 3.7 1.1 1.3 2 2.8 2.8 4.4.8 1.5 1.6 3.1 2.4 4.6l.3.6c7.6 14.9 15 30.1 21.9 45v.1c.8 1.6 1.5 3.4 2.3 5 .5 1.2 1 2.2 1.3 3.2.8 2.6 1.1 5.1.7 7.7z"></path>
</svg>

Before

Width:  |  Height:  |  Size: 2.7 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 9.6 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="26" viewBox="0 0 1000 285">
<path fill="#111" d="M542.1 184.3c0 30.97-38.56 29.14-38.56 29.14H465v-56h38.54c39.7-.23 38.56 26.85 38.56 26.85M465 72.05h29.36c30.53 1.6 29.38 24.33 29.38 24.33 0 28.22-33.74 28.68-33.74 28.68h-25V72.06zm70 64.03s26.38-11.25 26.15-41.1c0 0 4-48.87-60.76-54.84h-71.9V245.1h82.4s68.84.24 68.84-57.83c0 0 1.6-39.47-44.75-51.18M348.95 0h302.6v285.17h-302.6V0zM193.14 184.3c0 30.97-38.56 29.14-38.56 29.14h-38.56v-56h38.56c39.7-.23 38.56 26.85 38.56 26.85M116.02 72.05h29.38c30.52 1.6 29.38 24.33 29.38 24.33 0 28.22-33.74 28.68-33.74 28.68h-25.02V72.06zm70 64.03s26.4-11.25 26.17-41.1c0 0 4-48.87-60.78-54.84h-71.9V245.1h82.4s68.86.24 68.86-57.83c0 0 1.6-39.47-44.76-51.18M0 0h302.6v285.17H0V0zM938.8 54.85v37.87S901.85 70 861 69.54c0 0-76.2-1.5-79.64 73.04 0 0-2.75 68.57 78.72 72.47 0 0 34.2 4.13 80.56-25.48v39.25s-62.2 36.95-134.26 8.5c0 0-60.6-22.15-62.9-94.74 0 0-2.52-74.65 78.27-99.43 0 0 21.58-8.26 60.36-4.6 0 0 23.2 2.3 56.7 16.3M697.93 285.17h302.6V0h-302.6v285.17z" />
</svg>

Before

Width:  |  Height:  |  Size: 1.0 KiB

View File

@ -1,6 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="180" height="25" viewBox="0 0 915.7 130.1">
<path fill="#51a4da" d="M157.8 8.6c-8.1 6.8-15.5 13-23 19.1-.8.7-2.2.7-3.4.7H23.3c-.8 0-1.6-.3-2.6-.4V8.6h137.1z" />
<path fill="#b3e4fa" d="M0 33.9c2-.1 4.1-.3 6.1-.3H129c-8.7 7.2-16.2 13.5-23.8 19.6-.8.6-2.3.7-3.4.7H2.4c-.8 0-1.7-.3-2.5-.5.1-6.5.1-13 .1-19.5z" />
<path fill="#5bc4bf" d="M36 79.2V59.4h63.2C91 66.3 83.6 72.5 76.1 78.7c-.5.4-1.3.5-1.9.5H36z" />
<path fill="#657d8c" d="M613.3 49.4c6.5-3.8 12.5-8.7 19.3-10.9 19.6-6.4 39.7 2.9 48 21.8 6.8 15.4 6.3 31-1.3 46-12.1 24.2-47.3 28-66 8-.1 2-.3 3.6-.3 5.2.1 4.3-2 6.4-6.2 6.3-4.2 0-6.2-2.2-6.2-6.4V8.2c0-4.3 2-6.1 6.3-6.1 4.4 0 6.2 2.1 6.2 6.3.2 13.9.2 27.7.2 41zm-.2 30.1c0 2.6-.1 5.1 0 7.7.5 19.7 19.2 33.3 38.2 27.7 13.9-4.1 22.5-18.7 21-35.8-1.2-13.7-6.6-24.9-20.5-29.7-9.8-3.4-19.3-1.6-27.7 4.4-8.7 6.3-13.1 14.7-11 25.7z M561.7 39h27.7c.3 8.4-2.3 11.2-9.9 11.2-5.3 0-10.6 0-15.9.1-.3 0-.6.2-1.3.3-.2 1-.5 2.1-.5 3.2v45.6c0 1.5.1 3.1.2 4.6 1.2 10.6 8.8 15.1 18.7 10.8 3.5-1.5 7-4.1 9.7.8 1.8 3.2-.7 6.9-5.8 9.4-16.6 8-34.1-1.7-34.8-20-.7-16-.2-32.1-.3-48.2v-6.1c-7.6 0-14.6-.6-21.5.2-7.7.9-13.5 5.7-16.1 12.8-2.4 6.6-3.9 13.8-4.4 20.8-.7 11.4-.1 22.9-.2 34.3-.1 5.5-2.7 7.7-7.6 7-4.5-.6-5.2-3.8-5.2-7.5V72.2 45.5c0-4.3 1.1-7.4 6.2-7.5 5.1-.1 6.4 3 6.4 7.3 0 3.1 0 6.1.4 9.9 9-17.9 25.3-17 41.9-16.2 0-5.1.1-9.5 0-13.9s1.6-6.8 6.2-6.7c4.8 0 6.2 2.7 6.1 6.9-.1 4.3 0 8.6 0 13.7z M267.1 127.1c-36.6 0-61.2-28.5-57.6-66.8 4.8-50.7 52.1-62.9 83-48.9 6.3 2.8 11.5 8.2 16.6 13.1 2.6 2.5 3.4 6.6-.4 9.5-3.6 2.8-6.5 1.2-9.3-1.8-12.7-13.6-28.3-17.6-45.8-12.3-17.9 5.4-27 18.9-30.1 36.7-2.4 13.4-.5 26.3 6.5 38.1 14.1 23.7 48.5 28.7 67.2 9.7 2-2 3.8-4.4 5.3-6.9 1.8-2.9 4.4-4 6.9-2.3 1.9 1.3 4 4.7 3.5 6.4-1.1 3.8-3.2 7.6-5.9 10.5-10.8 11-24.7 15-39.9 15z M704.6 85.7c1.5 13.7 7.5 23.6 20 28 13 4.5 25.6 3.4 36.1-6.9 5.2-5.1 7.6-5.9 10.5-3.1 2.8 2.8 1.9 7-2.4 11.7-.6.6-1.2 1.2-1.9 1.8-15.4 11.7-32.5 13.2-49.8 6-16.7-6.9-24.3-20.9-24.9-38.5-.9-24 11.8-42.2 33-46.7 28.6-6.1 50.8 11.2 50.4 43-.1 4.4-2.9 4.8-6.2 4.8h-58.9c-1.9-.1-3.7-.1-5.9-.1zm1.2-11.4h55.9c-.1-15.3-11-26.2-26-26.3-15.6-.2-29.6 12.1-29.9 26.3zM842.8 73.1c1.4-14-5.1-22.5-18.2-24.3-11.1-1.5-20.7 1.6-28 10.3-2.5 2.9-4.9 5.1-8.6 2.4-3.7-2.8-2.7-6.3-.5-9.5 1.5-2.2 3.3-4.5 5.4-6.1 14.4-10.7 30.2-12.1 46.5-5.1 9.5 4.1 15.2 12.3 15.6 22.5.9 18.9.7 37.9.8 56.9 0 3.9-2.3 5.9-6.3 5.8-4-.1-6-2-5.8-6.1.1-1.8 0-3.7 0-6.5-1.6 1.3-2.4 1.9-3.2 
2.6-12.4 11.4-26.8 13.7-42.4 8.8-9.1-2.9-14.5-9.4-15.5-19.1-1-9.9 2.6-17.8 11.3-22.9 10.8-6.3 22.9-7.7 35-8.7 4.5-.5 9.1-.7 13.9-1zm-.2 9.7c-9.8 1.2-19.4 2.1-28.9 3.6-3.8.6-7.5 2.2-10.9 4-5.9 3.1-8.4 8.4-7.4 14.4 1 6.2 5.3 9.5 11 10.7 17.7 3.9 40.5-6.1 36.2-32.7z M338.5 50.2c.7-1.1 1.3-2.4 2.2-3.3 10.5-10.7 23.3-12.4 36.9-8.2 13.3 4.1 20 14.6 20.9 27.7 1.2 18 .8 36.2.9 54.3 0 4-2.5 5.4-6.1 5.2-3.9-.1-5.8-2.1-5.8-6.2.1-13.7.1-27.3 0-41 0-3.2-.2-6.5-.7-9.7-1.9-11.5-8.4-18.5-18.2-20-12.1-1.8-23.5 3.1-28.1 13.2-2.1 4.7-3.2 10.1-3.4 15.3-.5 13.7-.1 27.3-.2 41 0 6-3.2 8.7-8.8 7.1-1.8-.5-3.2-2.9-4.5-4.6-.5-.7-.2-2-.2-3V9.9c0-6.2 1.2-7.5 6.3-7.6 5.3-.1 7.1 1.4 7.1 6.9.1 11.8 0 23.6 0 35.4 0 1.6.1 3.3.2 4.9.7.2 1.1.5 1.5.7z M469 73.1c1.3-13.6-5.3-22.3-17.9-24.2-11.3-1.7-21 1.4-28.5 10.2-2.5 2.9-5 5.1-8.6 2.4-3.7-2.8-2.7-6.4-.5-9.6 6.2-9.2 15.4-13.3 25.9-14.6 5.2-.7 10.6-.7 15.8.1 16.6 2.7 26.4 14.3 26.5 31.3.2 16.6.1 33.1 0 49.7 0 5.6-1.6 7.5-6 7.5-5 0-6.4-3.1-6.1-7.5.1-1.4 0-2.7 0-4.8-1.3 1-2.3 1.5-3 2.2-12.1 11.4-26.4 13.7-41.8 9.1-9.8-2.9-15.5-9.9-16.2-20.2-.9-10.1 3.4-17.8 12.4-22.7 10.6-5.7 22.3-7.1 34.1-8.1 4.6-.3 9.2-.5 13.9-.8zm0 9.9c-8.8.9-17.4 1.5-25.9 2.9-4.8.8-9.6 2.4-14 4.6-6.3 3.1-8.8 8.6-7.7 14.7.9 5.3 5.2 9.5 11.7 10.7 18.7 3.1 39.3-7.4 35.9-32.9z M63.9 127.4c-5.1-1.2-8.2-3.2-9.7-7.3-1.7-4.6-.3-8.3 3.2-11.5C68 98.9 78.6 89.2 89.1 79.5c24.2-22.1 48.4-44.3 72.7-66.4.5-.5.9-1.2 1.5-1.3 2-.6 4.1-1 6.1-1.5-.6 2.1-.5 4.7-1.8 6.1-31.8 35.3-63.8 70.4-95.8 105.5-2 2.3-5.2 3.7-7.9 5.5z M873.9 49.4h-8.8c-3.2 0-5.1-2-4.4-4.9.5-2 2.3-4.5 4.2-5.3 2.4-.9 5.3-.2 9-.2 0-4.6-.1-8.8 0-12.9.1-5.9 1.7-7.6 6.5-7.7 5.3-.1 6.1 3.3 6.1 7.4v12.9h27.8c-.2 8.1-2.7 10.6-9.7 10.7h-18.3v12.9l.3 35.9c0 1.5 0 3.1.2 4.6.9 12 8.5 16.6 19.5 11.6 3.3-1.5 6.6-3.2 8.8 1.1 2.1 4-.9 6.4-3.8 8.4-14.4 9.7-34.8 1-36.3-16.2-1.3-14.2-.8-28.7-1-43-.1-4.8-.1-9.6-.1-15.3z" />
</svg>

Before

Width:  |  Height:  |  Size: 4.2 KiB

View File

@ -1,31 +0,0 @@
import { ReactComponent as AirbnbLogo } from './airbnb.svg'
import { ReactComponent as UberLogo } from './uber.svg'
import { ReactComponent as QuoraLogo } from './quora.svg'
import { ReactComponent as RetrieverLogo } from './retriever.svg'
import { ReactComponent as StitchfixLogo } from './stitchfix.svg'
import { ReactComponent as ChartbeatLogo } from './chartbeat.svg'
import { ReactComponent as AllenAILogo } from './allenai.svg'
import { ReactComponent as RecodeLogo } from './recode.svg'
import { ReactComponent as WapoLogo } from './wapo.svg'
import { ReactComponent as BBCLogo } from './bbc.svg'
import { ReactComponent as MicrosoftLogo } from './microsoft.svg'
import { ReactComponent as VenturebeatLogo } from './venturebeat.svg'
import { ReactComponent as ThoughtworksLogo } from './thoughtworks.svg'
export default {
airbnb: AirbnbLogo,
uber: UberLogo,
quora: QuoraLogo,
retriever: RetrieverLogo,
stitchfix: StitchfixLogo,
chartbeat: ChartbeatLogo,
allenai: AllenAILogo,
recode: RecodeLogo,
wapo: WapoLogo,
bbc: BBCLogo,
microsoft: MicrosoftLogo,
venturebeat: VenturebeatLogo,
thoughtworks: ThoughtworksLogo,
}

View File

@ -1,7 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="130" height="28" viewBox="0 0 609 130">
<path fill="#737373" d="M213.2 74.3l-3.6 10.2h-.3c-.6-2.3-1.7-5.8-3.5-10L186.5 26h-18.9v77.3h12.5V55.6c0-3 0-6.4-.1-10.6-.1-2.1-.3-3.7-.4-4.9h.3c.6 3 1.3 5.2 1.8 6.6l23.2 56.4h8.8l23-56.9c.5-1.3 1-3.9 1.5-6.1h.3c-.3 5.7-.5 10.8-.6 13.9v49h13.3V25.8H233l-19.8 48.5zm50.6-26.7h13V103h-13zm6.6-23.4c-2.2 0-4 .8-5.5 2.2-1.5 1.4-2.3 3.2-2.3 5.4 0 2.1.8 3.9 2.3 5.3 1.5 1.4 3.3 2.1 5.5 2.1s4.1-.8 5.5-2.1c1.5-1.4 2.3-3.2 2.3-5.3s-.8-3.9-2.3-5.4c-1.3-1.4-3.2-2.2-5.5-2.2m52.5 22.9c-2.4-.5-4.9-.8-7.3-.8-5.9 0-11.3 1.3-15.8 3.9-4.5 2.6-8.1 6.2-10.4 10.7-2.4 4.6-3.6 9.9-3.6 16 0 5.3 1.2 10 3.5 14.3 2.3 4.2 5.5 7.6 9.8 9.9 4.1 2.3 8.9 3.5 14.3 3.5 6.2 0 11.5-1.3 15.7-3.7l.1-.1v-12l-.5.4c-1.9 1.4-4.1 2.6-6.3 3.3-2.3.8-4.4 1.2-6.2 1.2-5.2 0-9.3-1.5-12.2-4.8-3-3.2-4.5-7.6-4.5-13.1 0-5.7 1.5-10.2 4.6-13.5 3.1-3.3 7.2-5 12.2-5 4.2 0 8.5 1.4 12.4 4.2l.5.4V49.2l-.1-.1c-1.7-.7-3.6-1.5-6.2-2m42.9-.4c-3.2 0-6.2 1-8.8 3.1-2.2 1.8-3.7 4.4-5 7.5h-.1v-9.7h-13V103h13V74.7c0-4.8 1-8.8 3.2-11.7 2.2-3 5-4.5 8.4-4.5 1.2 0 2.4.3 3.9.5 1.4.4 2.4.8 3.1 1.3l.5.4v-13l-.3-.1c-.9-.6-2.7-.9-4.9-.9m35.4-.3c-9.1 0-16.4 2.7-21.5 8-5.2 5.3-7.7 12.6-7.7 21.8 0 8.6 2.6 15.6 7.6 20.7 5 5 11.8 7.6 20.3 7.6 8.9 0 16-2.7 21.1-8.1 5.2-5.4 7.7-12.6 7.7-21.5 0-8.8-2.4-15.8-7.3-20.9-4.7-5.1-11.6-7.6-20.2-7.6M411.6 89c-2.4 3.1-6.2 4.6-10.9 4.6s-8.5-1.5-11.2-4.8c-2.7-3.1-4-7.6-4-13.3 0-5.9 1.4-10.4 4-13.6 2.7-3.2 6.4-4.8 11.1-4.8 4.6 0 8.2 1.5 10.8 4.6 2.6 3.1 4 7.6 4 13.5-.2 6-1.3 10.7-3.8 13.8m46.1-18.4c-4.1-1.7-6.7-3-7.9-4.1-1-1-1.5-2.4-1.5-4.2 0-1.5.6-3 2.1-4s3.2-1.5 5.7-1.5c2.2 0 4.5.4 6.7 1s4.2 1.5 5.8 2.7l.5.4V48.7l-.3-.1c-1.5-.6-3.5-1.2-5.9-1.7-2.4-.4-4.6-.6-6.4-.6-6.2 0-11.3 1.5-15.3 4.8-4 3.1-5.9 7.3-5.9 12.2 0 2.6.4 4.9 1.3 6.8.9 1.9 2.2 3.7 4 5.2 1.8 1.4 4.4 3 8 4.5 3 1.3 5.3 2.3 6.7 3.1 1.4.8 2.3 1.7 3 2.4.5.8.8 1.8.8 3.1 0 3.7-2.8 5.5-8.5 5.5-2.2 0-4.5-.4-7.2-1.3s-5.2-2.2-7.3-3.7l-.5-.4v12.7l.3.1c1.9.9 4.2 1.5 7 2.2 2.8.5 5.3.9 7.5.9 6.7 0 12.2-1.5 16.1-4.8 4-3.2 6.1-7.3 6.1-12.6 
0-3.7-1-7-3.2-9.5-2.9-2.4-6.5-4.9-11.7-6.9m49.2-24.2c-9.1 0-16.4 2.7-21.5 8s-7.7 12.6-7.7 21.8c0 8.6 2.6 15.6 7.6 20.7 5 5 11.8 7.6 20.3 7.6 8.9 0 16-2.7 21.1-8.1 5.2-5.4 7.7-12.6 7.7-21.5 0-8.8-2.4-15.8-7.3-20.9-4.7-5.1-11.6-7.6-20.2-7.6M517.2 89c-2.4 3.1-6.2 4.6-10.9 4.6-4.8 0-8.5-1.5-11.2-4.8-2.7-3.1-4-7.6-4-13.3 0-5.9 1.4-10.4 4-13.6 2.7-3.2 6.4-4.8 11.1-4.8 4.5 0 8.2 1.5 10.8 4.6 2.6 3.1 4 7.6 4 13.5 0 6-1.3 10.7-3.8 13.8M603.9 58.3V47.6h-13.1V31.2l-.4.1L578 35l-.3.1v12.5h-19.6v-7c0-3.2.8-5.7 2.2-7.3s3.5-2.4 6.1-2.4c1.8 0 3.7.4 5.8 1.3l.5.3V21.2l-.3-.1c-1.8-.6-4.2-1-7.3-1-3.9 0-7.3.9-10.4 2.4-3.1 1.7-5.4 4-7.1 7.1-1.7 3-2.6 6.4-2.6 10.3v7.7h-9.1v10.6h9.1V103h13.1V58.3h19.6v28.5c0 11.7 5.5 17.6 16.5 17.6 1.8 0 3.7-.3 5.5-.6 1.9-.4 3.3-.9 4.1-1.3l.1-.1V91.7l-.5.4c-.8.5-1.5.9-2.7 1.2-1 .3-1.9.4-2.6.4-2.6 0-4.4-.6-5.7-2.1-1.2-1.4-1.8-3.7-1.8-7.1V58.3h13.3z" />
<path fill="#F25022" d="M0 0h61.3v61.3H0z" />
<path fill="#7FBA00" d="M67.7 0H129v61.3H67.7z" />
<path fill="#00A4EF" d="M0 67.7h61.3V129H0z" />
<path fill="#FFB900" d="M67.7 67.7H129V129H67.7z" />
</svg>

Before

Width:  |  Height:  |  Size: 3.1 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="120" height="34" viewBox="0 0 201 56">
<path fill="#b92b27" d="M29 43.62c-1.93-3.77-4.18-7.6-8.57-7.6-.84 0-1.68.15-2.45.5l-1.5-2.98c1.83-1.56 4.77-2.8 8.54-2.8 5.87 0 8.88 2.83 11.27 6.44 1.42-3.08 2.1-7.24 2.1-12.4 0-12.88-4.04-19.5-13.45-19.5-9.27 0-13.28 6.62-13.28 19.5 0 12.82 4 19.36 13.28 19.36 1.47 0 2.8-.16 4.04-.52zm2.3 4.5c-2.05.54-4.2.85-6.35.85C12.6 48.96.5 39.1.5 24.76.5 10.32 12.6.48 24.96.48c12.56 0 24.53 9.77 24.53 24.3 0 8.1-3.77 14.67-9.26 18.9 1.78 2.67 3.6 4.43 6.14 4.43 2.77 0 3.9-2.14 4.08-3.82h3.6c.22 2.24-.9 11.53-11 11.53-6.1 0-9.33-3.53-11.76-7.68zm26.12-12.3V19.27c0-1.9-.7-2.73-2.86-2.73h-2.3v-4.4H67.3v23.5c0 3.95 2.15 5.7 5.4 5.7 2.7 0 5.37-1.2 6.8-3.9V19.26c0-1.9-.7-2.73-2.85-2.73h-2.45v-4.4h15.2v24.6c0 2.45.92 3.57 3.72 3.57h.5v4.54L80 47v-4.67h-.28c-2.63 3.2-6.34 5.38-11.62 5.38-5.95 0-10.7-3-10.7-11.87m56 7.48c5.36 0 7.4-4.66 7.5-14.04.1-9.2-2.14-13.63-7.5-13.63-4.68 0-7.62 4.45-7.62 13.63 0 9.38 2.9 14.04 7.62 14.04zm0 4.4c-9.7 0-18.43-7.4-18.43-18.44 0-10.84 8.52-18.04 18.42-18.04 10.32 0 18.6 7.34 18.6 18.04 0 11.04-8.28 18.45-18.6 18.45zm18.9-.7v-4.4h1.47c3.62 0 3.97-1.04 3.97-4.2V19.27c0-1.9-.98-2.72-3.2-2.72h-1.97v-4.4h13.82l.7 7.2h.27c1.53-5.18 5.66-7.9 9.52-7.9 3.2 0 5.7 1.8 5.7 5.5 0 2.55-1.25 5.28-4.7 5.28-3.1 0-3.7-2.1-6.26-2.1-2.3 0-4.06 2.17-4.06 5.36V38.4c0 3.16.77 4.2 4.34 4.2h2.02V47h-21.64m46-5.12c4.4 0 6.2-4.17 6.2-8.36v-5.6c-3.2 3.34-10.68 3.46-10.68 9.4 0 2.9 1.72 4.56 4.47 4.56zm6.42-.02c-1.82 3.5-5.55 5.85-10.76 5.85-6.06 0-9.97-3.2-9.97-8.87 0-11.4 15.87-8.36 20.53-15.9v-.83c0-5.8-2.28-6.7-4.8-6.7-7.06 0-3.84 7.6-10.34 7.6-3.14 0-4.35-1.9-4.35-4.02 0-4.3 5.13-7.76 14.75-7.76 9.1 0 14.7 2.52 14.7 11.58v14.47c0 2.24.82 3.45 2.77 3.45.84 0 1.54-.23 2.08-.6l1.16 2.83c-.94 1.47-3.48 4.06-8.3 4.06-4.2 0-6.83-1.95-7.18-5.14h-.28z"></path>
</svg>

Before

Width:  |  Height:  |  Size: 1.8 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="100" height="24" viewBox="0 0 824 203">
<path fill="#EE2C24" d="M84.8 90V49c-18 0-30.7 7.6-38 22.8v-20H4.6v148.1h45v-72.4c0-27.9 5.9-37.4 34.1-37.4h1.1zM823.4 137c0-54.5-31.9-88-78.5-88-19.9 0-39.2 8.2-54 22.9-7.9 7.9-13.8 17.1-17.6 27.2V4.2h-44.5v63.9c-10-13-24.1-19.1-42.4-19.1-19.1 0-36.3 7-50.4 20.8-10.2 9.8-17.3 21.3-21 34.1-3.8-12.5-10.8-24.2-20.8-33.7-14.7-13.9-33-21.3-54.3-21.3-19.9 0-39.3 8.2-54 22.9C378 79.7 372 89 368.2 99.2c-12.8-32.6-38.6-50.2-73-50.2-15.5 0-29.3 4.2-41.5 12.5l-21.2 51.3c-8.1-40-36.9-63.8-76.1-63.8-19.9 0-39.2 8.2-54 22.9C87.9 86.3 80 105.1 80 125.5c0 20.7 7.4 39.2 21.5 54.2 15 15.8 33 23.4 55.3 23.4 16.7 0 31.2-4.1 43.3-12.1l14-34h-33.6c-6.5 5.7-13.6 8.2-22.3 8.2-18.8 0-30-8.2-33.8-23.7h96.2c2.9 14.2 9.5 27.3 19.9 38.2 15 15.8 32.9 23.4 55.2 23.4 33.5 0 60.4-18.5 72.6-49.8 3.6 9.7 9.2 18.5 16.6 26.3 15 15.8 33 23.5 55.4 23.5 22.1 0 40.7-7.9 55.7-22.9 9.1-9.1 15.4-19.5 18.8-31.4 4 14 11.8 26.3 23.3 36.6 13.2 11.8 29 17.7 47.3 17.7 18 0 31.5-4.8 44.7-17.7v14.4h43.1v-47c3.6 9.8 9.2 18.9 16.8 26.9 15 15.8 33 23.4 55.3 23.4 32.7 0 57-15.5 71.1-46.1H769c-6.5 5.7-13.6 8.2-22.3 8.2-18.8 0-30-8.2-33.8-23.7h110.4V137zm-700.3-25.9c3-13.9 15.8-23.2 33.8-23.2 17.2 0 29.4 8.2 34.6 23.2h-68.4zm201.5 36c-7.1 9.5-16.6 14.4-28.3 14.4-20.7 0-34.8-13.9-34.8-36.2 0-20.1 14.1-34.8 33.5-34.8 14.1 0 24.2 5.4 30.2 15.5H366c-1.7 6.3-2.5 12.8-2.5 19.6 0 7.4 1 14.6 2.9 21.5h-41.8zm116.1 16.1c-19.3 0-35.6-16.5-35.6-37 0-20.7 16-37.3 35.6-37.3s35.6 16.5 35.6 37.3c0 20.5-16 37-35.6 37m154.7 0c-22.7 0-39-16-39-37 0-20.2 16.3-37.3 37.3-37.3 20.7 0 36.7 16.3 36.7 37.8.1 20.8-15.9 36.5-35 36.5m116.3-52.1c3-13.9 15.8-23.2 33.8-23.2 17.2 0 29.4 8.2 34.6 23.2h-68.4z"></path>
</svg>

Before

Width:  |  Height:  |  Size: 1.7 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 6.9 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="18" viewBox="0 0 224.6 26.6">
<path fill="#2F3237" d="M9.7 11.7l-.3-.1c-4-1.1-5.9-1.9-5.9-5.1 0-2.7 1.6-4.2 4.3-4.2 0 0 5.7 0 7.3 4.7l.1.2H17V0h-1.8l-.7 2.6C12.8 1 10.6 0 8.1 0c-4.7 0-8 3-8 7.3 0 4.8 4 6.2 8.5 7.3 4.5 1.1 6.4 2 6.4 5.3 0 2.9-2.4 4.3-4.5 4.3-3.8 0-6.5-1.5-8.6-4.6l-.1-.2H0v7.2h1.8l.8-3.1c2.2 2.1 4.6 3.1 7.6 3.1 4.8 0 8.2-3.3 8.2-7.7 0-4.9-4-6-8.7-7.2 M22.1 5.5h1.7l.1-.5c.6-2.9.7-2.9 2.7-2.9h4.6v22.3h-2.5v2.1h8.7v-2.1H35V2.1h4.5c2.1 0 2.3 0 2.9 2.9l.1.5h1.7V0H22.1v5.5z M63.2 5.5h1.7L65 5c.6-2.9.7-2.9 2.7-2.9h4.6v22.3h-2.4v2.1h8.7v-2.1h-2.4V2.1h4.5c2.1 0 2.3 0 2.9 2.9l.1.5h1.7V0H63.2v5.5zM49.3 0v2.1h2.5v22.4h-2.5v2.1H58v-2.1h-2.4V2.1H58V0zM132.4 0v2.1h2.5V13h-12.4V2.1h2.4V0h-8.7v2.1h2.6v22.4h-2.5v2.1h8.7v-2.1h-2.5v-9.4h12.4v9.4h-2.5v2.1h8.7v-2.1h-2.4V2.1h2.4V0zM186.4 0v2.1h2.5v22.4h-2.5v2.1h8.7v-2.1h-2.4V2.1h2.4V0zM215.9 0v2.1h2l-5.6 8.1-5.4-8.1h2V0h-8.7v2.1h2.5l7.6 11.3-7.5 11.1h-2.7v2.1h8.7v-2.1H206l5.8-8.7 5.8 8.7h-2.2v2.1h8.7v-2.1h-2.4l-7.8-11.9 7.2-10.5h3.5V0z M161.4 2.1h2.6v22.3h-2.6v2.1h8.7v-2.1h-2.3V15h4c1.5 0 1.6.2 2.1 1.7l.1.2v.2h1.7v-6.6H174v.2c0 .2-.1.3-.2.4-.5 1.2-.7 1.8-2.1 1.8h-4V2.1h9.1c1.7 0 2.1.5 3.1 3v.2h2V0h-20.6l.1 2.1z M108.2 16.1c-.8 5.2-3.6 8.1-7.9 8.1-5.1 0-8.2-4.3-8.2-11.1 0-6.5 3.1-10.7 7.9-10.7 4.6 0 7 2.9 8.1 5.4l.1.2h1.8V0h-2l-.7 2.8C105.4.9 103 0 100.1 0c-6.8 0-12 5.7-12 13.3 0 3.9 1.2 7.2 3.3 9.6 2.1 2.4 5.1 3.6 8.4 3.6 6 0 9.8-3.7 10.6-10.4v-.3h-2.3v.3z"></path>
</svg>

Before

Width:  |  Height:  |  Size: 1.5 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="28" viewBox="35 20 150 25">
<path fill="#001e2b" d="M175.1 28.47a2 2 0 1 1 2 2 2 2 0 0 1-2-2m3.46 0A1.47 1.47 0 1 0 177.1 30a1.45 1.45 0 0 0 1.46-1.53m-.48 1.1h-.52l-.48-.92h-.36v.92h-.46V27.4h1.05c.57 0 .84.16.84.67 0 .4-.2.56-.58.58zm-.77-1.24c.26 0 .42-.05.42-.32 0-.3-.3-.26-.5-.26h-.5v.6zM49.68 24.5h-4.6v14.74h-3.56V24.5h-4.6v-3h12.76v3M54.4 27.8a6.14 6.14 0 0 1 3.8-1.6c1.4 0 3.36.84 3.36 3.98v9.05h-3.38v-7.68c0-1.02.08-2.64-1.38-2.64a3.5 3.5 0 0 0-2.4 1.4v8.94h-3.37V21.5h3.37v6.3M62.95 32.85c0-3.4 1.87-6.65 5.6-6.65s5.62 3.26 5.62 6.65c0 3.38-1.9 6.66-5.6 6.66s-5.62-3.27-5.62-6.65m7.76 0c0-1.6-.32-4.17-2.14-4.17s-2.14 2.58-2.14 4.17.32 4.17 2.14 4.17 2.14-2.57 2.14-4.17M82.8 37.92a6.16 6.16 0 0 1-3.8 1.6c-1.4 0-3.35-.86-3.35-4v-9.05h3.37v7.68c0 1.03-.06 2.65 1.4 2.65a3.5 3.5 0 0 0 2.4-1.4v-8.93h3.37v12.77h-3.4v-1.32M91.7 35.04a.87.87 0 0 0-.96.8c0 1.66 8.85-1.23 8.85 3.76 0 1.9-1.3 3.9-6.35 3.9-4.4 0-5.9-1.55-5.9-3.2a2.2 2.2 0 0 1 1.4-1.95 2.03 2.03 0 0 1-1-1.62 2.9 2.9 0 0 1 1.65-2.55 4.8 4.8 0 0 1-1.6-3.5c0-2.84 2.4-4.48 5.15-4.48a6.13 6.13 0 0 1 3.15.9 3.13 3.13 0 0 1 2.3-1.06 4.97 4.97 0 0 1 .98.1v2.36a3 3 0 0 0-1.07-.27 1.86 1.86 0 0 0-.9.27 5.1 5.1 0 0 1 .68 2.17c0 2.82-2.37 4.46-5.13 4.46l-1.25-.1m-1.07 4.23a.88.88 0 0 0-.53.8c0 1.18 2.4 1.25 3.2 1.25.57 0 3.53-.06 3.53-1.08 0-.66-.45-.6-2.46-.7zm2.32-6.36c1.42 0 1.83-1.16 1.83-2.37 0-1.17-.55-2.15-1.83-2.15-1.4 0-1.84 1.2-1.84 2.4 0 1.16.55 2.12 1.85 2.12M104.2 27.8a6.14 6.14 0 0 1 3.82-1.6c1.4 0 3.35.84 3.35 3.98v9.05H108v-7.68c0-1.02.07-2.64-1.4-2.64a3.5 3.5 0 0 0-2.4 1.4v8.94h-3.36V21.5h3.37v6.3M117.5 26.47h2.63v2.8h-2.62v6.5c0 .62.2 1.25 1.28 1.25a2.4 2.4 0 0 0 1.35-.47v2.57a7.83 7.83 0 0 1-2.17.4c-2.1 0-3.83-1.2-3.83-3.98v-6.27h-1.57v-2.8h1.57v-3.2h3.38v3.2M134.17 39.24h-1.82l-3-14.5h-.06l-3.18 14.5h-1.82l-3.72-17.74h1.78l2.92 14.32h.05l3.2-14.32h1.74l3.04 14.32h.05l3.12-14.32h1.75l-4.05 17.74M137.46 32.94c0-4.7 2.23-6.65 5.03-6.65s5.03 1.93 5.03 6.64-2.24 6.66-5.04 6.66-5.04-1.94-5.04-6.66m8.3 
0c0-2.14-.53-5.15-3.27-5.15s-3.26 3-3.26 5.14.52 5.16 3.25 5.16 3.26-3 3.26-5.16M150.7 29.36h.05c.7-1.5 1.6-2.9 3.48-2.9h.7v1.63c-2.34-.33-3.55 2.18-4.23 4v7.14h-1.78v-12.6h1.78v2.72M158.13 32.5l4.68-5.85h2l-3.48 4.3 4 8.3h-1.92l-3.22-6.85-2.05 2.53v4.3h-1.78V21.5h1.78v11M173.02 29.46a3.26 3.26 0 0 0-2.88-1.67c-1.16 0-2.25.58-2.25 1.86 0 3.08 6.62 1.73 6.62 5.97a3.84 3.84 0 0 1-4.12 3.97 5.17 5.17 0 0 1-4.7-2.64l1.4-.9a3.5 3.5 0 0 0 3.3 2.04 2.2 2.2 0 0 0 2.46-2.14c0-3.17-6.63-1.66-6.63-6.18a3.56 3.56 0 0 1 3.85-3.5 4.83 4.83 0 0 1 4.2 2.2l-1.26.98"></path>
</svg>

Before

Width:  |  Height:  |  Size: 2.6 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="31" viewBox="0 0 927 322">
<path fill="#010202" d="M53.328 229.809c3.917 10.395 9.34 19.283 16.27 26.664 6.93 7.382 15.14 13.031 24.63 16.948 9.491 3.917 19.81 5.875 30.958 5.875 10.847 0 21.015-2.034 30.506-6.102s17.776-9.792 24.856-17.173c7.08-7.382 12.579-16.194 16.496-26.438 3.917-10.244 5.875-21.692 5.875-34.347V0h47.453v316.354h-47.001v-29.376c-10.545 11.147-22.974 19.734-37.285 25.761-14.312 6.025-29.752 9.038-46.323 9.038-16.873 0-32.615-2.938-47.228-8.813-14.612-5.875-27.267-14.235-37.962-25.082S15.441 264.006 9.265 248.79C3.088 233.575 0 216.628 0 197.947V0h47.453v195.236C47.453 207.891 49.411 219.414 53.328 229.809zM332.168 0v115.243c10.545-10.545 22.748-18.905 36.607-25.082s28.924-9.265 45.193-9.265c16.873 0 32.689 3.163 47.453 9.49 14.763 6.327 27.567 14.914 38.414 25.761s19.434 23.651 25.761 38.414c6.327 14.764 9.49 30.431 9.49 47.002 0 16.57-3.163 32.162-9.49 46.774-6.327 14.613-14.914 27.343-25.761 38.188-10.847 10.847-23.651 19.434-38.414 25.761-14.764 6.327-30.581 9.49-47.453 9.49-16.27 0-31.409-3.088-45.419-9.265-14.01-6.176-26.288-14.537-36.833-25.082v28.924h-45.193V0H332.168zM337.365 232.746c4.067 9.642 9.717 18.078 16.948 25.309 7.231 7.231 15.667 12.956 25.308 17.174 9.642 4.218 20.036 6.327 31.184 6.327 10.847 0 21.09-2.109 30.731-6.327s18.001-9.942 25.083-17.174c7.08-7.23 12.729-15.667 16.947-25.309 4.218-9.641 6.327-20.035 6.327-31.183 0-11.148-2.109-21.618-6.327-31.41s-9.867-18.303-16.947-25.534c-7.081-7.23-15.441-12.88-25.083-16.947s-19.885-6.102-30.731-6.102c-10.846 0-21.09 2.034-30.731 6.102s-18.077 9.717-25.309 16.947c-7.23 7.231-12.955 15.742-17.173 25.534-4.218 9.792-6.327 20.262-6.327 31.41C331.264 212.711 333.298 223.105 337.365 232.746zM560.842 155.014c6.025-14.462 14.312-27.191 24.856-38.188s23.049-19.659 37.511-25.986 30.129-9.49 47.001-9.49c16.571 0 31.937 3.013 46.098 9.038 14.16 6.026 26.362 14.387 36.606 25.083 10.244 10.695 18.229 23.35 23.952 37.962 5.725 14.613 8.587 30.506 8.587 47.68v14.914H597.901c1.507 9.34 4.52 18.002 9.039 25.985 4.52 7.984 
10.168 14.914 16.947 20.789 6.779 5.876 14.462 10.471 23.049 13.784 8.587 3.314 17.7 4.972 27.342 4.972 27.418 0 49.563-11.299 66.435-33.896l32.991 24.404c-11.449 15.366-25.609 27.418-42.481 36.155-16.873 8.737-35.854 13.106-56.944 13.106-17.174 0-33.217-3.014-48.131-9.039s-27.869-14.462-38.866-25.309-19.659-23.576-25.986-38.188-9.491-30.506-9.491-47.679C551.803 184.842 554.817 169.476 560.842 155.014zM624.339 137.162c-12.805 10.696-21.316 24.932-25.534 42.708h140.552c-3.917-17.776-12.278-32.012-25.083-42.708-12.805-10.695-27.794-16.043-44.967-16.043C652.133 121.119 637.144 126.467 624.339 137.162zM870.866 142.359c-9.641 10.545-14.462 24.856-14.462 42.934v131.062h-45.646V85.868h45.193v28.472c5.725-9.34 13.182-16.722 22.371-22.145 9.189-5.424 20.111-8.136 32.766-8.136h15.817v42.482h-18.981C892.86 126.542 880.507 131.814 870.866 142.359z"/>
</svg>

Before

Width:  |  Height:  |  Size: 2.9 KiB

View File

@ -1,4 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="19" viewBox="0 0 1743 222.2">
<path d="M208 0v44.4c-3.5 0-6.5.4-9.4-.1-4.1-.8-5.5.9-6.6 4.5-13.9 45-28 89.9-42 134.8-3.2 10.3-6.3 20.7-9.8 30.9-.5 1.4-2.5 3.3-3.8 3.3-22.5.2-45 .1-67.8.1-.5-1.4-1.1-2.7-1.6-4.1-17.4-55-34.8-110.1-52-165.1-1.2-3.7-2.7-5.1-6.7-4.5-2.6.5-5.5-.1-8.3-.2V0h94v44.3H74.9c10.5 41.1 20.9 81.7 31.3 122.3.3 0 .6.1 1 .1 11.2-40.6 22.4-81.3 33.8-122.5h-18.9V0H208z M356 58.3h63.2c.6 7.4 1.2 14.7 1.9 22.2 3.8-4.4 7-8.9 11-12.4 17.9-15.4 38.5-18.4 60.2-10.4 16.4 6.1 23.4 19.6 23.7 36.5.4 24.1.2 48.3.2 72.5v6.6l12.9.6v43.7h-70.8V212v-92.5c0-8.4-2.9-12.7-9.3-14.8-6.7-2.2-13.6 0-18.2 6-1.1 1.4-1.9 3.1-2.7 4.8-.5 1.2-1 2.6-1 3.8-.1 17.9 0 35.8 0 54.2h9.7v44.1H356v-43.9h12.3v-70.8h-12.2c-.1-15.2-.1-29.7-.1-44.6zM741.9 102.4h-10.8v-44c.8-.1 1.6-.3 2.4-.3h66.6v115.6H813v43.9h-65.5v-16.5c-2.9 3.1-4.9 5.4-7.2 7.5-15.9 14.1-43.9 17.9-62.9 8.3-14.5-7.3-21.7-19.7-22.3-35.4-.9-24.3-.7-48.6-1-72.9v-6.3h-12.7v-44H712v5.6c0 29.3-.1 58.6.1 88 0 4.1.7 8.3 2 12.2 2 5.9 7 8.9 13.2 8.7 6.1-.2 10.5-3.1 12.6-8.8.8-2.2 1.7-4.5 1.7-6.7.4-18.1.3-36.3.3-54.9z M345.7 149h-98.5c-.2 9.1.1 17.6 4.5 25.4 3.6 6.5 9.6 8.9 16.8 8.6 7.2-.3 12.9-3.3 15.9-10.1 1.3-3 2.1-6.2 3.3-9.6h54.6c-2.2 17.5-8.9 32.3-22.9 43.3-9.9 7.8-21.4 12-33.8 13.8-16.7 2.5-33.2 1.8-49.4-3.4-21.7-7-36.3-21.4-43-43-7.3-23.3-7.6-47 .1-70.3 9.4-28.7 30.1-44.2 59.5-48.6 13.2-2 26.3-1.1 39.1 2.4 29.9 8.1 45.9 28.7 50.8 58.4 1.8 10.6 2 21.5 3 33.1zm-96.9-30.8H287c.5-8.5-.7-16.1-8.2-20.9-6.8-4.3-14.3-4.7-21.2-.4-7.7 4.9-8.7 12.8-8.8 21.3zM1114 148.9h-98.2c-.2 9-.2 17.6 4.3 25.4 3.8 6.7 9.9 9.1 17.3 8.7 7.4-.4 13.1-3.8 15.9-10.9 1.1-2.8 1.8-5.7 2.8-8.8h54.7c-3.5 32.1-26 53.9-59.4 57.6-15.6 1.7-30.9 1-46-3.7-22.3-7-37.2-21.7-44-44-6.9-23-7.2-46.3.3-69.3 9.5-28.9 30.3-44.5 59.9-48.8 13.9-2 27.6-.9 41 3.1 27.5 8.3 43 27.6 48.1 55.2 2.1 11.4 2.2 23.1 3.3 35.5zm-96.4-30.8h38c.1-16-7.7-24.9-20.6-23.9-11.9.9-19.2 11-17.4 23.9z M535.6 58h18c0-10.6.4-20.9-.1-31.2-.3-5.4 1.5-7.4 6.8-8.5 15.2-3.1 30.2-6.7 46-10.3v50h25.6v44.7h-25c-.2 1.8-.4 3.3-.4 4.7v51.5c0 
1.8.2 3.7.4 5.5 1.3 9.8 8.2 14.9 18 13.3 1.6-.3 3.2-.6 5.6-1v27.7c0 12.9 0 12.9-12.7 14.9-13.6 2.2-27.1 2.9-40.7-.3-19.1-4.6-27.8-15.5-27.9-35.3V103h-13.7c.1-15.3.1-29.8.1-45zM826.2 217.6v-43.9h12.7v-70.9h-12.6V58.3h62.1l1.9 25.3 2-4.4c5.1-12.9 14.4-20.7 28.3-22.2 6.7-.7 13.6-.1 20.3.3 1.2.1 3.4 2 3.4 3.1.2 15.8 0 31.6 0 47.5 0 .3-.3.6-.6 1.1-7.6 0-15.5-1-23.1.2-16.2 2.6-23.8 12-24.5 28.5-.2 5.8-.2 11.7-.3 17.5v18.2h18v44.3c-29.1-.1-58.1-.1-87.6-.1z" />
<path fill="#ED1E25" d="M1237 .3c8.5 1.4 17.1 2.2 25.4 4.3 34.3 8.6 51.7 50.6 33.5 80.3-4.4 7.2-10.5 12.4-17.7 16.5-3.2 1.8-6.4 3.5-10.3 5.5 2 .8 3.4 1.6 4.9 2 23.7 6.9 34.2 24.4 35.9 47.6 2.4 31.9-17.7 55.7-49.6 59.6-9.9 1.2-19.9 1.9-29.9 1.9-31.7.2-63.3.1-95 .1h-5.8v-43.8h18.9V44.4H1128V.2c36.3.1 72.7.1 109 .1zm-32.3 128.8c0 14.9-.1 28.5.1 42.2 0 .9 2 2.7 3 2.7 8.3 0 16.7 0 24.9-.7 6.1-.5 11.7-2.8 15.1-8.4 8-13.2.4-31.6-14.7-34.2-9-1.6-18.4-1.1-28.4-1.6zm.2-40.5c8.7-.5 16.9-.2 24.8-1.6 9.6-1.7 16.2-11 16.3-21.2 0-10.2-5.9-19.7-14.7-21.3-8.5-1.5-17.4-1.4-26.4-2v46.1z M1743 103.3c-7.5-.1-15-.4-22.4-.2-1.1 0-3.2 1.9-3.2 3-.2 18.8-.6 37.7.1 56.5.4 12.3 7.9 17.4 20 15.2 1-.2 2-.2 3.2-.3.2 1.2.5 2.3.5 3.4 0 10.8 0 21.7.1 32.5 0 2.4-.3 4.2-3.1 4.7-16.5 2.7-32.9 5.1-49.6 1.2-18.7-4.4-27.7-14.3-28.1-33.4-.5-25.5-.2-51-.3-76.5V103h-6.4c-8.3-.1-7.3.9-7.4-7.6V58.5h18.4c0-10.1-.1-19.8 0-29.4.1-10.6-1.5-8.2 8.7-10.7 14.2-3.4 28.5-6.5 43.5-10v49.9h26v45z M1569.2 119.2c0-5.4.3-10-.1-14.6-.6-8.5-6.1-14.1-13.8-14.3-7.7-.2-14.1 5.5-15.3 13.7-.3 1.8-.3 3.6-.5 5.8h-53.3c-1.9-20.2 8.6-38.7 28.2-47.2 28.5-12.3 57.2-11.2 85.1 2.2 17.1 8.2 25.9 22.7 26.2 41.7.4 20.3.2 40.7.3 61v6.6h12.8v43.8h-66.2c-.5-5.4-1-11-1.6-17.4-1.5 1.7-2.5 2.7-3.4 3.8-17.3 21.3-50.3 21.2-67.2 11.3-13.4-7.9-19.2-20.5-20.1-35.4-2-32.6 15.1-53.7 48.1-58.7 11.6-1.8 23.5-1.6 35.3-2.3 1.6-.1 3.2 0 5.5 0zm.7 28.2c-5.4 0-9.7-.6-13.9.1-12.9 2.1-19.5 11.1-18.1 24.1 1.2 10.7 10.4 16.1 20.3 11.9 5.3-2.2 8.9-6.3 9.7-11.8 1.2-7.9 1.4-16 2-24.3z M1475.6 149.2h-98.5c0 9.7.1 18.9 5.6 27 4.2 6.2 10.6 7.7 17.6 7 6.8-.7 11.9-4.1 14.6-10.5 1.2-2.7 1.8-5.7 2.8-9h54.4c-2.2 17.5-8.9 32.5-23.3 43.3-17 12.8-36.8 15.8-57.3 14.4-8.4-.5-16.9-2-25-4.5-21.4-6.5-36-20.6-42.8-41.9-8-25-8.2-50.2 1.1-74.9 10.3-27.1 31.1-41 59.2-44.8 13.7-1.8 27.3-.7 40.5 3.4 28.2 8.7 43.2 28.8 47.9 57 2.1 10.8 2.3 21.8 3.2 33.5zm-58.1-30.5c.1-9-.9-17.2-9.5-21.8-7.3-3.9-14.9-4-21.6 1.2-6.6 5.1-7.8 12.5-7.3 20.6h38.4z" />
</svg>

Before

Width:  |  Height:  |  Size: 4.4 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 8.8 KiB

View File

@ -38,7 +38,10 @@ function remarkCodeBlocks(userOptions = {}) {
} }
// Overwrite the code text with the rest of the lines // Overwrite the code text with the rest of the lines
node.value = lines.slice(1).join('\n') node.value = lines.slice(1).join('\n')
} else if (firstLine && /^https:\/\/github.com/.test(firstLine)) { } else if (
(firstLine && /^https:\/\/github.com/.test(firstLine)) ||
firstLine.startsWith('%%GITHUB_')
) {
// GitHub URL // GitHub URL
attrs.github = 'true' attrs.github = 'true'
} }

View File

@ -76,7 +76,7 @@
.banner .banner
background: var(--color-theme) background: var(--color-theme)
color: var(--color-back) color: var(--color-back)
padding: 5rem padding: 1rem 5rem
margin-bottom: var(--spacing-md) margin-bottom: var(--spacing-md)
background-size: cover background-size: cover
@ -143,21 +143,3 @@
.banner-button-element .banner-button-element
background: var(--color-theme) background: var(--color-theme)
.logos
text-align: center
padding-bottom: 1rem
& + &
padding-bottom: 7.5rem
.logos-content
display: flex
justify-content: center
align-items: center
flex-flow: row wrap
margin-bottom: 0
.logo
display: inline-block
padding: 1.8rem

View File

@ -84,7 +84,6 @@
--syntax-function: hsl(195, 70%, 54%) --syntax-function: hsl(195, 70%, 54%)
--syntax-keyword: hsl(343, 100%, 68%) --syntax-keyword: hsl(343, 100%, 68%)
--syntax-operator: var(--syntax-keyword) --syntax-operator: var(--syntax-keyword)
--syntax-regex: hsl(45, 90%, 55%)
// Other // Other
--color-inline-code-text: var(--color-dark) --color-inline-code-text: var(--color-dark)
@ -345,9 +344,6 @@ body [id]:target
&.atrule, &.function, &.selector &.atrule, &.function, &.selector
color: var(--syntax-function) color: var(--syntax-function)
&.regex, &.important
color: var(--syntax-regex)
&.keyword &.keyword
color: var(--syntax-keyword) color: var(--syntax-keyword)
@ -528,11 +524,14 @@ body [id]:target
.gatsby-resp-image-figure .gatsby-resp-image-figure
margin-bottom: var(--spacing-md) margin-bottom: var(--spacing-md)
.gatsby-resp-image-figcaption .gatsby-resp-image-figcaption, .caption
font: var(--font-size-xs)/var(--line-height-md) var(--font-primary) font: var(--font-size-xs)/var(--line-height-md) var(--font-primary)
color: var(--color-subtle-dark) color: var(--color-subtle-dark)
padding-top: 0.75rem padding-top: 0.75rem
padding-left: 3rem padding-left: 3rem
code p
font: inherit
code, a
color: inherit color: inherit

View File

@ -6,6 +6,9 @@
margin-bottom: var(--spacing-md) margin-bottom: var(--spacing-md)
max-width: 100% max-width: 100%
figure > .root
margin-bottom: var(--spacing-sm)
.fixed .fixed
table-layout: fixed table-layout: fixed
@ -54,6 +57,21 @@
border-bottom: 2px solid var(--color-theme) border-bottom: 2px solid var(--color-theme)
vertical-align: bottom vertical-align: bottom
.th-rotated
height: 100px
.tx
transform: translate(15px, 0) rotate(315deg) // 315 = 360 - 45
width: 30px
white-space: nowrap
& > span
padding-left: 0.5rem
[data-tooltip]
display: inline-block
transform: rotate(-316deg)
.divider .divider
height: 0 height: 0
border-bottom: 1px solid var(--color-subtle) border-bottom: 1px solid var(--color-subtle)

View File

@ -19,7 +19,7 @@ import Footer from '../components/footer'
import SEO from '../components/seo' import SEO from '../components/seo'
import Link from '../components/link' import Link from '../components/link'
import Section, { Hr } from '../components/section' import Section, { Hr } from '../components/section'
import { Table, Tr, Th, Td } from '../components/table' import { Table, Tr, Th, Tx, Td } from '../components/table'
import { Pre, Code, InlineCode, TypeAnnotation } from '../components/code' import { Pre, Code, InlineCode, TypeAnnotation } from '../components/code'
import { Ol, Ul, Li } from '../components/list' import { Ol, Ul, Li } from '../components/list'
import { H2, H3, H4, H5, P, Abbr, Help } from '../components/typography' import { H2, H3, H4, H5, P, Abbr, Help } from '../components/typography'
@ -64,6 +64,7 @@ const scopeComponents = {
Infobox, Infobox,
Table, Table,
Tr, Tr,
Tx,
Th, Th,
Td, Td,
Help, Help,

View File

@ -12,16 +12,21 @@ import {
LandingDemo, LandingDemo,
LandingBannerGrid, LandingBannerGrid,
LandingBanner, LandingBanner,
LandingLogos,
} from '../components/landing' } from '../components/landing'
import { H2 } from '../components/typography' import { H2 } from '../components/typography'
import { Ul, Li } from '../components/list' import { Ul, Li } from '../components/list'
import { InlineCode } from '../components/code'
import Button from '../components/button' import Button from '../components/button'
import Link from '../components/link' import Link from '../components/link'
import QuickstartTraining from './quickstart-training'
import Project from './project'
import courseImage from '../../docs/images/course.jpg' import courseImage from '../../docs/images/course.jpg'
import prodigyImage from '../../docs/images/prodigy_overview.jpg'
import projectsImage from '../../docs/images/projects.png'
import irlBackground from '../images/spacy-irl.jpg'
import BenchmarksChoi from 'usage/_benchmarks-choi.md' import Benchmarks from 'usage/_benchmarks-models.md'
const CODE_EXAMPLE = `# pip install spacy const CODE_EXAMPLE = `# pip install spacy
# python -m spacy download en_core_web_sm # python -m spacy download en_core_web_sm
@ -82,8 +87,7 @@ const Landing = ({ data }) => {
<LandingCard title="Get things done" url="/usage/spacy-101" button="Get started"> <LandingCard title="Get things done" url="/usage/spacy-101" button="Get started">
spaCy is designed to help you do real work to build real products, or gather spaCy is designed to help you do real work to build real products, or gather
real insights. The library respects your time, and tries to avoid wasting it. real insights. The library respects your time, and tries to avoid wasting it.
It's easy to install, and its API is simple and productive. We like to think of It's easy to install, and its API is simple and productive.
spaCy as the Ruby on Rails of Natural Language Processing.
</LandingCard> </LandingCard>
<LandingCard <LandingCard
title="Blazing fast" title="Blazing fast"
@ -91,16 +95,14 @@ const Landing = ({ data }) => {
button="Facts &amp; Figures" button="Facts &amp; Figures"
> >
spaCy excels at large-scale information extraction tasks. It's written from the spaCy excels at large-scale information extraction tasks. It's written from the
ground up in carefully memory-managed Cython. Independent research in 2015 found ground up in carefully memory-managed Cython. If your application needs to
spaCy to be the fastest in the world. If your application needs to process process entire web dumps, spaCy is the library you want to be using.
entire web dumps, spaCy is the library you want to be using.
</LandingCard> </LandingCard>
<LandingCard title="Deep learning" url="/usage/training" button="Read more"> <LandingCard title="Awesome ecosystem" url="/usage/projects" button="Read more">
spaCy is the best way to prepare text for deep learning. It interoperates In the five years since its release, spaCy has become an industry standard with
seamlessly with TensorFlow, PyTorch, scikit-learn, Gensim and the rest of a huge ecosystem. Choose from a variety of plugins, integrate with your machine
Python's awesome AI ecosystem. With spaCy, you can easily construct learning stack and build custom components and workflows.
linguistically sophisticated statistical models for a variety of NLP problems.
</LandingCard> </LandingCard>
</LandingGrid> </LandingGrid>
@ -110,43 +112,154 @@ const Landing = ({ data }) => {
<LandingCol> <LandingCol>
<H2>Features</H2> <H2>Features</H2>
<Ul> <Ul>
<Li>
Non-destructive <strong>tokenization</strong>
</Li>
<Li>
<strong>Named entity</strong> recognition
</Li>
<Li> <Li>
Support for <strong>{counts.langs}+ languages</strong> Support for <strong>{counts.langs}+ languages</strong>
</Li> </Li>
<Li> <Li>
<strong>{counts.models} statistical models</strong> for{' '} <strong>{counts.models} trained pipelines</strong> for{' '}
{counts.modelLangs} languages {counts.modelLangs} languages
</Li> </Li>
<Li>
Multi-task learning with pretrained <strong>transformers</strong> like
BERT
</Li>
<Li> <Li>
Pretrained <strong>word vectors</strong> Pretrained <strong>word vectors</strong>
</Li> </Li>
<Li>State-of-the-art speed</Li> <Li>State-of-the-art speed</Li>
<Li> <Li>
Easy <strong>deep learning</strong> integration Production-ready <strong>training system</strong>
</Li>
<Li>
Linguistically-motivated <strong>tokenization</strong>
</Li>
<Li>
Components for <strong>named entity</strong> recognition,
part-of-speech tagging, dependency parsing, sentence segmentation,{' '}
<strong>text classification</strong>, lemmatization, morphological
analysis, entity linking and more
</Li>
<Li>
Easily extensible with <strong>custom components</strong> and attributes
</Li>
<Li>
Support for custom models in <strong>PyTorch</strong>,{' '}
<strong>TensorFlow</strong> and other frameworks
</Li> </Li>
<Li>Part-of-speech tagging</Li>
<Li>Labelled dependency parsing</Li>
<Li>Syntax-driven sentence segmentation</Li>
<Li> <Li>
Built in <strong>visualizers</strong> for syntax and NER Built in <strong>visualizers</strong> for syntax and NER
</Li> </Li>
<Li>Convenient string-to-hash mapping</Li>
<Li>Export to numpy data arrays</Li>
<Li>Efficient binary serialization</Li>
<Li> <Li>
Easy <strong>model packaging</strong> and deployment Easy <strong>model packaging</strong>, deployment and workflow
management
</Li> </Li>
<Li>Robust, rigorously evaluated accuracy</Li> <Li>Robust, rigorously evaluated accuracy</Li>
</Ul> </Ul>
</LandingCol> </LandingCol>
</LandingGrid> </LandingGrid>
<LandingBannerGrid>
<LandingBanner
label="New in v3.0"
title="Transformer-based pipelines, new training system, project templates &amp; more"
to="/usage/v3"
button="See what's new"
small
>
spaCy v3.0 features all new <strong>transformer-based pipelines</strong> that
bring spaCy's accuracy right up to the current <strong>state-of-the-art</strong>
. You can use any pretrained transformer to train your own pipelines, and even
share one transformer between multiple components with{' '}
<strong>multi-task learning</strong>. Training is now fully configurable and
extensible, and you can define your own custom models using{' '}
<strong>PyTorch</strong>, <strong>TensorFlow</strong> and other frameworks. The
new spaCy projects system lets you describe whole{' '}
<strong>end-to-end workflows</strong> in a single file, giving you an easy path
from prototype to production, and making it easy to clone and adapt
best-practice projects for your own use cases.
</LandingBanner>
<LandingBanner
title="Prodigy: Radically efficient machine teaching"
label="From the makers of spaCy"
to="https://prodi.gy"
button="Try it out"
background="#f6f6f6"
color="#000"
small
>
<Link to="https://prodi.gy" hidden>
{/** Update image */}
<img
src={prodigyImage}
alt="Prodigy: Radically efficient machine teaching"
/>
</Link>
<br />
<br />
Prodigy is an <strong>annotation tool</strong> so efficient that data scientists
can do the annotation themselves, enabling a new level of rapid iteration.
Whether you're working on entity recognition, intent detection or image
classification, Prodigy can help you <strong>train and evaluate</strong> your
models faster.
</LandingBanner>
</LandingBannerGrid>
<LandingGrid cols={2} style={{ gridTemplateColumns: '1fr calc(80ch + 14rem)' }}>
<LandingCol>
<H2>Reproducible training for custom pipelines</H2>
<p>
spaCy v3.0 introduces a comprehensive and extensible system for{' '}
<strong>configuring your training runs</strong>. Your configuration file
will describe every detail of your training run, with no hidden defaults,
making it easy to <strong>rerun your experiments</strong> and track changes.
You can use the quickstart widget or the{' '}
<Link to="/api/cli#init-config">
<InlineCode>init config</InlineCode>
</Link>{' '}
command to get started, or clone a project template for an end-to-end
workflow.
</p>
<p>
<Button to="/usage/training">Get started</Button>
</p>
</LandingCol>
<LandingCol>
<QuickstartTraining />
</LandingCol>
</LandingGrid>
<LandingGrid cols={2}>
<LandingCol>
<Link to="/usage/projects" hidden>
<img src={projectsImage} />
</Link>
<br />
<br />
<br />
{/** TODO: update with actual example */}
<Project id="some_example">
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum
sodales lectus.
</Project>
</LandingCol>
<LandingCol>
<H2>End-to-end workflows from prototype to production</H2>
<p>
spaCy's new project system gives you a smooth path from prototype to
production. It lets you keep track of all those{' '}
<strong>data transformation</strong>, preprocessing and{' '}
<strong>training steps</strong>, so you can make sure your project is always
ready to hand over for automation. It features source asset download,
command execution, checksum verification, and caching with a variety of
backends and integrations.
</p>
<p>
<Button to="/usage/projects">Try it out</Button>
</p>
</LandingCol>
</LandingGrid>
<LandingBannerGrid> <LandingBannerGrid>
<LandingBanner <LandingBanner
to="https://course.spacy.io" to="https://course.spacy.io"
@ -169,68 +282,41 @@ const Landing = ({ data }) => {
<strong>55 exercises</strong> featuring videos, slide decks, multiple-choice <strong>55 exercises</strong> featuring videos, slide decks, multiple-choice
questions and interactive coding practice in the browser. questions and interactive coding practice in the browser.
</LandingBanner> </LandingBanner>
<LandingBanner <LandingBanner
title="Prodigy: Radically efficient machine teaching" title="spaCy IRL: Two days of NLP"
label="From the makers of spaCy" label="Watch the videos"
to="https://prodi.gy" to="https://www.youtube.com/playlist?list=PLBmcuObd5An4UC6jvK_-eSl6jCvP1gwXc"
button="Try it out" button="Watch the videos"
background="#eee" background="#ffc194"
color="#000" backgroundImage={irlBackground}
color="#1a1e23"
small small
> >
Prodigy is an <strong>annotation tool</strong> so efficient that data scientists We were pleased to invite the spaCy community and other folks working on NLP to
can do the annotation themselves, enabling a new level of rapid iteration. Berlin for a small and intimate event. We booked a beautiful venue, hand-picked
Whether you're working on entity recognition, intent detection or image an awesome lineup of speakers and scheduled plenty of social time to get to know
classification, Prodigy can help you <strong>train and evaluate</strong> your each other. The YouTube playlist includes 12 talks about NLP research,
models faster. Stream in your own examples or real-world data from live APIs, development and applications, with keynotes by Sebastian Ruder (DeepMind) and
update your model in real-time and chain models together to build more complex Yoav Goldberg (Allen AI).
systems.
</LandingBanner> </LandingBanner>
</LandingBannerGrid> </LandingBannerGrid>
<LandingLogos title="spaCy is trusted by" logos={data.logosUsers}> <LandingGrid cols={2} style={{ gridTemplateColumns: '1fr 60%' }}>
<Button to={`https://github.com/${data.repo}/stargazers`}>and many more</Button>
</LandingLogos>
<LandingLogos title="Featured on" logos={data.logosPublications} />
<LandingBanner
title="BERT-style language model pretraining"
label="New in v2.1"
to="/usage/v2-1"
button="Read more"
>
Learn more from small training corpora by initializing your models with{' '}
<strong>knowledge from raw text</strong>. The new pretrain command teaches spaCy's
CNN model to predict words based on their context, producing representations of
words in contexts. If you've seen Google's BERT system or fast.ai's ULMFiT, spaCy's
pretraining is similar but much more efficient. It's still experimental, but users
are already reporting good results, so give it a try!
</LandingBanner>
<LandingGrid cols={2}>
<LandingCol> <LandingCol>
<H2>Benchmarks</H2> <H2>Benchmarks</H2>
<p> <p>
In 2015, independent researchers from Emory University and Yahoo! Labs spaCy v3.0 introduces transformer-based pipelines that bring spaCy's
showed that spaCy offered the{' '} accuracy right up to the current <strong>state-of-the-art</strong>. You can
<strong>fastest syntactic parser in the world</strong> and that its accuracy also use a CPU-optimized pipeline, which is less accurate but much cheaper
was <strong>within 1% of the best</strong> available ( to run.
<Link to="https://aclweb.org/anthology/P/P15/P15-1038.pdf">
Choi et al., 2015
</Link>
). spaCy v2.0, released in 2017, is more accurate than any of the systems
Choi et al. evaluated.
</p> </p>
<p> <p>
<Button to="/usage/facts-figures#benchmarks" large> <Button to="/usage/facts-figures#benchmarks">See details</Button>
See details
</Button>
</p> </p>
</LandingCol> </LandingCol>
<LandingCol> <LandingCol>
<BenchmarksChoi /> <Benchmarks />
</LandingCol> </LandingCol>
</LandingGrid> </LandingGrid>
</> </>
@ -245,18 +331,6 @@ Landing.propTypes = {
models: PropTypes.arrayOf(PropTypes.string), models: PropTypes.arrayOf(PropTypes.string),
}) })
), ),
logosUsers: PropTypes.arrayOf(
PropTypes.shape({
id: PropTypes.string.isRequired,
url: PropTypes.string.isRequired,
})
),
logosPublications: PropTypes.arrayOf(
PropTypes.shape({
id: PropTypes.string.isRequired,
url: PropTypes.string.isRequired,
})
),
}), }),
} }
@ -274,14 +348,6 @@ const landingQuery = graphql`
models models
starters starters
} }
logosUsers {
id
url
}
logosPublications {
id
url
}
} }
} }
} }

View File

@ -58,7 +58,8 @@ const QuickstartInstall = ({ id, title }) => (
<StaticQuery <StaticQuery
query={query} query={query}
render={({ site }) => { render={({ site }) => {
const models = site.siteMetadata.languages.filter(({ models }) => models !== null) const { nightly, languages } = site.siteMetadata
const models = languages.filter(({ models }) => models !== null)
const data = [ const data = [
...DATA, ...DATA,
{ {
@ -82,7 +83,10 @@ const QuickstartInstall = ({ id, title }) => (
</QS> </QS>
<QS package="pip">pip install -U spacy</QS> <QS package="pip">pip install -U spacy</QS>
<QS package="conda">conda install -c conda-forge spacy</QS> <QS package="conda">conda install -c conda-forge spacy</QS>
<QS package="source">git clone https://github.com/{repo}</QS> <QS package="source">
git clone https://github.com/{repo}
{nightly ? ` --branch develop` : ''}
</QS>
<QS package="source">cd spaCy</QS> <QS package="source">cd spaCy</QS>
<QS package="source" os="linux"> <QS package="source" os="linux">
export PYTHONPATH=`pwd` export PYTHONPATH=`pwd`
@ -127,6 +131,7 @@ const query = graphql`
query QuickstartInstallQuery { query QuickstartInstallQuery {
site { site {
siteMetadata { siteMetadata {
nightly
languages { languages {
code code
name name

View File

@ -47,7 +47,7 @@ const DATA = [
}, },
] ]
export default function QuickstartTraining({ id, title, download = 'config.cfg' }) { export default function QuickstartTraining({ id, title, download = 'base_config.cfg' }) {
const [lang, setLang] = useState(DEFAULT_LANG) const [lang, setLang] = useState(DEFAULT_LANG)
const [components, setComponents] = useState([]) const [components, setComponents] = useState([])
const [[hardware], setHardware] = useState([DEFAULT_HARDWARE]) const [[hardware], setHardware] = useState([DEFAULT_HARDWARE])