Update docs [ci skip]

This commit is contained in:
Ines Montani 2020-09-12 17:05:10 +02:00
parent eedaaaec75
commit 8b0dabe987
76 changed files with 722 additions and 790 deletions

View File

@ -609,7 +609,6 @@ In addition to the native markdown elements, you can use the components
├── docs # the actual markdown content
├── meta # JSON-formatted site metadata
| ├── languages.json # supported languages and statistical models
| ├── logos.json # logos and links for landing page
| ├── sidebars.json # sidebar navigations for different sections
| ├── site.json # general site metadata
| └── universe.json # data for the spaCy universe section

View File

@ -38,7 +38,7 @@ how the component should be configured. You can override its settings via the
| `validate` | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. ~~bool~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/attributeruler.py
%%GITHUB_SPACY/spacy/pipeline/attributeruler.py
```
## AttributeRuler.\_\_init\_\_ {#init tag="method"}

View File

@ -230,12 +230,12 @@ $ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type]
### Converters {#converters}
| ID | Description |
| ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `auto` | Automatically pick converter based on file extension and file content (default). |
| `json` | JSON-formatted training data used in spaCy v2.x. |
| `conll` | Universal Dependencies `.conllu` or `.conll` format. |
| `ner` | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). |
| `iob` | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). |
| `ner` | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](%%GITHUB_SPACY/extra/example_data/ner_example_data). |
| `iob` | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](%%GITHUB_SPACY/extra/example_data/ner_example_data). |
## debug {#debug new="3"}
@ -358,37 +358,37 @@ File /path/to/spacy/ml/models/tok2vec.py (line 207)
Registry @loggers
Name spacy.ConsoleLogger.v1
Module spacy.training.loggers
File /path/to/spacy/gold/loggers.py (line 8)
File /path/to/spacy/training/loggers.py (line 8)
[training.batcher]
Registry @batchers
Name spacy.batch_by_words.v1
Module spacy.training.batchers
File /path/to/spacy/gold/batchers.py (line 49)
File /path/to/spacy/training/batchers.py (line 49)
[training.batcher.size]
Registry @schedules
Name compounding.v1
Module thinc.schedules
File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 43)
File /path/to/thinc/thinc/schedules.py (line 43)
[training.dev_corpus]
Registry @readers
Name spacy.Corpus.v1
Module spacy.training.corpus
File /path/to/spacy/gold/corpus.py (line 18)
File /path/to/spacy/training/corpus.py (line 18)
[training.optimizer]
Registry @optimizers
Name Adam.v1
Module thinc.optimizers
File /Users/ines/Repos/explosion/thinc/thinc/optimizers.py (line 58)
File /path/to/thinc/thinc/optimizers.py (line 58)
[training.optimizer.learn_rate]
Registry @schedules
Name warmup_linear.v1
Module thinc.schedules
File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 91)
File /path/to/thinc/thinc/schedules.py (line 91)
[training.train_corpus]
Registry @readers
Name spacy.Corpus.v1
Module spacy.training.corpus
File /path/to/spacy/gold/corpus.py (line 18)
File /path/to/spacy/training/corpus.py (line 18)
```
</Accordion>

View File

@ -2,7 +2,7 @@
title: Corpus
teaser: An annotated corpus
tag: class
source: spacy/gold/corpus.py
source: spacy/training/corpus.py
new: 3
---
@ -42,7 +42,7 @@ streaming.
| `limit` | Limit corpus to a subset of examples, e.g. for debugging. Defaults to `0` for no limit. ~~int~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/gold/corpus.py
%%GITHUB_SPACY/spacy/training/corpus.py
```
## Corpus.\_\_init\_\_ {#init tag="method"}

View File

@ -24,11 +24,11 @@ With Cython there are four ways of declaring complex data types. Unfortunately
we use all four in different places, as they all have different utility:
| Declaration | Description | Example |
| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- |
| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
| `class` | A normal Python class. | [`Language`](/api/language) |
| `cdef class` | A Python extension type. Differs from a normal Python class in that its attributes can be defined on the underlying struct. Can have C-level objects as attributes (notably structs and pointers), and can have methods which have C-level objects as arguments or return types. | [`Lexeme`](/api/cython-classes#lexeme) |
| `cdef struct` | A struct is just a collection of variables, sort of like a named tuple, except the memory is contiguous. Structs can't have methods, only attributes. | [`LexemeC`](/api/cython-structs#lexemec) |
| `cdef cppclass` | A C++ class. Like a struct, this can be allocated on the stack, but can have methods, a constructor and a destructor. Differs from `cdef class` in that it can be created and destroyed without acquiring the Python global interpreter lock. This style is the most obscure. | [`StateC`](https://github.com/explosion/spaCy/tree/master/spacy/syntax/_state.pxd) |
| `cdef cppclass` | A C++ class. Like a struct, this can be allocated on the stack, but can have methods, a constructor and a destructor. Differs from `cdef class` in that it can be created and destroyed without acquiring the Python global interpreter lock. This style is the most obscure. | [`StateC`](%%GITHUB_SPACY/spacy/pipeline/_parser_internals/_state.pxd) |
The most important classes in spaCy are defined as `cdef class` objects. The
underlying data for these objects is usually gathered into a struct, which is

View File

@ -37,7 +37,7 @@ recommended settings for your use case, check out the
> guide on [registered functions](/usage/training#config-functions) for details.
```ini
https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg
%%GITHUB_SPACY/spacy/default_config.cfg
```
<Infobox title="Notes on data validation" emoji="💡">
@ -45,8 +45,7 @@ https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg
Under the hood, spaCy's configs are powered by our machine learning library
[Thinc's config system](https://thinc.ai/docs/usage-config), which uses
[`pydantic`](https://github.com/samuelcolvin/pydantic/) for data validation
based on type hints. See
[`spacy/schemas.py`](https://github.com/explosion/spaCy/blob/develop/spacy/schemas.py)
based on type hints. See [`spacy/schemas.py`](%%GITHUB_SPACY/spacy/schemas.py)
for the schemas used to validate the default config. Arguments of registered
functions are validated against their type annotations, if available. To debug
your config and check that it's valid, you can run the
@ -456,7 +455,7 @@ lexical data.
Here's an example of the 20 most frequent lexemes in the English training data:
```json
https://github.com/explosion/spaCy/tree/master/examples/training/vocab-data.jsonl
%%GITHUB_SPACY/extra/example_data/vocab-data.jsonl
```
## Pipeline meta {#meta}

View File

@ -57,7 +57,7 @@ architectures and their arguments and hyperparameters.
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/dep_parser.pyx
%%GITHUB_SPACY/spacy/pipeline/dep_parser.pyx
```
## DependencyParser.\_\_init\_\_ {#init tag="method"}

View File

@ -50,7 +50,7 @@ architectures and their arguments and hyperparameters.
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entity_linker.py
%%GITHUB_SPACY/spacy/pipeline/entity_linker.py
```
## EntityLinker.\_\_init\_\_ {#init tag="method"}

View File

@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters.
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/ner.pyx
%%GITHUB_SPACY/spacy/pipeline/ner.pyx
```
## EntityRecognizer.\_\_init\_\_ {#init tag="method"}

View File

@ -42,7 +42,7 @@ how the component should be configured. You can override its settings via the
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"||"`. ~~str~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entityruler.py
%%GITHUB_SPACY/spacy/pipeline/entityruler.py
```
## EntityRuler.\_\_init\_\_ {#init tag="method"}

View File

@ -2,7 +2,7 @@
title: Example
teaser: A training instance
tag: class
source: spacy/gold/example.pyx
source: spacy/training/example.pyx
new: 3.0
---

View File

@ -945,10 +945,10 @@ available to the loaded object.
## Class attributes {#class-attributes}
| Name | Description |
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `Defaults` | Settings, data and factory methods for creating the `nlp` object and processing pipeline. ~~Defaults~~ |
| `lang` | Two-letter language ID, i.e. [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). ~~str~~ |
| `default_config` | Base [config](/usage/training#config) to use for [Language.config](/api/language#config). Defaults to [`default_config.cfg`](https://github.com/explosion/spaCy/tree/develop/spacy/default_config.cfg). ~~Config~~ |
| `default_config` | Base [config](/usage/training#config) to use for [Language.config](/api/language#config). Defaults to [`default_config.cfg`](%%GITHUB_SPACY/spacy/default_config.cfg). ~~Config~~ |
## Defaults {#defaults}
@ -982,33 +982,16 @@ customize the default language data:
> ```
| Name | Description |
| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `stop_words` | List of stop words, used for `Token.is_stop`.<br />**Example:** [`stop_words.py`][stop_words.py] ~~Set[str]~~ |
| `tokenizer_exceptions` | Tokenizer exception rules, string mapped to list of token attributes.<br />**Example:** [`de/tokenizer_exceptions.py`][de/tokenizer_exceptions.py] ~~Dict[str, List[dict]]~~ |
| `prefixes`, `suffixes`, `infixes` | Prefix, suffix and infix rules for the default tokenizer.<br />**Example:** [`puncutation.py`][punctuation.py] ~~Optional[List[Union[str, Pattern]]]~~ |
| `token_match` | Optional regex for matching strings that should never be split, overriding the infix rules.<br />**Example:** [`fr/tokenizer_exceptions.py`][fr/tokenizer_exceptions.py] ~~Optional[Pattern]~~ |
| `url_match` | Regular expression for matching URLs. Prefixes and suffixes are removed before applying the match.<br />**Example:** [`tokenizer_exceptions.py`][tokenizer_exceptions.py] ~~Optional[Pattern]~~ |
| `lex_attr_getters` | Custom functions for setting lexical attributes on tokens, e.g. `like_num`.<br />**Example:** [`lex_attrs.py`][lex_attrs.py] ~~Dict[int, Callable[[str], Any]]~~ |
| `syntax_iterators` | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks).<br />**Example:** [`syntax_iterators.py`][syntax_iterators.py]. ~~Dict[str, Callable[[Union[Doc, Span]], Iterator[Span]]]~~ |
| `writing_system` | Information about the language's writing system, available via `Vocab.writing_system`. Defaults to: `{"direction": "ltr", "has_case": True, "has_letters": True}.`.<br />**Example:** [`zh/__init__.py`][zh/__init__.py] ~~Dict[str, Any]~~ |
| `config` | Default [config](/usage/training#config) added to `nlp.config`. This can include references to custom tokenizers or lemmatizers.<br />**Example:** [`zh/__init__.py`][zh/__init__.py] ~~Config~~ |
[stop_words.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/stop_words.py
[tokenizer_exceptions.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/tokenizer_exceptions.py
[de/tokenizer_exceptions.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/de/tokenizer_exceptions.py
[fr/tokenizer_exceptions.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/fr/tokenizer_exceptions.py
[punctuation.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py
[lex_attrs.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/lex_attrs.py
[syntax_iterators.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/syntax_iterators.py
[zh/__init__.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/zh/__init__.py
| --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `stop_words` | List of stop words, used for `Token.is_stop`.<br />**Example:** [`stop_words.py`](%%GITHUB_SPACY/spacy/lang/en/stop_words.py) ~~Set[str]~~ |
| `tokenizer_exceptions` | Tokenizer exception rules, string mapped to list of token attributes.<br />**Example:** [`de/tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/de/tokenizer_exceptions.py) ~~Dict[str, List[dict]]~~ |
| `prefixes`, `suffixes`, `infixes` | Prefix, suffix and infix rules for the default tokenizer.<br />**Example:** [`punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) ~~Optional[List[Union[str, Pattern]]]~~ |
| `token_match` | Optional regex for matching strings that should never be split, overriding the infix rules.<br />**Example:** [`fr/tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/fr/tokenizer_exceptions.py) ~~Optional[Pattern]~~ |
| `url_match` | Regular expression for matching URLs. Prefixes and suffixes are removed before applying the match.<br />**Example:** [`tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/tokenizer_exceptions.py) ~~Optional[Pattern]~~ |
| `lex_attr_getters` | Custom functions for setting lexical attributes on tokens, e.g. `like_num`.<br />**Example:** [`lex_attrs.py`](%%GITHUB_SPACY/spacy/lang/en/lex_attrs.py) ~~Dict[int, Callable[[str], Any]]~~ |
| `syntax_iterators` | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks).<br />**Example:** [`syntax_iterators.py`](%%GITHUB_SPACY/spacy/lang/en/syntax_iterators.py). ~~Dict[str, Callable[[Union[Doc, Span]], Iterator[Span]]]~~ |
| `writing_system` | Information about the language's writing system, available via `Vocab.writing_system`. Defaults to: `{"direction": "ltr", "has_case": True, "has_letters": True}`.<br />**Example:** [`zh/__init__.py`](%%GITHUB_SPACY/spacy/lang/zh/__init__.py) ~~Dict[str, Any]~~ |
| `config` | Default [config](/usage/training#config) added to `nlp.config`. This can include references to custom tokenizers or lemmatizers.<br />**Example:** [`zh/__init__.py`](%%GITHUB_SPACY/spacy/lang/zh/__init__.py) ~~Config~~ |
## Serialization fields {#serialization-fields}

View File

@ -56,7 +56,7 @@ data formats used by the lookup and rule-based lemmatizers, see
| `model` | **Not yet implemented:** the model to use. ~~Model~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/lemmatizer.py
%%GITHUB_SPACY/spacy/pipeline/lemmatizer.py
```
## Lemmatizer.\_\_init\_\_ {#init tag="method"}

View File

@ -37,7 +37,7 @@ architectures and their arguments and hyperparameters.
| `model` | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/morphologizer.pyx
%%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
```
## Morphologizer.\_\_init\_\_ {#init tag="method"}

View File

@ -22,7 +22,7 @@ for how to use the `Pipe` base class to implement custom components.
> inherit from `Pipe`.
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/pipe.pyx
%%GITHUB_SPACY/spacy/pipeline/pipe.pyx
```
## Pipe.\_\_init\_\_ {#init tag="method"}

View File

@ -34,7 +34,7 @@ architectures and their arguments and hyperparameters.
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/senter.pyx
%%GITHUB_SPACY/spacy/pipeline/senter.pyx
```
## SentenceRecognizer.\_\_init\_\_ {#init tag="method"}

View File

@ -33,7 +33,7 @@ how the component should be configured. You can override its settings via the
| `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/sentencizer.pyx
%%GITHUB_SPACY/spacy/pipeline/sentencizer.pyx
```
## Sentencizer.\_\_init\_\_ {#init tag="method"}

View File

@ -34,7 +34,7 @@ architectures and their arguments and hyperparameters.
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/tagger.pyx
%%GITHUB_SPACY/spacy/pipeline/tagger.pyx
```
## Tagger.\_\_init\_\_ {#init tag="method"}

View File

@ -41,7 +41,7 @@ architectures and their arguments and hyperparameters.
| `model` | A model instance that predicts scores for each category. Defaults to [TextCatEnsemble](/api/architectures#TextCatEnsemble). ~~Model[List[Doc], List[Floats2d]]~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/textcat.py
%%GITHUB_SPACY/spacy/pipeline/textcat.py
```
## TextCategorizer.\_\_init\_\_ {#init tag="method"}

View File

@ -45,7 +45,7 @@ architectures and their arguments and hyperparameters.
| `model` | The model to use. Defaults to [HashEmbedCNN](/api/architectures#HashEmbedCNN). ~~Model[List[Doc], List[Floats2d]]~~ |
```python
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/tok2vec.py
%%GITHUB_SPACY/spacy/pipeline/tok2vec.py
```
## Tok2Vec.\_\_init\_\_ {#init tag="method"}

View File

@ -105,8 +105,7 @@ your installation, installed pipelines and local setup from within spaCy.
### spacy.explain {#spacy.explain tag="function"}
Get a description for a given POS tag, dependency label or entity type. For a
list of available terms, see
[`glossary.py`](https://github.com/explosion/spaCy/tree/master/spacy/glossary.py).
list of available terms, see [`glossary.py`](%%GITHUB_SPACY/spacy/glossary.py).
> #### Example
>
@ -263,10 +262,10 @@ If a setting is not present in the options, the default value will be used.
> ```
| Name | Description |
| --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| --------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](https://github.com/explosion/spaCy/blob/master/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
By default, displaCy comes with colors for all entity types used by
[spaCy's trained pipelines](/models). If you're using custom entity types, you
@ -348,7 +347,7 @@ See the [`Transformer`](/api/transformer) API reference and
| [`span_getters`](/api/transformer#span_getters) | Registry for functions that take a batch of `Doc` objects and return a list of `Span` objects to process by the transformer, e.g. sentences. |
| [`annotation_setters`](/api/transformer#annotation_setters) | Registry for functions that create annotation setters. Annotation setters are functions that take a batch of `Doc` objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) and can set additional annotations on the `Doc`. |
## Loggers {#loggers source="spacy/gold/loggers.py" new="3"}
## Loggers {#loggers source="spacy/training/loggers.py" new="3"}
A logger records the training results. When a logger is created, two functions
are returned: one for logging the information for each training step, and a
@ -452,7 +451,7 @@ remain in the config file stored on your local system.
| `project_name` | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~ |
| `remove_config_values` | A list of values to exclude from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ |
## Batchers {#batchers source="spacy/gold/batchers.py" new="3"}
## Batchers {#batchers source="spacy/training/batchers.py" new="3"}
A data batcher implements a batching strategy that essentially turns a stream of
items into a stream of batches, with each batch consisting of one item or a list
@ -536,7 +535,7 @@ sequences in the batch.
| `discard_oversize` | Whether to discard sequences that are by themselves longer than the largest padded batch size. ~~bool~~ |
| `get_length` | Optional function that receives a sequence item and returns its length. Defaults to the built-in `len()` if not set. ~~Optional[Callable[[Any], int]]~~ |
## Training data and alignment {#gold source="spacy/gold"}
## Training data and alignment {#gold source="spacy/training"}
### training.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"}
@ -616,12 +615,12 @@ token-based tags, e.g. to overwrite the `doc.ents`.
## Utility functions {#util source="spacy/util.py"}
spaCy comes with a small collection of utility functions located in
[`spacy/util.py`](https://github.com/explosion/spaCy/tree/master/spacy/util.py).
Because utility functions are mostly intended for **internal use within spaCy**,
their behavior may change with future releases. The functions documented on this
page should be safe to use and we'll try to ensure backwards compatibility.
However, we recommend having additional tests in place if your application
depends on any of spaCy's utilities.
[`spacy/util.py`](%%GITHUB_SPACY/spacy/util.py). Because utility functions are
mostly intended for **internal use within spaCy**, their behavior may change
with future releases. The functions documented on this page should be safe to
use and we'll try to ensure backwards compatibility. However, we recommend
having additional tests in place if your application depends on any of spaCy's
utilities.
### util.get_lang_class {#util.get_lang_class tag="function"}
@ -833,8 +832,8 @@ Compile a sequence of prefix rules into a regex object.
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `entries` | The prefix rules, e.g. [`lang.punctuation.TOKENIZER_PREFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
| `entries` | The prefix rules, e.g. [`lang.punctuation.TOKENIZER_PREFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
| **RETURNS** | The regex object to be used for [`Tokenizer.prefix_search`](/api/tokenizer#attributes). ~~Pattern~~ |
### util.compile_suffix_regex {#util.compile_suffix_regex tag="function"}
@ -850,8 +849,8 @@ Compile a sequence of suffix rules into a regex object.
> ```
| Name | Description |
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `entries` | The suffix rules, e.g. [`lang.punctuation.TOKENIZER_SUFFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
| `entries` | The suffix rules, e.g. [`lang.punctuation.TOKENIZER_SUFFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
| **RETURNS** | The regex object to be used for [`Tokenizer.suffix_search`](/api/tokenizer#attributes). ~~Pattern~~ |
### util.compile_infix_regex {#util.compile_infix_regex tag="function"}
@ -867,8 +866,8 @@ Compile a sequence of infix rules into a regex object.
> ```
| Name | Description |
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `entries` | The infix rules, e.g. [`lang.punctuation.TOKENIZER_INFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
| ----------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
| `entries` | The infix rules, e.g. [`lang.punctuation.TOKENIZER_INFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
| **RETURNS** | The regex object to be used for [`Tokenizer.infix_finditer`](/api/tokenizer#attributes). ~~Pattern~~ |
### util.minibatch {#util.minibatch tag="function" new="2"}

View File

@ -31,7 +31,7 @@ supports all models that are available via the
Usually you will connect subsequent components to the shared transformer using
the [TransformerListener](/api/architectures#TransformerListener) layer. This
works similarly to spaCy's [Tok2Vec](/api/tok2vec) component and
[Tok2VecListener](/api/architectures/Tok2VecListener) sublayer.
[Tok2VecListener](/api/architectures#Tok2VecListener) sublayer.
The component assigns the output of the transformer to the `Doc`'s extension
attributes. We also calculate an alignment between the word-piece tokens and the

Binary file not shown.

After

Width:  |  Height:  |  Size: 281 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 151 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 146 KiB

View File

@ -2,9 +2,8 @@ Every language is different and usually full of **exceptions and special
cases**, especially amongst the most common words. Some of these exceptions are
shared across languages, while others are **entirely specific** – usually so
specific that they need to be hard-coded. The
[`lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang) module
contains all language-specific data, organized in simple Python files. This
makes the data easy to update and extend.
[`lang`](%%GITHUB_SPACY/spacy/lang) module contains all language-specific data,
organized in simple Python files. This makes the data easy to update and extend.
The **shared language data** in the directory root includes rules that can be
generalized across languages – for example, rules for basic punctuation, emoji,
@ -23,27 +22,11 @@ values are defined in the [`Language.Defaults`](/api/language#defaults).
> ```
| Name | Description |
| ----------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **Stop words**<br />[`stop_words.py`][stop_words.py] | List of most common words of a language that are often useful to filter out, for example "and" or "I". Matching tokens will return `True` for `is_stop`. |
| **Tokenizer exceptions**<br />[`tokenizer_exceptions.py`][tokenizer_exceptions.py] | Special-case rules for the tokenizer, for example, contractions like "can't" and abbreviations with punctuation, like "U.K.". |
| **Punctuation rules**<br />[`punctuation.py`][punctuation.py] | Regular expressions for splitting tokens, e.g. on punctuation or special characters like emoji. Includes rules for prefixes, suffixes and infixes. |
| **Character classes**<br />[`char_classes.py`][char_classes.py] | Character classes to be used in regular expressions, for example, Latin characters, quotes, hyphens or icons. |
| **Lexical attributes**<br />[`lex_attrs.py`][lex_attrs.py] | Custom functions for setting lexical attributes on tokens, e.g. `like_num`, which includes language-specific words like "ten" or "hundred". |
| **Syntax iterators**<br />[`syntax_iterators.py`][syntax_iterators.py] | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks). |
| **Lemmatizer**<br />[`lemmatizer.py`][lemmatizer.py] [`spacy-lookups-data`][spacy-lookups-data] | Custom lemmatizer implementation and lemmatization tables. |
[stop_words.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/stop_words.py
[tokenizer_exceptions.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/de/tokenizer_exceptions.py
[punctuation.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py
[char_classes.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/char_classes.py
[lex_attrs.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/lex_attrs.py
[syntax_iterators.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/syntax_iterators.py
[lemmatizer.py]:
https://github.com/explosion/spaCy/tree/master/spacy/lang/fr/lemmatizer.py
[spacy-lookups-data]: https://github.com/explosion/spacy-lookups-data
| ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **Stop words**<br />[`stop_words.py`](%%GITHUB_SPACY/spacy/lang/en/stop_words.py) | List of most common words of a language that are often useful to filter out, for example "and" or "I". Matching tokens will return `True` for `is_stop`. |
| **Tokenizer exceptions**<br />[`tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/de/tokenizer_exceptions.py) | Special-case rules for the tokenizer, for example, contractions like "can't" and abbreviations with punctuation, like "U.K.". |
| **Punctuation rules**<br />[`punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) | Regular expressions for splitting tokens, e.g. on punctuation or special characters like emoji. Includes rules for prefixes, suffixes and infixes. |
| **Character classes**<br />[`char_classes.py`](%%GITHUB_SPACY/spacy/lang/char_classes.py) | Character classes to be used in regular expressions, for example, Latin characters, quotes, hyphens or icons. |
| **Lexical attributes**<br />[`lex_attrs.py`](%%GITHUB_SPACY/spacy/lang/en/lex_attrs.py) | Custom functions for setting lexical attributes on tokens, e.g. `like_num`, which includes language-specific words like "ten" or "hundred". |
| **Syntax iterators**<br />[`syntax_iterators.py`](%%GITHUB_SPACY/spacy/lang/en/syntax_iterators.py) | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks). |
| **Lemmatizer**<br />[`lemmatizer.py`](%%GITHUB_SPACY/spacy/lang/fr/lemmatizer.py) [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) | Custom lemmatizer implementation and lemmatization tables. |

View File

@ -1,10 +0,0 @@
import { Help } from 'components/typography'
| System | Year | Language | Accuracy | Speed (wps) |
| -------------- | ---- | --------------- | -------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| **spaCy v2.x** | 2017 | Python / Cython | **92.6** | _n/a_ <Help>This table shows speed as benchmarked by Choi et al. We therefore can't provide comparable figures, as we'd be running the benchmark on different hardware.</Help> |
| **spaCy v1.x** | 2015 | Python / Cython | 91.8 | 13,963 |
| ClearNLP | 2015 | Java | 91.7 | 10,271 |
| CoreNLP | 2015 | Java | 89.6 | 8,602 |
| MATE | 2015 | Java | 92.5 | 550 |
| Turbo | 2015 | C++ | 92.4 | 349 |

View File

@ -0,0 +1,44 @@
import { Help } from 'components/typography'; import Link from 'components/link'
<!-- TODO: update, add project template -->
<figure>
| System | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
| ------------------------------------------------------------------------- | ----------------: | ----------------: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | | | | | 6k |
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | | | | | |
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.9 | 10k | |
| [Stanza](https://stanfordnlp.github.io/stanza/) (StanfordNLP)<sup>1</sup> | _n/a_<sup>2</sup> | _n/a_<sup>2</sup> | 88.8 | 234 | 2k |
| <Link to="https://github.com/flairNLP/flair" hideIcon>Flair</Link> | - | 97.9 | 89.3 | | |
<figcaption class="caption">
**Accuracy and speed on the
[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.**<br />**1.**
[Qi et al. (2020)](https://arxiv.org/pdf/2003.07082.pdf). **2.** _Coming soon_:
Qi et al. don't report parsing and tagging results on OntoNotes. We're working
on training Stanza on this corpus to allow direct comparison.
</figcaption>
</figure>
<figure>
| System | POS | UAS | LAS |
| ------------------------------------------------------------------------------ | ---: | ---: | ---: |
| spaCy RoBERTa (2020) | | | |
| spaCy CNN (2020) | | | |
| [Mrini et al.](https://khalilmrini.github.io/Label_Attention_Layer.pdf) (2019) | 97.3 | 97.4 | 96.3 |
| [Zhou and Zhao](https://www.aclweb.org/anthology/P19-1230/) (2019) | 97.3 | 97.2 | 95.7 |
<figcaption class="caption">
**Accuracy on the Penn Treebank.** See
[NLP-progress](http://nlpprogress.com/english/dependency_parsing.html) for more
results.
</figcaption>
</figure>

View File

@ -579,12 +579,17 @@ def MyCustomVectors(
## Pretraining {#pretraining}
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
</Infobox>
<!--
- explain general concept and idea (short!)
- present it as a separate lightweight mechanism for pretraining the tok2vec
layer
- advantages (could also be pros/cons table)
- explain how it generates a separate file (!) and how it depends on the same
vectors
-->
> #### Raw text format
>

View File

@ -5,254 +5,55 @@ next: /usage/spacy-101
menu:
- ['Feature Comparison', 'comparison']
- ['Benchmarks', 'benchmarks']
# TODO: - ['Citing spaCy', 'citation']
---
## Feature comparison {#comparison}
## Comparison {#comparison hidden="true"}
Here's a quick comparison of the functionalities offered by spaCy,
[NLTK](http://www.nltk.org/py-modindex.html) and
[CoreNLP](http://stanfordnlp.github.io/CoreNLP/).
### When should I use spaCy? {#comparison-usage}
| | spaCy | NLTK | CoreNLP |
| ----------------------- | :----: | :----: | :-----------: |
| Programming language | Python | Python | Java / Python |
| Neural network models | ✅ | ❌ | ✅ |
| Integrated word vectors | ✅ | ❌ | ❌ |
| Multi-language support | ✅ | ✅ | ✅ |
| Tokenization | ✅ | ✅ | ✅ |
| Part-of-speech tagging | ✅ | ✅ | ✅ |
| Sentence segmentation | ✅ | ✅ | ✅ |
| Dependency parsing | ✅ | ❌ | ✅ |
| Entity recognition | ✅ | ✅ | ✅ |
| Entity linking | ✅ | ❌ | ❌ |
| Coreference resolution | ❌ | ❌ | ✅ |
<!-- TODO: update -->
### When should I use what? {#comparison-usage}
Natural Language Understanding is an active area of research and development, so
there are many different tools or technologies catering to different use-cases.
The table below summarizes a few libraries (spaCy,
[NLTK](http://www.nltk.org/py-modindex.html), [AllenNLP](https://allennlp.org/),
[StanfordNLP](https://stanfordnlp.github.io/stanfordnlp/) and
[TensorFlow](https://www.tensorflow.org/)) to help you get a feel for how things
fit together.
| | spaCy | NLTK | Allen-<br />NLP | Stanford-<br />NLP | Tensor-<br />Flow |
| ----------------------------------------------------------------- | :---: | :--: | :-------------: | :----------------: | :---------------: |
| I'm a beginner and just getting started with NLP. | ✅ | ✅ | ❌ | ✅ | ❌ |
| I want to build an end-to-end production application. | ✅ | ❌ | ❌ | ❌ | ✅ |
| I want to try out different neural network architectures for NLP. | ❌ | ❌ | ✅ | ❌ | ✅ |
| I want to try the latest models with state-of-the-art accuracy. | ❌ | ❌ | ✅ | ✅ | ✅ |
| I want to train models from my own data. | ✅ | ✅ | ✅ | ✅ | ✅ |
| I want my application to be efficient on CPU. | ✅ | ✅ | ❌ | ❌ | ❌ |
| Use Cases |
| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| ✅ **I'm a beginner and just getting started with NLP.**<br />spaCy makes it easy to get started and comes with extensive documentation, including a beginner-friendly [101 guide](/usage/spacy-101) and a free interactive [online course](https://course.spacy.io). |
| ✅ **I want to build an end-to-end production application.** |
| ✅ **I want my application to be efficient on CPU.**<br />While spaCy lets you train modern NLP models that are best run on GPU, it also offers CPU-optimized pipelines, which may be less accurate but much cheaper to run. |
| ✅ **I want to try out different neural network architectures for NLP.** |
| ❌ **I want to build a language generation application.**<br />spaCy's focus is natural language _processing_ and extracting information from large volumes of text. While you can use it to help you re-write existing text, it doesn't include any specific functionality for language generation tasks. |
| ❌ **I want to research machine learning algorithms.** |
## Benchmarks {#benchmarks}
Two peer-reviewed papers in 2015 confirmed that spaCy offers the **fastest
syntactic parser in the world** and that **its accuracy is within 1% of the
best** available. The few systems that are more accurate are 20× slower or more.
spaCy v3.0 introduces transformer-based pipelines that bring spaCy's accuracy
right up to **current state-of-the-art**. You can also use a CPU-optimized
pipeline, which is less accurate but much cheaper to run.
> #### About the evaluation
<!-- TODO: -->
> #### Evaluation details
>
> The first of the evaluations was published by **Yahoo! Labs** and **Emory
> University**, as part of a survey of current parsing technologies
> ([Choi et al., 2015](https://aclweb.org/anthology/P/P15/P15-1038.pdf)). Their
> results and subsequent discussions helped us develop a novel
> psychologically-motivated technique to improve spaCy's accuracy, which we
> published in joint work with Macquarie University
> ([Honnibal and Johnson, 2015](https://www.aclweb.org/anthology/D/D15/D15-1162.pdf)).
> - **OntoNotes 5.0:** spaCy's English models are trained on this corpus, as
> it's several times larger than other English treebanks. However, most
> systems do not report accuracies on it.
> - **Penn Treebank:** The "classic" parsing evaluation for research. However,
> it's quite far removed from actual usage: it uses sentences with
> gold-standard segmentation and tokenization, from a pretty specific type of
> text (articles from a single newspaper, 1984-1989).
import BenchmarksChoi from 'usage/\_benchmarks-choi.md'
import Benchmarks from 'usage/\_benchmarks-models.md'
<BenchmarksChoi />
<Benchmarks />
### Algorithm comparison {#algorithm}
<!-- TODO: update -->
In this section, we compare spaCy's algorithms to recently published systems,
using some of the most popular benchmarks. These benchmarks are designed to help
isolate the contributions of specific algorithmic decisions, so they promote
slightly "idealized" conditions. Specifically, the text comes pre-processed with
"gold standard" token and sentence boundaries. The data sets also tend to be
fairly small, to help researchers iterate quickly. These conditions mean the
models trained on these data sets are not always useful for practical purposes.
<Project id="benchmarks/penn_treebank">
#### Parse accuracy (Penn Treebank / Wall Street Journal) {#parse-accuracy-penn}
The easiest way to reproduce spaCy's benchmarks on the Penn Treebank is to clone
our project template.
This is the "classic" evaluation, so it's the number parsing researchers are
most easily able to put in context. However, it's quite far removed from actual
usage: it uses sentences with gold-standard segmentation and tokenization, from
a pretty specific type of text (articles from a single newspaper, 1984-1989).
</Project>
> #### Methodology
>
> [Andor et al. (2016)](http://arxiv.org/abs/1603.06042) chose slightly
> different experimental conditions from
> [Choi et al. (2015)](https://aclweb.org/anthology/P/P15/P15-1038.pdf), so the
> two accuracy tables here do not present directly comparable figures.
<!-- ## Citing spaCy {#citation}
| System | Year | Type | Accuracy |
| ------------------------------------------------------------ | ---- | ------ | --------: |
| spaCy v2.0.0 | 2017 | neural | 94.48 |
| spaCy v1.1.0 | 2016 | linear | 92.80 |
| [Dozat and Manning][dozat and manning] | 2017 | neural | **95.75** |
| [Andor et al.][andor et al.] | 2016 | neural | 94.44 |
| [SyntaxNet Parsey McParseface][syntaxnet parsey mcparseface] | 2016 | neural | 94.15 |
| [Weiss et al.][weiss et al.] | 2015 | neural | 93.91 |
| [Zhang and McDonald][zhang and mcdonald] | 2014 | linear | 93.32 |
| [Martins et al.][martins et al.] | 2013 | linear | 93.10 |
[dozat and manning]: https://arxiv.org/pdf/1611.01734.pdf
[andor et al.]: http://arxiv.org/abs/1603.06042
[syntaxnet parsey mcparseface]:
https://github.com/tensorflow/models/tree/master/research/syntaxnet
[weiss et al.]:
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43800.pdf
[zhang and mcdonald]: http://research.google.com/pubs/archive/38148.pdf
[martins et al.]: http://www.cs.cmu.edu/~ark/TurboParser/
#### NER accuracy (OntoNotes 5, no pre-process) {#ner-accuracy-ontonotes5}
This is the evaluation we use to tune spaCy's parameters to decide which
algorithms are better than the others. It's reasonably close to actual usage,
because it requires the parses to be produced from raw text, without any
pre-processing.
| System | Year | Type | Accuracy |
| -------------------------------------------------- | ---- | ------ | --------: |
| spaCy [`en_core_web_lg`][en_core_web_lg] v2.0.0a3  | 2017 | neural | 85.85 |
| [Strubell et al.][strubell et al.]  | 2017 | neural | **86.81** |
| [Chiu and Nichols][chiu and nichols]  | 2016 | neural | 86.19 |
| [Durrett and Klein][durrett and klein]  | 2014 | neural | 84.04 |
| [Ratinov and Roth][ratinov and roth]  | 2009 | linear | 83.45 |
[en_core_web_lg]: /models/en#en_core_web_lg
[strubell et al.]: https://arxiv.org/pdf/1702.02098.pdf
[chiu and nichols]:
https://www.semanticscholar.org/paper/Named-Entity-Recognition-with-Bidirectional-LSTM-C-Chiu-Nichols/10a4db59e81d26b2e0e896d3186ef81b4458b93f
[durrett and klein]:
https://www.semanticscholar.org/paper/A-Joint-Model-for-Entity-Analysis-Coreference-Typi-Durrett-Klein/28eb033eee5f51c5e5389cbb6b777779203a6778
[ratinov and roth]: http://www.aclweb.org/anthology/W09-1119
### Model comparison {#spacy-models}
In this section, we provide benchmark accuracies for the pretrained model
pipelines we distribute with spaCy. Evaluations are conducted end-to-end from
raw text, with no "gold standard" pre-processing, over text from a mix of genres
where possible.
> #### Methodology
>
> The evaluation was conducted on raw text with no gold standard information.
> The parser, tagger and entity recognizer were trained on the
> [OntoNotes 5](https://www.gabormelli.com/RKB/OntoNotes_Corpus) corpus, the
> word vectors on [Common Crawl](http://commoncrawl.org).
#### English {#benchmarks-models-english}
| Model | spaCy | Type | UAS | NER F | POS | WPS | Size |
| --------------------------------------------------- | ----- | ------ | -------: | -------: | -------: | --------: | -------: |
| [`en_core_web_sm`](/models/en#en_core_web_sm) 2.0.0 | 2.x | neural | 91.7 | 85.3 | 97.0 | 10.1k | **35MB** |
| [`en_core_web_md`](/models/en#en_core_web_md) 2.0.0 | 2.x | neural | 91.7 | **85.9** | 97.1 | 10.0k | 115MB |
| [`en_core_web_lg`](/models/en#en_core_web_lg) 2.0.0 | 2.x | neural | **91.9** | **85.9** | **97.2** | 10.0k | 812MB |
| `en_core_web_sm` 1.2.0 | 1.x | linear | 86.6 | 78.5 | 96.6 | **25.7k** | 50MB |
| `en_core_web_md` 1.2.1 | 1.x | linear | 90.6 | 81.4 | 96.7 | 18.8k | 1GB |
#### Spanish {#benchmarks-models-spanish}
> #### Evaluation note
>
> The NER accuracy refers to the "silver standard" annotations in the WikiNER
> corpus. Accuracy on these annotations tends to be higher than correct human
> annotations.
| Model | spaCy | Type | UAS | NER F | POS | WPS | Size |
| ----------------------------------------------------- | ----- | ------ | -------: | -------: | -------: | ----: | -------: |
| [`es_core_news_sm`](/models/es#es_core_news_sm) 2.0.0 | 2.x | neural | 89.8 | 88.7 | **96.9** | _n/a_ | **35MB** |
| [`es_core_news_md`](/models/es#es_core_news_md) 2.0.0 | 2.x | neural | **90.2** | 89.0 | 97.8 | _n/a_ | 93MB |
| `es_core_web_md` 1.1.0 | 1.x | linear | 87.5 | **94.2** | 96.7 | _n/a_ | 377MB |
### Detailed speed comparison {#speed-comparison}
Here we compare the per-document processing time of various spaCy
functionalities against other NLP libraries. We show both absolute timings (in
ms) and relative performance (normalized to spaCy). Lower is better.
<Infobox title="Important note" variant="warning">
This evaluation was conducted in 2015. We're working on benchmarks on current
CPU and GPU hardware. In the meantime, we're grateful to the Stanford folks for
drawing our attention to what seems to be
[a long-standing error](https://nlp.stanford.edu/software/tokenizer.html#Speed)
in our CoreNLP benchmarks, especially for their tokenizer. Until we run
corrected experiments, we have updated the table using their figures.
</Infobox>
> #### Methodology
>
> - **Set up:** 100,000 plain-text documents were streamed from an SQLite3
> database, and processed with an NLP library, to one of three levels of
> detail — tokenization, tagging, or parsing. The tasks are additive: to parse
> the text you have to tokenize and tag it. The pre-processing was not
> subtracted from the times — we report the time required for the pipeline to
> complete. We report mean times per document, in milliseconds.
> - **Hardware**: Intel i7-3770 (2012)
> - **Implementation**:
> [`spacy-benchmarks`](https://github.com/explosion/spacy-benchmarks)
<Table>
<thead>
<Tr>
<Th></Th>
<Th colSpan="3">Absolute (ms per doc)</Th>
<Th colSpan="3">Relative (to spaCy)</Th>
</Tr>
<Tr>
<Th>System</Th>
<Th>Tokenize</Th>
<Th>Tag</Th>
<Th>Parse</Th>
<Th>Tokenize</Th>
<Th>Tag</Th>
<Th>Parse</Th>
</Tr>
</thead>
<tbody style="text-align: right">
<Tr>
<Td style="text-align: left"><strong>spaCy</strong></Td>
<Td>0.2ms</Td>
<Td>1ms</Td>
<Td>19ms</Td>
<Td>1x</Td>
<Td>1x</Td>
<Td>1x</Td>
</Tr>
<Tr>
<Td style="text-align: left">CoreNLP</Td>
<Td>0.18ms</Td>
<Td>10ms</Td>
<Td>49ms</Td>
<Td>0.9x</Td>
<Td>10x</Td>
<Td>2.6x</Td>
</Tr>
<Tr>
<Td style="text-align: left">ZPar</Td>
<Td>1ms</Td>
<Td>8ms</Td>
<Td>850ms</Td>
<Td>5x</Td>
<Td>8x</Td>
<Td>44.7x</Td>
</Tr>
<Tr>
<Td style="text-align: left">NLTK</Td>
<Td>4ms</Td>
<Td>443ms</Td>
<Td><em>n/a</em></Td>
<Td>20x</Td>
<Td>443x</Td>
<Td><em>n/a</em></Td>
</Tr>
</tbody>
</Table>
<!-- TODO: update -->

View File

@ -166,10 +166,9 @@ $ python setup.py build_ext --inplace # compile spaCy
```
Compared to regular install via pip, the
[`requirements.txt`](https://github.com/explosion/spaCy/tree/master/requirements.txt)
additionally installs developer dependencies such as Cython. See the
[quickstart widget](#quickstart) to get the right commands for your platform and
Python version.
[`requirements.txt`](%%GITHUB_SPACY/requirements.txt) additionally installs
developer dependencies such as Cython. See the [quickstart widget](#quickstart)
to get the right commands for your platform and Python version.
#### Ubuntu {#source-ubuntu}
@ -195,16 +194,14 @@ that matches the version that was used to compile your Python interpreter.
### Run tests {#run-tests}
spaCy comes with an
[extensive test suite](https://github.com/explosion/spaCy/tree/master/spacy/tests).
In order to run the tests, you'll usually want to clone the
[repository](https://github.com/explosion/spaCy/tree/master/) and
[build spaCy from source](#source). This will also install the required
spaCy comes with an [extensive test suite](%%GITHUB_SPACY/spacy/tests). In order
to run the tests, you'll usually want to clone the [repository](%%GITHUB_SPACY)
and [build spaCy from source](#source). This will also install the required
development dependencies and test utilities defined in the `requirements.txt`.
Alternatively, you can find out where spaCy is installed and run `pytest` on
that directory. Don't forget to also install the test utilities via spaCy's
[`requirements.txt`](https://github.com/explosion/spaCy/tree/master/requirements.txt):
[`requirements.txt`](%%GITHUB_SPACY/requirements.txt):
```bash
$ python -c "import os; import spacy; print(os.path.dirname(spacy.__file__))"

View File

@ -28,9 +28,9 @@ A **model architecture** is a function that wires up a
neural network that is run internally as part of a component in a spaCy
pipeline. To define the actual architecture, you can implement your logic in
Thinc directly, or you can use Thinc as a thin wrapper around frameworks such as
PyTorch, TensorFlow and MXNet. Each Model can also be used as a sublayer of a
PyTorch, TensorFlow and MXNet. Each `Model` can also be used as a sublayer of a
larger network, allowing you to freely combine implementations from different
frameworks into one `Thinc` Model.
frameworks into a single model.
spaCy's built-in components require a `Model` instance to be passed to them via
the config system. To change the model architecture of an existing component,
@ -253,7 +253,7 @@ torch_model = nn.Sequential(
nn.ReLU(),
nn.Dropout2d(dropout),
nn.Softmax(dim=1)
)
)
```
The resulting wrapped `Model` can be used as a **custom architecture** as such,
@ -264,9 +264,10 @@ larger network. This effectively means that you can easily wrap different
components from different frameworks, and "glue" them together with Thinc:
```python
from thinc.api import chain, with_array
from thinc.api import chain, with_array, PyTorchWrapper
from spacy.ml import CharacterEmbed
wrapped_pt_model = PyTorchWrapper(torch_model)
char_embed = CharacterEmbed(width, embed_size, nM, nC)
model = chain(char_embed, with_array(wrapped_pt_model))
```
@ -473,18 +474,17 @@ with Model.define_operators({">>": chain}):
## Create new trainable components {#components}
<!-- TODO:
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
</Infobox>
<!-- TODO:
- Interaction with `predict`, `get_loss` and `set_annotations`
- Initialization life-cycle with `begin_training`, correlation with add_label
Example: relation extraction component (implemented as project template)
Avoid duplication with usage/processing-pipelines#trainable-components ?
-->
![Diagram of a pipeline component with its model](../images/layers-architectures.svg)
<!-- ![Diagram of a pipeline component with its model](../images/layers-architectures.svg)
```python
def update(self, examples):
@ -498,3 +498,4 @@ def __call__(self, doc):
predictions = self.model([doc])
self.set_annotations(predictions)
```
-->

View File

@ -854,24 +854,22 @@ The algorithm can be summarized as follows:
</Accordion>
**Global** and **language-specific** tokenizer data is supplied via the language
data in
[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang). The
tokenizer exceptions define special cases like "don't" in English, which needs
to be split into two tokens: `{ORTH: "do"}` and `{ORTH: "n't", NORM: "not"}`.
The prefixes, suffixes and infixes mostly define punctuation rules for
example, when to split off periods (at the end of a sentence), and when to leave
tokens containing periods intact (abbreviations like "U.S.").
data in [`spacy/lang`](%%GITHUB_SPACY/spacy/lang). The tokenizer exceptions
define special cases like "don't" in English, which needs to be split into two
tokens: `{ORTH: "do"}` and `{ORTH: "n't", NORM: "not"}`. The prefixes, suffixes
and infixes mostly define punctuation rules – for example, when to split off
periods (at the end of a sentence), and when to leave tokens containing periods
intact (abbreviations like "U.S.").
<Accordion title="Should I change the language data or add custom tokenizer rules?" id="lang-data-vs-tokenizer">
Tokenization rules that are specific to one language, but can be **generalized
across that language** should ideally live in the language data in
[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang)  we
always appreciate pull requests! Anything that's specific to a domain or text
type like financial trading abbreviations, or Bavarian youth slang should be
added as a special case rule to your tokenizer instance. If you're dealing with
a lot of customizations, it might make sense to create an entirely custom
subclass.
[`spacy/lang`](%%GITHUB_SPACY/spacy/lang) – we always appreciate pull requests!
Anything that's specific to a domain or text type – like financial trading
abbreviations, or Bavarian youth slang – should be added as a special case rule
to your tokenizer instance. If you're dealing with a lot of customizations, it
might make sense to create an entirely custom subclass.
</Accordion>
@ -1059,7 +1057,7 @@ but also detailed regular expressions that take the surrounding context into
account. For example, there is a regular expression that treats a hyphen between
letters as an infix. If you do not want the tokenizer to split on hyphens
between letters, you can modify the existing infix definition from
[`lang/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/punctuation.py):
[`lang/punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py):
```python
### {executable="true"}
@ -1096,10 +1094,10 @@ print([t.text for t in doc]) # ['mother-in-law']
```
For an overview of the default regular expressions, see
[`lang/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/punctuation.py)
and language-specific definitions such as
[`lang/de/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/de/punctuation.py)
for German.
[`lang/punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) and
language-specific definitions such as
[`lang/de/punctuation.py`](%%GITHUB_SPACY/spacy/lang/de/punctuation.py) for
German.
### Hooking a custom tokenizer into the pipeline {#custom-tokenizer}

View File

@ -76,7 +76,7 @@ spaCy also supports pipelines trained on more than one language. This is
especially useful for named entity recognition. The language ID used for
multi-language or language-neutral pipelines is `xx`. The language class, a
generic subclass containing only the base language data, can be found in
[`lang/xx`](https://github.com/explosion/spaCy/tree/master/spacy/lang/xx).
[`lang/xx`](%%GITHUB_SPACY/spacy/lang/xx).
To train a pipeline using the neutral multi-language class, you can set
`lang = "xx"` in your [training config](/usage/training#config). You can also

View File

@ -728,18 +728,21 @@ workflows, but only one can be tracked by DVC.
</Infobox>
<Project id="integrations/dvc">
<!-- TODO: <Project id="integrations/dvc">
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum
sodales lectus, ut sodales orci ullamcorper id. Sed condimentum neque ut erat
mattis pretium.
</Project>
</Project> -->
---
### Prodigy {#prodigy} <IntegrationLogo name="prodigy" width={100} height="auto" align="right" />
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
The Prodigy integration will require a nightly version of Prodigy that supports
spaCy v3+.
</Infobox>
[Prodigy](https://prodi.gy) is a modern annotation tool for creating training
data for machine learning models, developed by us. It integrates with spaCy
out-of-the-box and provides many different
@ -795,9 +798,7 @@ results.
<Project id="integrations/prodigy">
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum
sodales lectus, ut sodales orci ullamcorper id. Sed condimentum neque ut erat
mattis pretium.
<!-- TODO: -->
</Project>
@ -805,10 +806,6 @@ mattis pretium.
### Streamlit {#streamlit} <IntegrationLogo name="streamlit" width={150} height="auto" align="right" />
<Grid cols={2} gutterBottom={false}>
<div>
[Streamlit](https://streamlit.io) is a Python framework for building interactive
data apps. The [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit)
package helps you integrate spaCy visualizations into your Streamlit apps and
@ -817,16 +814,14 @@ full embedded visualizer, as well as individual components.
<!-- TODO: update once version is stable -->
```bash
$ pip install "spacy_streamlit>=1.0.0a0"
```
</div>
> #### Installation
>
> ```bash
> $ pip install "spacy_streamlit>=1.0.0a0"
> ```
![](../images/spacy-streamlit.png)
</Grid>
Using [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit), your
projects can easily define their own scripts that spin up an interactive
visualizer, using the latest pipeline you trained, or a selection of pipelines
@ -917,10 +912,43 @@ https://github.com/explosion/projects/blob/v3/integrations/fastapi/scripts/main.
### Ray {#ray} <IntegrationLogo name="ray" width={100} height="auto" align="right" />
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
</Infobox>
<!-- TODO: document -->
---
### Weights & Biases {#wandb} <IntegrationLogo name="wandb" width={175} height="auto" align="right" />
<!-- TODO: link to WandB logger, explain that it's built-in but that you can also do other cool stuff with WandB? And then include example project (still need to decide what we want to do here) -->
[Weights & Biases](https://www.wandb.com/) is a popular platform for experiment
tracking. spaCy integrates with it out-of-the-box via the
[`WandbLogger`](/api/top-level#WandbLogger), which you can add as the
`[training.logger]` block of your training [config](/usage/training#config). The
results of each step are then logged in your project, together with the full
**training config**. This means that _every_ hyperparameter, registered function
name and argument will be tracked and you'll be able to see the impact it has on
your results.
> #### Example config
>
> ```ini
> [training.logger]
> @loggers = "spacy.WandbLogger.v1"
> project_name = "monitor_spacy_training"
> remove_config_values = ["paths.train", "paths.dev", "training.dev_corpus.path", "training.train_corpus.path"]
> ```
![Screenshot: Visualized training results](../images/wandb1.jpg)
![Screenshot: Parameter importance using config values](../images/wandb2.jpg 'Parameter importance using config values')
<Project id="integrations/wandb">
Get started with tracking your spaCy training runs in Weights & Biases using our
project template. It includes a simple config using the `WandbLogger`, as well
as a custom logger implementation you can adjust for your specific use case.
<!-- TODO: -->
</Project>

View File

@ -192,12 +192,11 @@ of [`Token`](/api/token). This means that all of the attributes that refer to
computed properties can't be accessed.
The uppercase attribute names like `LOWER` or `IS_PUNCT` refer to symbols from
the
[`spacy.attrs`](https://github.com/explosion/spaCy/tree/master/spacy/attrs.pyx)
enum table. They're passed into a function that essentially is a big case/switch
statement, to figure out which struct field to return. The same attribute
identifiers are used in [`Doc.to_array`](/api/doc#to_array), and a few other
places in the code where you need to describe fields like this.
the [`spacy.attrs`](%%GITHUB_SPACY/spacy/attrs.pyx) enum table. They're passed
into a function that essentially is a big case/switch statement, to figure out
which struct field to return. The same attribute identifiers are used in
[`Doc.to_array`](/api/doc#to_array), and a few other places in the code where
you need to describe fields like this.
</Accordion>

View File

@ -187,11 +187,11 @@ add to that data and saves and loads the data to and from a JSON file.
>
> To see custom serialization methods in action, check out the new
> [`EntityRuler`](/api/entityruler) component and its
> [source](https://github.com/explosion/spaCy/tree/master/spacy/pipeline/entityruler.py).
> Patterns added to the component will be saved to a `.jsonl` file if the
> pipeline is serialized to disk, and to a bytestring if the pipeline is
> serialized to bytes. This allows saving out a pipeline with a rule-based
> entity recognizer and including all rules _with_ the component data.
> [source](%%GITHUB_SPACY/spacy/pipeline/entityruler.py). Patterns added to the
> component will be saved to a `.jsonl` file if the pipeline is serialized to
> disk, and to a bytestring if the pipeline is serialized to bytes. This allows
> saving out a pipeline with a rule-based entity recognizer and including all
> rules _with_ the component data.
```python
### {highlight="14-18,20-25"}

View File

@ -494,7 +494,7 @@ regressions to the parts of the library that you care about the most.
**For more details on the types of contributions we're looking for, the code
conventions and other useful tips, make sure to check out the
[contributing guidelines](https://github.com/explosion/spaCy/tree/master/CONTRIBUTING.md).**
[contributing guidelines](%%GITHUB_SPACY/CONTRIBUTING.md).**
<Infobox title="Code of Conduct" variant="warning">

View File

@ -59,7 +59,7 @@ specific use case. It's also available in spaCy as the
import QuickstartTraining from 'widgets/quickstart-training.js'
<QuickstartTraining download="base_config.cfg" />
<QuickstartTraining />
After you've saved the starter config to a file `base_config.cfg`, you can use
the [`init fill-config`](/api/cli#init-fill-config) command to fill in the
@ -127,7 +127,7 @@ Some of the main advantages and features of spaCy's training config are:
config which types of data to expect.
```ini
https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg
%%GITHUB_SPACY/spacy/default_config.cfg
```
Under the hood, the config is parsed into a dictionary. It's divided into

View File

@ -76,9 +76,7 @@ noise contrastive estimation or reinforcement learning.
## New features {#features}
This section contains an overview of the most important **new features and
improvements**. The [API docs](/api) include additional deprecation notes. New
methods and functions that were introduced in this version are marked with the
tag <Tag variant="new">2</Tag>.
improvements**. The [API docs](/api) include additional deprecation notes.
### Convolutional neural network models {#features-models}

View File

@ -8,20 +8,30 @@ menu:
- ['Migrating from v2.x', 'migrating']
---
## Summary {#summary}
## Summary {#summary hidden="true"}
<Grid cols={2}>
<Grid cols={2} gutterBottom={false}>
<div>
spaCy v3.0 features all new **transformer-based pipelines** that bring spaCy's
accuracy right up to the current **state-of-the-art**. You can use any
pretrained transformer to train your own pipelines, and even share one
transformer between multiple components with **multi-task learning**. Training
is now fully configurable and extensible, and you can define your own custom
models using **PyTorch**, **TensorFlow** and other frameworks. The new spaCy
projects system lets you describe whole **end-to-end workflows** in a single
file, giving you an easy path from prototype to production, and making it easy
to clone and adapt best-practice projects for your own use cases.
</div>
<Infobox title="Table of Contents" id="toc">
- [Summary](#summary)
- [New features](#features)
- [Training & config system](#features-training)
- [Transformer-based pipelines](#features-transformers)
- [Training & config system](#features-training)
- [Custom models](#features-custom-models)
- [End-to-end project workflows](#features-projects)
- [New built-in components](#features-pipeline-components)
@ -39,47 +49,126 @@ menu:
## New Features {#features}
### New training workflow and config system {#features-training}
<Infobox title="Details & Documentation" emoji="📖" list>
- **Usage:** [Training pipelines and models](/usage/training)
- **Thinc:** [Thinc's config system](https://thinc.ai/docs/usage-config),
[`Config`](https://thinc.ai/docs/api-config#config)
- **CLI:** [`train`](/api/cli#train), [`pretrain`](/api/cli#pretrain),
[`evaluate`](/api/cli#evaluate)
- **API:** [Config format](/api/data-formats#config),
[`registry`](/api/top-level#registry)
</Infobox>
This section contains an overview of the most important **new features and
improvements**. The [API docs](/api) include additional deprecation notes. New
methods and functions that were introduced in this version are marked with the
tag <Tag variant="new">3</Tag>.
### Transformer-based pipelines {#features-transformers}
> #### Example
>
> ```cli
> $ python -m spacy download en_core_web_trf
> ```
spaCy v3.0 features all new transformer-based pipelines that bring spaCy's
accuracy right up to the current **state-of-the-art**. You can use any
pretrained transformer to train your own pipelines, and even share one
transformer between multiple components with **multi-task learning**. spaCy's
transformer support interoperates with [PyTorch](https://pytorch.org) and the
[HuggingFace `transformers`](https://huggingface.co/transformers/) library,
giving you access to thousands of pretrained models for your pipelines.
![Pipeline components listening to shared embedding component](../images/tok2vec-listener.svg)
import Benchmarks from 'usage/\_benchmarks-models.md'
<Benchmarks />
<Infobox title="Details & Documentation" emoji="📖" list>
- **Usage:** [Embeddings & Transformers](/usage/embeddings-transformers),
[Training pipelines and models](/usage/training)
[Training pipelines and models](/usage/training),
[Benchmarks](/usage/facts-figures#benchmarks)
- **API:** [`Transformer`](/api/transformer),
[`TransformerData`](/api/transformer#transformerdata),
[`FullTransformerBatch`](/api/transformer#fulltransformerbatch)
- **Architectures:** [TransformerModel](/api/architectures#TransformerModel),
[TransformerListener](/api/architectures#TransformerListener),
[Tok2VecTransformer](/api/architectures#Tok2VecTransformer)
- **Trained Pipelines:** [`en_core_trf_lg_sm`](/models/en)
- **Trained Pipelines:** [`en_core_web_trf`](/models/en#en_core_web_trf)
- **Implementation:**
[`spacy-transformers`](https://github.com/explosion/spacy-transformers)
</Infobox>
### New training workflow and config system {#features-training}
> #### Example
>
> ```ini
> [training]
> vectors = null
> accumulate_gradient = 3
>
> [training.optimizer]
> @optimizers = "Adam.v1"
>
> [training.optimizer.learn_rate]
> @schedules = "warmup_linear.v1"
> warmup_steps = 250
> total_steps = 20000
> initial_rate = 0.01
> ```
spaCy v3.0 introduces a comprehensive and extensible system for **configuring
your training runs**. A single configuration file describes every detail of your
training run, with no hidden defaults, making it easy to rerun your experiments
and track changes. You can use the
[quickstart widget](/usage/training#quickstart) or the `init config` command to
get started. Instead of providing lots of arguments on the command line, you
only need to pass your `config.cfg` file to `spacy train`.
Training config files include all **settings and hyperparameters** for training
your pipeline. Some settings can also be registered **functions** that you can
swap out and customize, making it easy to implement your own custom models and
architectures.
<Infobox title="Details & Documentation" emoji="📖" list>
- **Usage:** [Training pipelines and models](/usage/training)
- **Thinc:** [Thinc's config system](https://thinc.ai/docs/usage-config),
[`Config`](https://thinc.ai/docs/api-config#config)
- **CLI:** [`init config`](/api/cli#init-config),
[`init fill-config`](/api/cli#init-fill-config), [`train`](/api/cli#train),
[`pretrain`](/api/cli#pretrain), [`evaluate`](/api/cli#evaluate)
- **API:** [Config format](/api/data-formats#config),
[`registry`](/api/top-level#registry)
</Infobox>
### Custom models using any framework {#features-custom-models}
> #### Example
>
> ```python
> from torch import nn
> from thinc.api import PyTorchWrapper
>
> torch_model = nn.Sequential(
> nn.Linear(32, 32),
> nn.ReLU(),
> nn.Softmax(dim=1)
> )
> model = PyTorchWrapper(torch_model)
> ```
spaCy's new configuration system makes it easy to customize the neural network
models used by the different pipeline components. You can also implement your
own architectures via spaCy's machine learning library [Thinc](https://thinc.ai)
that provides various layers and utilities, as well as thin wrappers around
frameworks like **PyTorch**, **TensorFlow** and **MXNet**. Component models all
follow the same unified [`Model`](https://thinc.ai/docs/api-model) API and each
`Model` can also be used as a sublayer of a larger network, allowing you to
freely combine implementations from different frameworks into a single model.
<Infobox title="Details & Documentation" emoji="📖" list>
- **Usage:** [Layers and architectures](/usage/layers-architectures)
- **Thinc:**
[Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks)
[Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks),
[`Model` API](https://thinc.ai/docs/api-model)
- **API:** [Model architectures](/api/architectures), [`Pipe`](/api/pipe)
</Infobox>
@ -159,8 +248,7 @@ add to your pipeline and customize for your use case:
- **Usage:** [Processing pipelines](/usage/processing-pipelines)
- **API:** [Built-in pipeline components](/api#architecture-pipeline)
- **Implementation:**
[`spacy/pipeline`](https://github.com/explosion/spaCy/tree/develop/spacy/pipeline)
- **Implementation:** [`spacy/pipeline`](%%GITHUB_SPACY/spacy/pipeline)
</Infobox>
@ -197,15 +285,12 @@ aren't set.
[`@Language.factory`](/api/language#factory),
[`Language.add_pipe`](/api/language#add_pipe),
[`Language.analyze_pipes`](/api/language#analyze_pipes)
- **Implementation:**
[`spacy/language.py`](https://github.com/explosion/spaCy/tree/develop/spacy/language.py)
- **Implementation:** [`spacy/language.py`](%%GITHUB_SPACY/spacy/language.py)
</Infobox>
### Dependency matching {#features-dep-matcher}
<!-- TODO: improve summary -->
> #### Example
>
> ```python
@ -233,7 +318,7 @@ dictionaries**, with each dictionary describing a **token to match** and its
[Dependency matching](/usage/rule-based-matching#dependencymatcher)
- **API:** [`DependencyMatcher`](/api/dependencymatcher)
- **Implementation:**
[`spacy/matcher/dependencymatcher.pyx`](https://github.com/explosion/spaCy/tree/develop/spacy/matcher/dependencymatcher.pyx)
[`spacy/matcher/dependencymatcher.pyx`](%%GITHUB_SPACY/spacy/matcher/dependencymatcher.pyx)
</Infobox>
@ -404,11 +489,12 @@ Note that spaCy v3.0 now requires **Python 3.6+**.
[`Pipe.begin_training`](/api/pipe#begin_training) now take a function that
returns a sequence of `Example` objects to initialize the model instead of a
list of tuples.
- [`Matcher.add`](/api/matcher#add),
[`PhraseMatcher.add`](/api/phrasematcher#add) and
[`DependencyMatcher.add`](/api/dependencymatcher#add) now only accept a list
of patterns as the second argument (instead of a variable number of
arguments). The `on_match` callback becomes an optional keyword argument.
- [`Matcher.add`](/api/matcher#add) and
[`PhraseMatcher.add`](/api/phrasematcher#add) now only accept a list of
patterns as the second argument (instead of a variable number of arguments).
The `on_match` callback becomes an optional keyword argument.
- The `spacy.gold` module has been renamed to
[`spacy.training`](%%GITHUB_SPACY/spacy/training).
- The `PRON_LEMMA` symbol and `-PRON-` as an indicator for pronoun lemmas have
been removed.
- The `TAG_MAP` and `MORPH_RULES` in the language data have been replaced by the
@ -779,6 +865,20 @@ python -m spacy package ./output ./packages
- python setup.py sdist
```
#### Data utilities and gold module {#migrating-gold}
The `spacy.gold` module has been renamed to `spacy.training`. This mostly
affects internals, but if you've been using the span offset conversion utilities
[`biluo_tags_from_offsets`](/api/top-level#biluo_tags_from_offsets),
[`offsets_from_biluo_tags`](/api/top-level#offsets_from_biluo_tags) or
[`spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags), you'll have to
change your imports:
```diff
- from spacy.gold import biluo_tags_from_offsets, spans_from_biluo_tags
+ from spacy.training import biluo_tags_from_offsets, spans_from_biluo_tags
```
#### Migration notes for plugin maintainers {#migrating-plugins}
Thanks to everyone who's been contributing to the spaCy ecosystem by developing

View File

@ -8,7 +8,6 @@ const codeBlocksPlugin = require('./src/plugins/remark-code-blocks.js')
// Import metadata
const site = require('./meta/site.json')
const logos = require('./meta/logos.json')
const sidebars = require('./meta/sidebars.json')
const models = require('./meta/languages.json')
const universe = require('./meta/universe.json')
@ -20,11 +19,16 @@ const favicon = isNightly ? `src/images/icon_nightly.png` : `src/images/icon.png
const binderBranch = isNightly ? 'nightly' : site.binderBranch
const siteUrl = isNightly ? site.siteUrlNightly : site.siteUrl
const domain = isNightly ? site.domainNightly : site.domain
const branch = isNightly ? 'develop' : 'master'
// Placeholders available in the Markdown content. This object is handed to the
// gatsby-remark-find-replace plugin further down together with the prefix "%%",
// so a document can write e.g. %%GITHUB_SPACY and have it replaced with the
// value defined here. The branch part of the URL is taken from `branch`, which
// is 'develop' for nightly builds and 'master' otherwise.
const replacements = {
GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`,
}
module.exports = {
siteMetadata: {
...site,
...logos,
sidebars,
...models,
universe,
@ -121,6 +125,13 @@ module.exports = {
{
resolve: `gatsby-remark-copy-linked-files`,
},
{
resolve: 'gatsby-remark-find-replace',
options: {
replacements,
prefix: '%%',
},
},
],
},
},

View File

@ -1,37 +0,0 @@
{
"logosUsers": [
{ "id": "airbnb", "url": "https://www.airbnb.com" },
{ "id": "uber", "url": "https://www.uber.com" },
{ "id": "quora", "url": "https://www.quora.com" },
{ "id": "retriever", "url": "https://www.retriever.no" },
{ "id": "stitchfix", "url": "https://www.stitchfix.com" },
{ "id": "chartbeat", "url": "https://chartbeat.com" },
{ "id": "allenai", "url": "https://allenai.org" }
],
"logosPublications": [
{
"id": "recode",
"url": "https://www.recode.net/2017/6/22/15855492/ai-artificial-intelligence-nonprofit-good-human-chatbots-machine-learning"
},
{
"id": "wapo",
"url": "https://www.washingtonpost.com/news/wonk/wp/2016/05/18/googles-new-artificial-intelligence-cant-understand-these-sentences-can-you/"
},
{
"id": "bbc",
"url": "http://www.bbc.co.uk/rd/blog/2017-08-irfs-weeknotes-number-250"
},
{
"id": "microsoft",
"url": "https://www.microsoft.com/developerblog/2016/09/13/training-a-classifier-for-relation-extraction-from-medical-literature/"
},
{
"id": "venturebeat",
"url": "https://venturebeat.com/2017/01/27/4-ai-startups-that-analyze-customer-reviews/"
},
{
"id": "thoughtworks",
"url": "https://www.thoughtworks.com/radar/tools"
}
]
}

View File

@ -28,7 +28,7 @@
},
"binderUrl": "explosion/spacy-io-binder",
"binderBranch": "live",
"binderVersion": "2.3.0",
"binderVersion": "3.0.0",
"sections": [
{ "id": "usage", "title": "Usage Documentation", "theme": "blue" },
{ "id": "models", "title": "Models Documentation", "theme": "blue" },
@ -47,20 +47,19 @@
"items": [
{ "text": "Usage", "url": "/usage" },
{ "text": "Models", "url": "/models" },
{ "text": "API", "url": "/api" },
{ "text": "Universe", "url": "/universe" }
{ "text": "API Reference", "url": "/api" },
{ "text": "Online Course", "url": "https://course.spacy.io" }
]
},
{
"label": "Support",
"label": "Community",
"items": [
{ "text": "Universe", "url": "/universe" },
{ "text": "Issue Tracker", "url": "https://github.com/explosion/spaCy/issues" },
{
"text": "Stack Overflow",
"url": "http://stackoverflow.com/questions/tagged/spacy"
},
{ "text": "Reddit User Group", "url": "https://www.reddit.com/r/spacynlp/" },
{ "text": "Gitter Chat", "url": "https://gitter.im/explosion/spaCy" }
}
]
},
{

View File

@ -14238,6 +14238,46 @@
}
}
},
"gatsby-remark-find-replace": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/gatsby-remark-find-replace/-/gatsby-remark-find-replace-0.3.0.tgz",
"integrity": "sha512-tTXt+ZxD+7hEVtZVbZVrifcQUk2mt4uJNUHhc9cje+93sDa4PrrFBbny9IWgXLj9QH9xDxWOZrI768ApMtbPUQ==",
"requires": {
"escape-string-regexp": "^2.0.0",
"unist-util-visit": "^2.0.1"
},
"dependencies": {
"escape-string-regexp": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz",
"integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w=="
},
"unist-util-is": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-4.0.2.tgz",
"integrity": "sha512-Ofx8uf6haexJwI1gxWMGg6I/dLnF2yE+KibhD3/diOqY2TinLcqHXCV6OI5gFVn3xQqDH+u0M625pfKwIwgBKQ=="
},
"unist-util-visit": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-2.0.3.tgz",
"integrity": "sha512-iJ4/RczbJMkD0712mGktuGpm/U4By4FfDonL7N/9tATGIF4imikjOuagyMY53tnZq3NP6BcmlrHhEKAfGWjh7Q==",
"requires": {
"@types/unist": "^2.0.0",
"unist-util-is": "^4.0.0",
"unist-util-visit-parents": "^3.0.0"
}
},
"unist-util-visit-parents": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-3.1.0.tgz",
"integrity": "sha512-0g4wbluTF93npyPrp/ymd3tCDTMnP0yo2akFD2FIBAYXq/Sga3lwaU1D8OYKbtpioaI6CkDcQ6fsMnmtzt7htw==",
"requires": {
"@types/unist": "^2.0.0",
"unist-util-is": "^4.0.0"
}
}
}
},
"gatsby-remark-images": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/gatsby-remark-images/-/gatsby-remark-images-3.0.4.tgz",
@ -22152,6 +22192,14 @@
"clipboard": "^2.0.0"
}
},
"prismjs-bibtex": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/prismjs-bibtex/-/prismjs-bibtex-1.1.0.tgz",
"integrity": "sha512-IjZUJP3iTkV1DZ8qcjUF7p7Ji/LPns56jw+INUBPtnBaX4Q/VhtzlRGHM0lxSvdfqUvqgTGF3oM8aogWqzZz2g==",
"requires": {
"prismjs": "^1.15"
}
},
"private": {
"version": "0.1.8",
"resolved": "https://registry.npmjs.org/private/-/private-0.1.8.tgz",

View File

@ -31,6 +31,7 @@
"gatsby-plugin-sitemap": "^2.0.5",
"gatsby-plugin-svgr": "^2.0.1",
"gatsby-remark-copy-linked-files": "^2.0.9",
"gatsby-remark-find-replace": "^0.3.0",
"gatsby-remark-images": "^3.0.4",
"gatsby-remark-prismjs": "^3.2.4",
"gatsby-remark-smartypants": "^2.0.8",
@ -44,6 +45,7 @@
"node-sass": "^4.11.0",
"parse-numeric-range": "0.0.2",
"prismjs": "^1.15.0",
"prismjs-bibtex": "^1.1.0",
"prop-types": "^15.7.2",
"react": "^16.8.2",
"react-dom": "^16.8.2",

View File

@ -2,6 +2,7 @@ import React, { Fragment } from 'react'
import PropTypes from 'prop-types'
import classNames from 'classnames'
import highlightCode from 'gatsby-remark-prismjs/highlight-code.js'
import 'prismjs-bibtex'
import rangeParser from 'parse-numeric-range'
import { StaticQuery, graphql } from 'gatsby'
import { window } from 'browser-monads'

View File

@ -9,6 +9,7 @@ export default function Grid({
narrow = false,
gutterBottom = true,
className,
style,
children,
}) {
const gridClassNames = classNames(classes.root, className, {
@ -18,7 +19,11 @@ export default function Grid({
[classes.third]: cols === 3,
[classes.quarter]: cols === 4,
})
return <div className={gridClassNames}>{children}</div>
return (
<div className={gridClassNames} style={style}>
{children}
</div>
)
}
Grid.propTypes = {

View File

@ -23,7 +23,7 @@ export default function Infobox({
<aside className={infoboxClassNames} id={id}>
{title && (
<h4 className={classes.title}>
{variant !== 'default' && (
{variant !== 'default' && !emoji && (
<Icon width={18} name={variant} inline className={classes.icon} />
)}
<span className={classes.titleText}>

View File

@ -1,19 +1,17 @@
import React, { Fragment } from 'react'
import React from 'react'
import classNames from 'classnames'
import pattern from '../images/pattern_blue.jpg'
import patternNightly from '../images/pattern_nightly.jpg'
import patternOverlay from '../images/pattern_landing.jpg'
import patternOverlayNightly from '../images/pattern_landing_nightly.jpg'
import logoSvgs from '../images/logos'
import Grid from './grid'
import { Content } from './main'
import Button from './button'
import CodeBlock from './code'
import { H1, H2, H3, Label, InlineList } from './typography'
import { H1, H2, H3 } from './typography'
import Link from './link'
import { chunkArray } from './util'
import classes from '../styles/landing.module.sass'
export const LandingHeader = ({ nightly, style = {}, children }) => {
@ -39,9 +37,9 @@ export const LandingSubtitle = ({ children }) => (
</h2>
)
export const LandingGrid = ({ cols = 3, blocks = false, children }) => (
export const LandingGrid = ({ cols = 3, blocks = false, style, children }) => (
<Content className={classNames(classes.grid, { [classes.blocks]: blocks })}>
<Grid cols={cols} narrow={blocks}>
<Grid cols={cols} narrow={blocks} style={style}>
{children}
</Grid>
</Content>
@ -142,33 +140,3 @@ export const LandingBannerButton = ({ to, small, children }) => (
</Button>
</div>
)
export const LandingLogos = ({ logos = [], title, maxRow = 4, children }) => {
const rows = chunkArray(logos, maxRow)
return (
<Content className={classes.logos}>
{title && <Label>{title}</Label>}
{rows.map((logos, i) => (
<Fragment key={i}>
<InlineList className={classes.logosContent}>
{logos.map(({ id, url }, j) => {
const Component = logoSvgs[id]
return !Component ? null : (
<Link
to={url}
key={j}
aria-label={id}
hidden
className={classes.logo}
>
<Component />
</Link>
)
})}
{i === rows.length - 1 && children}
</InlineList>
</Fragment>
))}
</Content>
)
}

View File

@ -1,7 +1,8 @@
import React from 'react'
import React, { Fragment } from 'react'
import classNames from 'classnames'
import Icon from './icon'
import { Help } from './typography'
import { isString } from './util'
import classes from '../styles/table.module.sass'
@ -16,14 +17,26 @@ function getCellContent(cellChildren) {
'✅': { name: 'yes', variant: 'success', 'aria-label': 'positive' },
'❌': { name: 'no', variant: 'error', 'aria-label': 'negative' },
}
const iconRe = new RegExp(`^(${Object.keys(icons).join('|')})`, 'g')
let children = isString(cellChildren) ? [cellChildren] : cellChildren
if (Array.isArray(children)) {
return children.map((child, i) => {
if (isString(child)) {
const icon = icons[child.trim()]
const props = {
inline: i < children.length,
'aria-hidden': undefined,
}
if (icon) {
const props = { ...icon, inline: i < children.length, 'aria-hidden': undefined }
return <Icon {...props} key={i} />
return <Icon {...icon} {...props} key={i} />
} else if (iconRe.test(child)) {
const [, iconName, text] = child.split(iconRe)
return (
<Fragment key={i}>
<Icon {...icons[iconName]} {...props} />
{text.trim()}
</Fragment>
)
}
// Work around prettier auto-escape
if (child.startsWith('\\')) return child.slice(1)
@ -66,7 +79,22 @@ export const Table = ({ fixed, className, ...props }) => {
return <table className={tableClassNames} {...props} />
}
export const Th = props => <th className={classes.th} {...props} />
export const Th = ({ children, ...props }) => {
const isRotated = children && !isString(children) && children.type && children.type.name == 'Tx'
const thClassNames = classNames(classes.th, { [classes.thRotated]: isRotated })
return (
<th className={thClassNames} {...props}>
{children}
</th>
)
}
// Rotated head, child of Th
export const Tx = ({ children, ...props }) => (
<div className={classes.tx} {...props}>
<span>{children}</span>
</div>
)
export const Tr = ({ evenodd = true, children, ...props }) => {
const foot = isFootRow(children)

View File

@ -64,8 +64,8 @@ export const InlineList = ({ Component = 'p', gutterBottom = true, className, ch
return <Component className={listClassNames}>{children}</Component>
}
export const Help = ({ children, size = 16 }) => (
<span className={classes.help} data-tooltip={children}>
export const Help = ({ children, className, size = 16 }) => (
<span className={classNames(classes.help, className)} data-tooltip={children}>
<Icon name="help2" width={size} />
</span>
)

View File

@ -6,6 +6,8 @@ import siteMetadata from '../../meta/site.json'
const htmlToReactParser = new HtmlToReactParser()
// TODO: update this
const DEFAULT_BRANCH = 'develop'
export const repo = siteMetadata.repo
export const modelsRepo = siteMetadata.modelsRepo
@ -21,7 +23,7 @@ export const headingTextClassName = 'heading-text'
* @param {string} [branch] - Optional branch. Defaults to DEFAULT_BRANCH.
* @returns {string} - URL to the file on GitHub.
*/
export function github(filepath, branch = 'master') {
export function github(filepath, branch = DEFAULT_BRANCH) {
    // Paths that already include the host only need the protocol prepended
    if (filepath && filepath.startsWith('github.com')) return `https://${filepath}`
    // An explicitly falsy branch (null, '') falls back to the same DEFAULT_BRANCH
    // as an omitted argument, instead of a separately hard-coded branch name.
    const targetBranch = branch || DEFAULT_BRANCH
    // Without a filepath, link to the repository root
    const path = filepath ? `/tree/${targetBranch}/${filepath}` : ''
    return `https://github.com/${repo}${path}`
}
@ -33,7 +35,7 @@ export function github(filepath, branch = 'master') {
* @param {boolean} [isIndex] - Whether the page is an index, e.g. /api/index.md
* @param {string} [branch] - Optional branch on GitHub. Defaults to DEFAULT_BRANCH.
*/
export function getCurrentSource(slug, isIndex = false, branch = 'master') {
export function getCurrentSource(slug, isIndex = false, branch = DEFAULT_BRANCH) {
    // Index pages are stored as <slug>/index.md, all other pages as <slug>.md
    let sourcePath = `website/docs${slug}`
    if (isIndex) {
        sourcePath += '/index.md'
    } else {
        sourcePath += '.md'
    }
    return github(sourcePath, branch)
}

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="45" viewBox="0 0 320 100">
<path fill="#FF5A5F" d="M168.7 25.1c0 3.6-2.9 6.5-6.5 6.5s-6.5-2.9-6.5-6.5 2.8-6.5 6.5-6.5c3.7.1 6.5 3 6.5 6.5zm-26.8 13.1v1.6s-3.1-4-9.7-4c-10.9 0-19.4 8.3-19.4 19.8 0 11.4 8.4 19.8 19.4 19.8 6.7 0 9.7-4.1 9.7-4.1V73c0 .8.6 1.4 1.4 1.4h8.1V36.8h-8.1c-.8 0-1.4.7-1.4 1.4zm0 24.1c-1.5 2.2-4.5 4.1-8.1 4.1-6.4 0-11.3-4-11.3-10.8s4.9-10.8 11.3-10.8c3.5 0 6.7 2 8.1 4.1v13.4zm15.5-25.5h9.6v37.6h-9.6V36.8zm143.4-1c-6.6 0-9.7 4-9.7 4V18.7h-9.6v55.7h8.1c.8 0 1.4-.7 1.4-1.4v-1.7s3.1 4.1 9.7 4.1c10.9 0 19.4-8.4 19.4-19.8s-8.5-19.8-19.3-19.8zm-1.6 30.5c-3.7 0-6.6-1.9-8.1-4.1V48.8c1.5-2 4.7-4.1 8.1-4.1 6.4 0 11.3 4 11.3 10.8s-4.9 10.8-11.3 10.8zm-22.7-14.2v22.4h-9.6V53.2c0-6.2-2-8.7-7.4-8.7-2.9 0-5.9 1.5-7.8 3.7v26.2h-9.6V36.8h7.6c.8 0 1.4.7 1.4 1.4v1.6c2.8-2.9 6.5-4 10.2-4 4.2 0 7.7 1.2 10.5 3.6 3.4 2.8 4.7 6.4 4.7 12.7zm-57.7-16.3c-6.6 0-9.7 4-9.7 4V18.7h-9.6v55.7h8.1c.8 0 1.4-.7 1.4-1.4v-1.7s3.1 4.1 9.7 4.1c10.9 0 19.4-8.4 19.4-19.8.1-11.4-8.4-19.8-19.3-19.8zm-1.6 30.5c-3.7 0-6.6-1.9-8.1-4.1V48.8c1.5-2 4.7-4.1 8.1-4.1 6.4 0 11.3 4 11.3 10.8s-4.9 10.8-11.3 10.8zm-26-30.5c2.9 0 4.4.5 4.4.5v8.9s-8-2.7-13 3v26.3H173V36.8h8.1c.8 0 1.4.7 1.4 1.4v1.6c1.8-2.1 5.7-4 8.7-4zM91.5 71c-.5-1.2-1-2.5-1.5-3.6-.8-1.8-1.6-3.5-2.3-5.1l-.1-.1C80.7 47.2 73.3 32 65.5 17l-.3-.6c-.8-1.5-1.6-3.1-2.4-4.7-1-1.8-2-3.7-3.6-5.5C56 2.2 51.4 0 46.5 0c-5 0-9.5 2.2-12.8 6-1.5 1.8-2.6 3.7-3.6 5.5-.8 1.6-1.6 3.2-2.4 4.7l-.3.6C19.7 31.8 12.2 47 5.3 62l-.1.2c-.7 1.6-1.5 3.3-2.3 5.1-.5 1.1-1 2.3-1.5 3.6C.1 74.6-.3 78.1.2 81.7c1.1 7.5 6.1 13.8 13 16.6 2.6 1.1 5.3 1.6 8.1 1.6.8 0 1.8-.1 2.6-.2 3.3-.4 6.7-1.5 10-3.4 4.1-2.3 8-5.6 12.4-10.4 4.4 4.8 8.4 8.1 12.4 10.4 3.3 1.9 6.7 3 10 3.4.8.1 1.8.2 2.6.2 2.8 0 5.6-.5 8.1-1.6 7-2.8 11.9-9.2 13-16.6.8-3.5.4-7-.9-10.7zm-45.1 5.2C41 69.4 37.5 63 36.3 57.6c-.5-2.3-.6-4.3-.3-6.1.2-1.6.8-3 1.6-4.2 1.9-2.7 5.1-4.4 8.8-4.4 3.7 0 7 1.6 8.8 4.4.8 1.2 1.4 2.6 1.6 4.2.3 1.8.2 3.9-.3 6.1-1.2 5.3-4.7 11.7-10.1 18.6zm39.9 4.7c-.7 5.2-4.2 9.7-9.1 11.7-2.4 1-5 1.3-7.6 
1-2.5-.3-5-1.1-7.6-2.6-3.6-2-7.2-5.1-11.4-9.7 6.6-8.1 10.6-15.5 12.1-22.1.7-3.1.8-5.9.5-8.5-.4-2.5-1.3-4.8-2.7-6.8-3.1-4.5-8.3-7.1-14.1-7.1s-11 2.7-14.1 7.1c-1.4 2-2.3 4.3-2.7 6.8-.4 2.6-.3 5.5.5 8.5 1.5 6.6 5.6 14.1 12.1 22.2-4.1 4.6-7.8 7.7-11.4 9.7-2.6 1.5-5.1 2.3-7.6 2.6-2.7.3-5.3-.1-7.6-1-4.9-2-8.4-6.5-9.1-11.7-.3-2.5-.1-5 .9-7.8.3-1 .8-2 1.3-3.2.7-1.6 1.5-3.3 2.3-5l.1-.2c6.9-14.9 14.3-30.1 22-44.9l.3-.6c.8-1.5 1.6-3.1 2.4-4.6.8-1.6 1.7-3.1 2.8-4.4 2.1-2.4 4.9-3.7 8-3.7 3.1 0 5.9 1.3 8 3.7 1.1 1.3 2 2.8 2.8 4.4.8 1.5 1.6 3.1 2.4 4.6l.3.6c7.6 14.9 15 30.1 21.9 45v.1c.8 1.6 1.5 3.4 2.3 5 .5 1.2 1 2.2 1.3 3.2.8 2.6 1.1 5.1.7 7.7z"></path>
</svg>

Before

Width:  |  Height:  |  Size: 2.7 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 9.6 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="26" viewBox="0 0 1000 285">
<path fill="#111" d="M542.1 184.3c0 30.97-38.56 29.14-38.56 29.14H465v-56h38.54c39.7-.23 38.56 26.85 38.56 26.85M465 72.05h29.36c30.53 1.6 29.38 24.33 29.38 24.33 0 28.22-33.74 28.68-33.74 28.68h-25V72.06zm70 64.03s26.38-11.25 26.15-41.1c0 0 4-48.87-60.76-54.84h-71.9V245.1h82.4s68.84.24 68.84-57.83c0 0 1.6-39.47-44.75-51.18M348.95 0h302.6v285.17h-302.6V0zM193.14 184.3c0 30.97-38.56 29.14-38.56 29.14h-38.56v-56h38.56c39.7-.23 38.56 26.85 38.56 26.85M116.02 72.05h29.38c30.52 1.6 29.38 24.33 29.38 24.33 0 28.22-33.74 28.68-33.74 28.68h-25.02V72.06zm70 64.03s26.4-11.25 26.17-41.1c0 0 4-48.87-60.78-54.84h-71.9V245.1h82.4s68.86.24 68.86-57.83c0 0 1.6-39.47-44.76-51.18M0 0h302.6v285.17H0V0zM938.8 54.85v37.87S901.85 70 861 69.54c0 0-76.2-1.5-79.64 73.04 0 0-2.75 68.57 78.72 72.47 0 0 34.2 4.13 80.56-25.48v39.25s-62.2 36.95-134.26 8.5c0 0-60.6-22.15-62.9-94.74 0 0-2.52-74.65 78.27-99.43 0 0 21.58-8.26 60.36-4.6 0 0 23.2 2.3 56.7 16.3M697.93 285.17h302.6V0h-302.6v285.17z" />
</svg>

Before

Width:  |  Height:  |  Size: 1.0 KiB

View File

@ -1,6 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="180" height="25" viewBox="0 0 915.7 130.1">
<path fill="#51a4da" d="M157.8 8.6c-8.1 6.8-15.5 13-23 19.1-.8.7-2.2.7-3.4.7H23.3c-.8 0-1.6-.3-2.6-.4V8.6h137.1z" />
<path fill="#b3e4fa" d="M0 33.9c2-.1 4.1-.3 6.1-.3H129c-8.7 7.2-16.2 13.5-23.8 19.6-.8.6-2.3.7-3.4.7H2.4c-.8 0-1.7-.3-2.5-.5.1-6.5.1-13 .1-19.5z" />
<path fill="#5bc4bf" d="M36 79.2V59.4h63.2C91 66.3 83.6 72.5 76.1 78.7c-.5.4-1.3.5-1.9.5H36z" />
<path fill="#657d8c" d="M613.3 49.4c6.5-3.8 12.5-8.7 19.3-10.9 19.6-6.4 39.7 2.9 48 21.8 6.8 15.4 6.3 31-1.3 46-12.1 24.2-47.3 28-66 8-.1 2-.3 3.6-.3 5.2.1 4.3-2 6.4-6.2 6.3-4.2 0-6.2-2.2-6.2-6.4V8.2c0-4.3 2-6.1 6.3-6.1 4.4 0 6.2 2.1 6.2 6.3.2 13.9.2 27.7.2 41zm-.2 30.1c0 2.6-.1 5.1 0 7.7.5 19.7 19.2 33.3 38.2 27.7 13.9-4.1 22.5-18.7 21-35.8-1.2-13.7-6.6-24.9-20.5-29.7-9.8-3.4-19.3-1.6-27.7 4.4-8.7 6.3-13.1 14.7-11 25.7z M561.7 39h27.7c.3 8.4-2.3 11.2-9.9 11.2-5.3 0-10.6 0-15.9.1-.3 0-.6.2-1.3.3-.2 1-.5 2.1-.5 3.2v45.6c0 1.5.1 3.1.2 4.6 1.2 10.6 8.8 15.1 18.7 10.8 3.5-1.5 7-4.1 9.7.8 1.8 3.2-.7 6.9-5.8 9.4-16.6 8-34.1-1.7-34.8-20-.7-16-.2-32.1-.3-48.2v-6.1c-7.6 0-14.6-.6-21.5.2-7.7.9-13.5 5.7-16.1 12.8-2.4 6.6-3.9 13.8-4.4 20.8-.7 11.4-.1 22.9-.2 34.3-.1 5.5-2.7 7.7-7.6 7-4.5-.6-5.2-3.8-5.2-7.5V72.2 45.5c0-4.3 1.1-7.4 6.2-7.5 5.1-.1 6.4 3 6.4 7.3 0 3.1 0 6.1.4 9.9 9-17.9 25.3-17 41.9-16.2 0-5.1.1-9.5 0-13.9s1.6-6.8 6.2-6.7c4.8 0 6.2 2.7 6.1 6.9-.1 4.3 0 8.6 0 13.7z M267.1 127.1c-36.6 0-61.2-28.5-57.6-66.8 4.8-50.7 52.1-62.9 83-48.9 6.3 2.8 11.5 8.2 16.6 13.1 2.6 2.5 3.4 6.6-.4 9.5-3.6 2.8-6.5 1.2-9.3-1.8-12.7-13.6-28.3-17.6-45.8-12.3-17.9 5.4-27 18.9-30.1 36.7-2.4 13.4-.5 26.3 6.5 38.1 14.1 23.7 48.5 28.7 67.2 9.7 2-2 3.8-4.4 5.3-6.9 1.8-2.9 4.4-4 6.9-2.3 1.9 1.3 4 4.7 3.5 6.4-1.1 3.8-3.2 7.6-5.9 10.5-10.8 11-24.7 15-39.9 15z M704.6 85.7c1.5 13.7 7.5 23.6 20 28 13 4.5 25.6 3.4 36.1-6.9 5.2-5.1 7.6-5.9 10.5-3.1 2.8 2.8 1.9 7-2.4 11.7-.6.6-1.2 1.2-1.9 1.8-15.4 11.7-32.5 13.2-49.8 6-16.7-6.9-24.3-20.9-24.9-38.5-.9-24 11.8-42.2 33-46.7 28.6-6.1 50.8 11.2 50.4 43-.1 4.4-2.9 4.8-6.2 4.8h-58.9c-1.9-.1-3.7-.1-5.9-.1zm1.2-11.4h55.9c-.1-15.3-11-26.2-26-26.3-15.6-.2-29.6 12.1-29.9 26.3zM842.8 73.1c1.4-14-5.1-22.5-18.2-24.3-11.1-1.5-20.7 1.6-28 10.3-2.5 2.9-4.9 5.1-8.6 2.4-3.7-2.8-2.7-6.3-.5-9.5 1.5-2.2 3.3-4.5 5.4-6.1 14.4-10.7 30.2-12.1 46.5-5.1 9.5 4.1 15.2 12.3 15.6 22.5.9 18.9.7 37.9.8 56.9 0 3.9-2.3 5.9-6.3 5.8-4-.1-6-2-5.8-6.1.1-1.8 0-3.7 0-6.5-1.6 1.3-2.4 1.9-3.2 
2.6-12.4 11.4-26.8 13.7-42.4 8.8-9.1-2.9-14.5-9.4-15.5-19.1-1-9.9 2.6-17.8 11.3-22.9 10.8-6.3 22.9-7.7 35-8.7 4.5-.5 9.1-.7 13.9-1zm-.2 9.7c-9.8 1.2-19.4 2.1-28.9 3.6-3.8.6-7.5 2.2-10.9 4-5.9 3.1-8.4 8.4-7.4 14.4 1 6.2 5.3 9.5 11 10.7 17.7 3.9 40.5-6.1 36.2-32.7z M338.5 50.2c.7-1.1 1.3-2.4 2.2-3.3 10.5-10.7 23.3-12.4 36.9-8.2 13.3 4.1 20 14.6 20.9 27.7 1.2 18 .8 36.2.9 54.3 0 4-2.5 5.4-6.1 5.2-3.9-.1-5.8-2.1-5.8-6.2.1-13.7.1-27.3 0-41 0-3.2-.2-6.5-.7-9.7-1.9-11.5-8.4-18.5-18.2-20-12.1-1.8-23.5 3.1-28.1 13.2-2.1 4.7-3.2 10.1-3.4 15.3-.5 13.7-.1 27.3-.2 41 0 6-3.2 8.7-8.8 7.1-1.8-.5-3.2-2.9-4.5-4.6-.5-.7-.2-2-.2-3V9.9c0-6.2 1.2-7.5 6.3-7.6 5.3-.1 7.1 1.4 7.1 6.9.1 11.8 0 23.6 0 35.4 0 1.6.1 3.3.2 4.9.7.2 1.1.5 1.5.7z M469 73.1c1.3-13.6-5.3-22.3-17.9-24.2-11.3-1.7-21 1.4-28.5 10.2-2.5 2.9-5 5.1-8.6 2.4-3.7-2.8-2.7-6.4-.5-9.6 6.2-9.2 15.4-13.3 25.9-14.6 5.2-.7 10.6-.7 15.8.1 16.6 2.7 26.4 14.3 26.5 31.3.2 16.6.1 33.1 0 49.7 0 5.6-1.6 7.5-6 7.5-5 0-6.4-3.1-6.1-7.5.1-1.4 0-2.7 0-4.8-1.3 1-2.3 1.5-3 2.2-12.1 11.4-26.4 13.7-41.8 9.1-9.8-2.9-15.5-9.9-16.2-20.2-.9-10.1 3.4-17.8 12.4-22.7 10.6-5.7 22.3-7.1 34.1-8.1 4.6-.3 9.2-.5 13.9-.8zm0 9.9c-8.8.9-17.4 1.5-25.9 2.9-4.8.8-9.6 2.4-14 4.6-6.3 3.1-8.8 8.6-7.7 14.7.9 5.3 5.2 9.5 11.7 10.7 18.7 3.1 39.3-7.4 35.9-32.9z M63.9 127.4c-5.1-1.2-8.2-3.2-9.7-7.3-1.7-4.6-.3-8.3 3.2-11.5C68 98.9 78.6 89.2 89.1 79.5c24.2-22.1 48.4-44.3 72.7-66.4.5-.5.9-1.2 1.5-1.3 2-.6 4.1-1 6.1-1.5-.6 2.1-.5 4.7-1.8 6.1-31.8 35.3-63.8 70.4-95.8 105.5-2 2.3-5.2 3.7-7.9 5.5z M873.9 49.4h-8.8c-3.2 0-5.1-2-4.4-4.9.5-2 2.3-4.5 4.2-5.3 2.4-.9 5.3-.2 9-.2 0-4.6-.1-8.8 0-12.9.1-5.9 1.7-7.6 6.5-7.7 5.3-.1 6.1 3.3 6.1 7.4v12.9h27.8c-.2 8.1-2.7 10.6-9.7 10.7h-18.3v12.9l.3 35.9c0 1.5 0 3.1.2 4.6.9 12 8.5 16.6 19.5 11.6 3.3-1.5 6.6-3.2 8.8 1.1 2.1 4-.9 6.4-3.8 8.4-14.4 9.7-34.8 1-36.3-16.2-1.3-14.2-.8-28.7-1-43-.1-4.8-.1-9.6-.1-15.3z" />
</svg>

Before

Width:  |  Height:  |  Size: 4.2 KiB

View File

@ -1,31 +0,0 @@
import { ReactComponent as AirbnbLogo } from './airbnb.svg'
import { ReactComponent as UberLogo } from './uber.svg'
import { ReactComponent as QuoraLogo } from './quora.svg'
import { ReactComponent as RetrieverLogo } from './retriever.svg'
import { ReactComponent as StitchfixLogo } from './stitchfix.svg'
import { ReactComponent as ChartbeatLogo } from './chartbeat.svg'
import { ReactComponent as AllenAILogo } from './allenai.svg'
import { ReactComponent as RecodeLogo } from './recode.svg'
import { ReactComponent as WapoLogo } from './wapo.svg'
import { ReactComponent as BBCLogo } from './bbc.svg'
import { ReactComponent as MicrosoftLogo } from './microsoft.svg'
import { ReactComponent as VenturebeatLogo } from './venturebeat.svg'
import { ReactComponent as ThoughtworksLogo } from './thoughtworks.svg'
// Lookup table from a lowercase logo id to the SVG React component imported
// above for it. Key order matches the order of the imports.
const logos = {
    airbnb: AirbnbLogo,
    uber: UberLogo,
    quora: QuoraLogo,
    retriever: RetrieverLogo,
    stitchfix: StitchfixLogo,
    chartbeat: ChartbeatLogo,
    allenai: AllenAILogo,
    recode: RecodeLogo,
    wapo: WapoLogo,
    bbc: BBCLogo,
    microsoft: MicrosoftLogo,
    venturebeat: VenturebeatLogo,
    thoughtworks: ThoughtworksLogo,
}

export default logos

View File

@ -1,7 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="130" height="28" viewBox="0 0 609 130">
<path fill="#737373" d="M213.2 74.3l-3.6 10.2h-.3c-.6-2.3-1.7-5.8-3.5-10L186.5 26h-18.9v77.3h12.5V55.6c0-3 0-6.4-.1-10.6-.1-2.1-.3-3.7-.4-4.9h.3c.6 3 1.3 5.2 1.8 6.6l23.2 56.4h8.8l23-56.9c.5-1.3 1-3.9 1.5-6.1h.3c-.3 5.7-.5 10.8-.6 13.9v49h13.3V25.8H233l-19.8 48.5zm50.6-26.7h13V103h-13zm6.6-23.4c-2.2 0-4 .8-5.5 2.2-1.5 1.4-2.3 3.2-2.3 5.4 0 2.1.8 3.9 2.3 5.3 1.5 1.4 3.3 2.1 5.5 2.1s4.1-.8 5.5-2.1c1.5-1.4 2.3-3.2 2.3-5.3s-.8-3.9-2.3-5.4c-1.3-1.4-3.2-2.2-5.5-2.2m52.5 22.9c-2.4-.5-4.9-.8-7.3-.8-5.9 0-11.3 1.3-15.8 3.9-4.5 2.6-8.1 6.2-10.4 10.7-2.4 4.6-3.6 9.9-3.6 16 0 5.3 1.2 10 3.5 14.3 2.3 4.2 5.5 7.6 9.8 9.9 4.1 2.3 8.9 3.5 14.3 3.5 6.2 0 11.5-1.3 15.7-3.7l.1-.1v-12l-.5.4c-1.9 1.4-4.1 2.6-6.3 3.3-2.3.8-4.4 1.2-6.2 1.2-5.2 0-9.3-1.5-12.2-4.8-3-3.2-4.5-7.6-4.5-13.1 0-5.7 1.5-10.2 4.6-13.5 3.1-3.3 7.2-5 12.2-5 4.2 0 8.5 1.4 12.4 4.2l.5.4V49.2l-.1-.1c-1.7-.7-3.6-1.5-6.2-2m42.9-.4c-3.2 0-6.2 1-8.8 3.1-2.2 1.8-3.7 4.4-5 7.5h-.1v-9.7h-13V103h13V74.7c0-4.8 1-8.8 3.2-11.7 2.2-3 5-4.5 8.4-4.5 1.2 0 2.4.3 3.9.5 1.4.4 2.4.8 3.1 1.3l.5.4v-13l-.3-.1c-.9-.6-2.7-.9-4.9-.9m35.4-.3c-9.1 0-16.4 2.7-21.5 8-5.2 5.3-7.7 12.6-7.7 21.8 0 8.6 2.6 15.6 7.6 20.7 5 5 11.8 7.6 20.3 7.6 8.9 0 16-2.7 21.1-8.1 5.2-5.4 7.7-12.6 7.7-21.5 0-8.8-2.4-15.8-7.3-20.9-4.7-5.1-11.6-7.6-20.2-7.6M411.6 89c-2.4 3.1-6.2 4.6-10.9 4.6s-8.5-1.5-11.2-4.8c-2.7-3.1-4-7.6-4-13.3 0-5.9 1.4-10.4 4-13.6 2.7-3.2 6.4-4.8 11.1-4.8 4.6 0 8.2 1.5 10.8 4.6 2.6 3.1 4 7.6 4 13.5-.2 6-1.3 10.7-3.8 13.8m46.1-18.4c-4.1-1.7-6.7-3-7.9-4.1-1-1-1.5-2.4-1.5-4.2 0-1.5.6-3 2.1-4s3.2-1.5 5.7-1.5c2.2 0 4.5.4 6.7 1s4.2 1.5 5.8 2.7l.5.4V48.7l-.3-.1c-1.5-.6-3.5-1.2-5.9-1.7-2.4-.4-4.6-.6-6.4-.6-6.2 0-11.3 1.5-15.3 4.8-4 3.1-5.9 7.3-5.9 12.2 0 2.6.4 4.9 1.3 6.8.9 1.9 2.2 3.7 4 5.2 1.8 1.4 4.4 3 8 4.5 3 1.3 5.3 2.3 6.7 3.1 1.4.8 2.3 1.7 3 2.4.5.8.8 1.8.8 3.1 0 3.7-2.8 5.5-8.5 5.5-2.2 0-4.5-.4-7.2-1.3s-5.2-2.2-7.3-3.7l-.5-.4v12.7l.3.1c1.9.9 4.2 1.5 7 2.2 2.8.5 5.3.9 7.5.9 6.7 0 12.2-1.5 16.1-4.8 4-3.2 6.1-7.3 6.1-12.6 
0-3.7-1-7-3.2-9.5-2.9-2.4-6.5-4.9-11.7-6.9m49.2-24.2c-9.1 0-16.4 2.7-21.5 8s-7.7 12.6-7.7 21.8c0 8.6 2.6 15.6 7.6 20.7 5 5 11.8 7.6 20.3 7.6 8.9 0 16-2.7 21.1-8.1 5.2-5.4 7.7-12.6 7.7-21.5 0-8.8-2.4-15.8-7.3-20.9-4.7-5.1-11.6-7.6-20.2-7.6M517.2 89c-2.4 3.1-6.2 4.6-10.9 4.6-4.8 0-8.5-1.5-11.2-4.8-2.7-3.1-4-7.6-4-13.3 0-5.9 1.4-10.4 4-13.6 2.7-3.2 6.4-4.8 11.1-4.8 4.5 0 8.2 1.5 10.8 4.6 2.6 3.1 4 7.6 4 13.5 0 6-1.3 10.7-3.8 13.8M603.9 58.3V47.6h-13.1V31.2l-.4.1L578 35l-.3.1v12.5h-19.6v-7c0-3.2.8-5.7 2.2-7.3s3.5-2.4 6.1-2.4c1.8 0 3.7.4 5.8 1.3l.5.3V21.2l-.3-.1c-1.8-.6-4.2-1-7.3-1-3.9 0-7.3.9-10.4 2.4-3.1 1.7-5.4 4-7.1 7.1-1.7 3-2.6 6.4-2.6 10.3v7.7h-9.1v10.6h9.1V103h13.1V58.3h19.6v28.5c0 11.7 5.5 17.6 16.5 17.6 1.8 0 3.7-.3 5.5-.6 1.9-.4 3.3-.9 4.1-1.3l.1-.1V91.7l-.5.4c-.8.5-1.5.9-2.7 1.2-1 .3-1.9.4-2.6.4-2.6 0-4.4-.6-5.7-2.1-1.2-1.4-1.8-3.7-1.8-7.1V58.3h13.3z" />
<path fill="#F25022" d="M0 0h61.3v61.3H0z" />
<path fill="#7FBA00" d="M67.7 0H129v61.3H67.7z" />
<path fill="#00A4EF" d="M0 67.7h61.3V129H0z" />
<path fill="#FFB900" d="M67.7 67.7H129V129H67.7z" />
</svg>

Before

Width:  |  Height:  |  Size: 3.1 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="120" height="34" viewBox="0 0 201 56">
<path fill="#b92b27" d="M29 43.62c-1.93-3.77-4.18-7.6-8.57-7.6-.84 0-1.68.15-2.45.5l-1.5-2.98c1.83-1.56 4.77-2.8 8.54-2.8 5.87 0 8.88 2.83 11.27 6.44 1.42-3.08 2.1-7.24 2.1-12.4 0-12.88-4.04-19.5-13.45-19.5-9.27 0-13.28 6.62-13.28 19.5 0 12.82 4 19.36 13.28 19.36 1.47 0 2.8-.16 4.04-.52zm2.3 4.5c-2.05.54-4.2.85-6.35.85C12.6 48.96.5 39.1.5 24.76.5 10.32 12.6.48 24.96.48c12.56 0 24.53 9.77 24.53 24.3 0 8.1-3.77 14.67-9.26 18.9 1.78 2.67 3.6 4.43 6.14 4.43 2.77 0 3.9-2.14 4.08-3.82h3.6c.22 2.24-.9 11.53-11 11.53-6.1 0-9.33-3.53-11.76-7.68zm26.12-12.3V19.27c0-1.9-.7-2.73-2.86-2.73h-2.3v-4.4H67.3v23.5c0 3.95 2.15 5.7 5.4 5.7 2.7 0 5.37-1.2 6.8-3.9V19.26c0-1.9-.7-2.73-2.85-2.73h-2.45v-4.4h15.2v24.6c0 2.45.92 3.57 3.72 3.57h.5v4.54L80 47v-4.67h-.28c-2.63 3.2-6.34 5.38-11.62 5.38-5.95 0-10.7-3-10.7-11.87m56 7.48c5.36 0 7.4-4.66 7.5-14.04.1-9.2-2.14-13.63-7.5-13.63-4.68 0-7.62 4.45-7.62 13.63 0 9.38 2.9 14.04 7.62 14.04zm0 4.4c-9.7 0-18.43-7.4-18.43-18.44 0-10.84 8.52-18.04 18.42-18.04 10.32 0 18.6 7.34 18.6 18.04 0 11.04-8.28 18.45-18.6 18.45zm18.9-.7v-4.4h1.47c3.62 0 3.97-1.04 3.97-4.2V19.27c0-1.9-.98-2.72-3.2-2.72h-1.97v-4.4h13.82l.7 7.2h.27c1.53-5.18 5.66-7.9 9.52-7.9 3.2 0 5.7 1.8 5.7 5.5 0 2.55-1.25 5.28-4.7 5.28-3.1 0-3.7-2.1-6.26-2.1-2.3 0-4.06 2.17-4.06 5.36V38.4c0 3.16.77 4.2 4.34 4.2h2.02V47h-21.64m46-5.12c4.4 0 6.2-4.17 6.2-8.36v-5.6c-3.2 3.34-10.68 3.46-10.68 9.4 0 2.9 1.72 4.56 4.47 4.56zm6.42-.02c-1.82 3.5-5.55 5.85-10.76 5.85-6.06 0-9.97-3.2-9.97-8.87 0-11.4 15.87-8.36 20.53-15.9v-.83c0-5.8-2.28-6.7-4.8-6.7-7.06 0-3.84 7.6-10.34 7.6-3.14 0-4.35-1.9-4.35-4.02 0-4.3 5.13-7.76 14.75-7.76 9.1 0 14.7 2.52 14.7 11.58v14.47c0 2.24.82 3.45 2.77 3.45.84 0 1.54-.23 2.08-.6l1.16 2.83c-.94 1.47-3.48 4.06-8.3 4.06-4.2 0-6.83-1.95-7.18-5.14h-.28z"></path>
</svg>

Before

Width:  |  Height:  |  Size: 1.8 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="100" height="24" viewBox="0 0 824 203">
<path fill="#EE2C24" d="M84.8 90V49c-18 0-30.7 7.6-38 22.8v-20H4.6v148.1h45v-72.4c0-27.9 5.9-37.4 34.1-37.4h1.1zM823.4 137c0-54.5-31.9-88-78.5-88-19.9 0-39.2 8.2-54 22.9-7.9 7.9-13.8 17.1-17.6 27.2V4.2h-44.5v63.9c-10-13-24.1-19.1-42.4-19.1-19.1 0-36.3 7-50.4 20.8-10.2 9.8-17.3 21.3-21 34.1-3.8-12.5-10.8-24.2-20.8-33.7-14.7-13.9-33-21.3-54.3-21.3-19.9 0-39.3 8.2-54 22.9C378 79.7 372 89 368.2 99.2c-12.8-32.6-38.6-50.2-73-50.2-15.5 0-29.3 4.2-41.5 12.5l-21.2 51.3c-8.1-40-36.9-63.8-76.1-63.8-19.9 0-39.2 8.2-54 22.9C87.9 86.3 80 105.1 80 125.5c0 20.7 7.4 39.2 21.5 54.2 15 15.8 33 23.4 55.3 23.4 16.7 0 31.2-4.1 43.3-12.1l14-34h-33.6c-6.5 5.7-13.6 8.2-22.3 8.2-18.8 0-30-8.2-33.8-23.7h96.2c2.9 14.2 9.5 27.3 19.9 38.2 15 15.8 32.9 23.4 55.2 23.4 33.5 0 60.4-18.5 72.6-49.8 3.6 9.7 9.2 18.5 16.6 26.3 15 15.8 33 23.5 55.4 23.5 22.1 0 40.7-7.9 55.7-22.9 9.1-9.1 15.4-19.5 18.8-31.4 4 14 11.8 26.3 23.3 36.6 13.2 11.8 29 17.7 47.3 17.7 18 0 31.5-4.8 44.7-17.7v14.4h43.1v-47c3.6 9.8 9.2 18.9 16.8 26.9 15 15.8 33 23.4 55.3 23.4 32.7 0 57-15.5 71.1-46.1H769c-6.5 5.7-13.6 8.2-22.3 8.2-18.8 0-30-8.2-33.8-23.7h110.4V137zm-700.3-25.9c3-13.9 15.8-23.2 33.8-23.2 17.2 0 29.4 8.2 34.6 23.2h-68.4zm201.5 36c-7.1 9.5-16.6 14.4-28.3 14.4-20.7 0-34.8-13.9-34.8-36.2 0-20.1 14.1-34.8 33.5-34.8 14.1 0 24.2 5.4 30.2 15.5H366c-1.7 6.3-2.5 12.8-2.5 19.6 0 7.4 1 14.6 2.9 21.5h-41.8zm116.1 16.1c-19.3 0-35.6-16.5-35.6-37 0-20.7 16-37.3 35.6-37.3s35.6 16.5 35.6 37.3c0 20.5-16 37-35.6 37m154.7 0c-22.7 0-39-16-39-37 0-20.2 16.3-37.3 37.3-37.3 20.7 0 36.7 16.3 36.7 37.8.1 20.8-15.9 36.5-35 36.5m116.3-52.1c3-13.9 15.8-23.2 33.8-23.2 17.2 0 29.4 8.2 34.6 23.2h-68.4z"></path>
</svg>

Before

Width:  |  Height:  |  Size: 1.7 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 6.9 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="18" viewBox="0 0 224.6 26.6">
<path fill="#2F3237" d="M9.7 11.7l-.3-.1c-4-1.1-5.9-1.9-5.9-5.1 0-2.7 1.6-4.2 4.3-4.2 0 0 5.7 0 7.3 4.7l.1.2H17V0h-1.8l-.7 2.6C12.8 1 10.6 0 8.1 0c-4.7 0-8 3-8 7.3 0 4.8 4 6.2 8.5 7.3 4.5 1.1 6.4 2 6.4 5.3 0 2.9-2.4 4.3-4.5 4.3-3.8 0-6.5-1.5-8.6-4.6l-.1-.2H0v7.2h1.8l.8-3.1c2.2 2.1 4.6 3.1 7.6 3.1 4.8 0 8.2-3.3 8.2-7.7 0-4.9-4-6-8.7-7.2 M22.1 5.5h1.7l.1-.5c.6-2.9.7-2.9 2.7-2.9h4.6v22.3h-2.5v2.1h8.7v-2.1H35V2.1h4.5c2.1 0 2.3 0 2.9 2.9l.1.5h1.7V0H22.1v5.5z M63.2 5.5h1.7L65 5c.6-2.9.7-2.9 2.7-2.9h4.6v22.3h-2.4v2.1h8.7v-2.1h-2.4V2.1h4.5c2.1 0 2.3 0 2.9 2.9l.1.5h1.7V0H63.2v5.5zM49.3 0v2.1h2.5v22.4h-2.5v2.1H58v-2.1h-2.4V2.1H58V0zM132.4 0v2.1h2.5V13h-12.4V2.1h2.4V0h-8.7v2.1h2.6v22.4h-2.5v2.1h8.7v-2.1h-2.5v-9.4h12.4v9.4h-2.5v2.1h8.7v-2.1h-2.4V2.1h2.4V0zM186.4 0v2.1h2.5v22.4h-2.5v2.1h8.7v-2.1h-2.4V2.1h2.4V0zM215.9 0v2.1h2l-5.6 8.1-5.4-8.1h2V0h-8.7v2.1h2.5l7.6 11.3-7.5 11.1h-2.7v2.1h8.7v-2.1H206l5.8-8.7 5.8 8.7h-2.2v2.1h8.7v-2.1h-2.4l-7.8-11.9 7.2-10.5h3.5V0z M161.4 2.1h2.6v22.3h-2.6v2.1h8.7v-2.1h-2.3V15h4c1.5 0 1.6.2 2.1 1.7l.1.2v.2h1.7v-6.6H174v.2c0 .2-.1.3-.2.4-.5 1.2-.7 1.8-2.1 1.8h-4V2.1h9.1c1.7 0 2.1.5 3.1 3v.2h2V0h-20.6l.1 2.1z M108.2 16.1c-.8 5.2-3.6 8.1-7.9 8.1-5.1 0-8.2-4.3-8.2-11.1 0-6.5 3.1-10.7 7.9-10.7 4.6 0 7 2.9 8.1 5.4l.1.2h1.8V0h-2l-.7 2.8C105.4.9 103 0 100.1 0c-6.8 0-12 5.7-12 13.3 0 3.9 1.2 7.2 3.3 9.6 2.1 2.4 5.1 3.6 8.4 3.6 6 0 9.8-3.7 10.6-10.4v-.3h-2.3v.3z"></path>
</svg>

Before

Width:  |  Height:  |  Size: 1.5 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="28" viewBox="35 20 150 25">
<path fill="#001e2b" d="M175.1 28.47a2 2 0 1 1 2 2 2 2 0 0 1-2-2m3.46 0A1.47 1.47 0 1 0 177.1 30a1.45 1.45 0 0 0 1.46-1.53m-.48 1.1h-.52l-.48-.92h-.36v.92h-.46V27.4h1.05c.57 0 .84.16.84.67 0 .4-.2.56-.58.58zm-.77-1.24c.26 0 .42-.05.42-.32 0-.3-.3-.26-.5-.26h-.5v.6zM49.68 24.5h-4.6v14.74h-3.56V24.5h-4.6v-3h12.76v3M54.4 27.8a6.14 6.14 0 0 1 3.8-1.6c1.4 0 3.36.84 3.36 3.98v9.05h-3.38v-7.68c0-1.02.08-2.64-1.38-2.64a3.5 3.5 0 0 0-2.4 1.4v8.94h-3.37V21.5h3.37v6.3M62.95 32.85c0-3.4 1.87-6.65 5.6-6.65s5.62 3.26 5.62 6.65c0 3.38-1.9 6.66-5.6 6.66s-5.62-3.27-5.62-6.65m7.76 0c0-1.6-.32-4.17-2.14-4.17s-2.14 2.58-2.14 4.17.32 4.17 2.14 4.17 2.14-2.57 2.14-4.17M82.8 37.92a6.16 6.16 0 0 1-3.8 1.6c-1.4 0-3.35-.86-3.35-4v-9.05h3.37v7.68c0 1.03-.06 2.65 1.4 2.65a3.5 3.5 0 0 0 2.4-1.4v-8.93h3.37v12.77h-3.4v-1.32M91.7 35.04a.87.87 0 0 0-.96.8c0 1.66 8.85-1.23 8.85 3.76 0 1.9-1.3 3.9-6.35 3.9-4.4 0-5.9-1.55-5.9-3.2a2.2 2.2 0 0 1 1.4-1.95 2.03 2.03 0 0 1-1-1.62 2.9 2.9 0 0 1 1.65-2.55 4.8 4.8 0 0 1-1.6-3.5c0-2.84 2.4-4.48 5.15-4.48a6.13 6.13 0 0 1 3.15.9 3.13 3.13 0 0 1 2.3-1.06 4.97 4.97 0 0 1 .98.1v2.36a3 3 0 0 0-1.07-.27 1.86 1.86 0 0 0-.9.27 5.1 5.1 0 0 1 .68 2.17c0 2.82-2.37 4.46-5.13 4.46l-1.25-.1m-1.07 4.23a.88.88 0 0 0-.53.8c0 1.18 2.4 1.25 3.2 1.25.57 0 3.53-.06 3.53-1.08 0-.66-.45-.6-2.46-.7zm2.32-6.36c1.42 0 1.83-1.16 1.83-2.37 0-1.17-.55-2.15-1.83-2.15-1.4 0-1.84 1.2-1.84 2.4 0 1.16.55 2.12 1.85 2.12M104.2 27.8a6.14 6.14 0 0 1 3.82-1.6c1.4 0 3.35.84 3.35 3.98v9.05H108v-7.68c0-1.02.07-2.64-1.4-2.64a3.5 3.5 0 0 0-2.4 1.4v8.94h-3.36V21.5h3.37v6.3M117.5 26.47h2.63v2.8h-2.62v6.5c0 .62.2 1.25 1.28 1.25a2.4 2.4 0 0 0 1.35-.47v2.57a7.83 7.83 0 0 1-2.17.4c-2.1 0-3.83-1.2-3.83-3.98v-6.27h-1.57v-2.8h1.57v-3.2h3.38v3.2M134.17 39.24h-1.82l-3-14.5h-.06l-3.18 14.5h-1.82l-3.72-17.74h1.78l2.92 14.32h.05l3.2-14.32h1.74l3.04 14.32h.05l3.12-14.32h1.75l-4.05 17.74M137.46 32.94c0-4.7 2.23-6.65 5.03-6.65s5.03 1.93 5.03 6.64-2.24 6.66-5.04 6.66-5.04-1.94-5.04-6.66m8.3 
0c0-2.14-.53-5.15-3.27-5.15s-3.26 3-3.26 5.14.52 5.16 3.25 5.16 3.26-3 3.26-5.16M150.7 29.36h.05c.7-1.5 1.6-2.9 3.48-2.9h.7v1.63c-2.34-.33-3.55 2.18-4.23 4v7.14h-1.78v-12.6h1.78v2.72M158.13 32.5l4.68-5.85h2l-3.48 4.3 4 8.3h-1.92l-3.22-6.85-2.05 2.53v4.3h-1.78V21.5h1.78v11M173.02 29.46a3.26 3.26 0 0 0-2.88-1.67c-1.16 0-2.25.58-2.25 1.86 0 3.08 6.62 1.73 6.62 5.97a3.84 3.84 0 0 1-4.12 3.97 5.17 5.17 0 0 1-4.7-2.64l1.4-.9a3.5 3.5 0 0 0 3.3 2.04 2.2 2.2 0 0 0 2.46-2.14c0-3.17-6.63-1.66-6.63-6.18a3.56 3.56 0 0 1 3.85-3.5 4.83 4.83 0 0 1 4.2 2.2l-1.26.98"></path>
</svg>

Before

Width:  |  Height:  |  Size: 2.6 KiB

View File

@ -1,3 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="31" viewBox="0 0 927 322">
<path fill="#010202" d="M53.328 229.809c3.917 10.395 9.34 19.283 16.27 26.664 6.93 7.382 15.14 13.031 24.63 16.948 9.491 3.917 19.81 5.875 30.958 5.875 10.847 0 21.015-2.034 30.506-6.102s17.776-9.792 24.856-17.173c7.08-7.382 12.579-16.194 16.496-26.438 3.917-10.244 5.875-21.692 5.875-34.347V0h47.453v316.354h-47.001v-29.376c-10.545 11.147-22.974 19.734-37.285 25.761-14.312 6.025-29.752 9.038-46.323 9.038-16.873 0-32.615-2.938-47.228-8.813-14.612-5.875-27.267-14.235-37.962-25.082S15.441 264.006 9.265 248.79C3.088 233.575 0 216.628 0 197.947V0h47.453v195.236C47.453 207.891 49.411 219.414 53.328 229.809zM332.168 0v115.243c10.545-10.545 22.748-18.905 36.607-25.082s28.924-9.265 45.193-9.265c16.873 0 32.689 3.163 47.453 9.49 14.763 6.327 27.567 14.914 38.414 25.761s19.434 23.651 25.761 38.414c6.327 14.764 9.49 30.431 9.49 47.002 0 16.57-3.163 32.162-9.49 46.774-6.327 14.613-14.914 27.343-25.761 38.188-10.847 10.847-23.651 19.434-38.414 25.761-14.764 6.327-30.581 9.49-47.453 9.49-16.27 0-31.409-3.088-45.419-9.265-14.01-6.176-26.288-14.537-36.833-25.082v28.924h-45.193V0H332.168zM337.365 232.746c4.067 9.642 9.717 18.078 16.948 25.309 7.231 7.231 15.667 12.956 25.308 17.174 9.642 4.218 20.036 6.327 31.184 6.327 10.847 0 21.09-2.109 30.731-6.327s18.001-9.942 25.083-17.174c7.08-7.23 12.729-15.667 16.947-25.309 4.218-9.641 6.327-20.035 6.327-31.183 0-11.148-2.109-21.618-6.327-31.41s-9.867-18.303-16.947-25.534c-7.081-7.23-15.441-12.88-25.083-16.947s-19.885-6.102-30.731-6.102c-10.846 0-21.09 2.034-30.731 6.102s-18.077 9.717-25.309 16.947c-7.23 7.231-12.955 15.742-17.173 25.534-4.218 9.792-6.327 20.262-6.327 31.41C331.264 212.711 333.298 223.105 337.365 232.746zM560.842 155.014c6.025-14.462 14.312-27.191 24.856-38.188s23.049-19.659 37.511-25.986 30.129-9.49 47.001-9.49c16.571 0 31.937 3.013 46.098 9.038 14.16 6.026 26.362 14.387 36.606 25.083 10.244 10.695 18.229 23.35 23.952 37.962 5.725 14.613 8.587 30.506 8.587 47.68v14.914H597.901c1.507 9.34 4.52 18.002 9.039 25.985 4.52 7.984 
10.168 14.914 16.947 20.789 6.779 5.876 14.462 10.471 23.049 13.784 8.587 3.314 17.7 4.972 27.342 4.972 27.418 0 49.563-11.299 66.435-33.896l32.991 24.404c-11.449 15.366-25.609 27.418-42.481 36.155-16.873 8.737-35.854 13.106-56.944 13.106-17.174 0-33.217-3.014-48.131-9.039s-27.869-14.462-38.866-25.309-19.659-23.576-25.986-38.188-9.491-30.506-9.491-47.679C551.803 184.842 554.817 169.476 560.842 155.014zM624.339 137.162c-12.805 10.696-21.316 24.932-25.534 42.708h140.552c-3.917-17.776-12.278-32.012-25.083-42.708-12.805-10.695-27.794-16.043-44.967-16.043C652.133 121.119 637.144 126.467 624.339 137.162zM870.866 142.359c-9.641 10.545-14.462 24.856-14.462 42.934v131.062h-45.646V85.868h45.193v28.472c5.725-9.34 13.182-16.722 22.371-22.145 9.189-5.424 20.111-8.136 32.766-8.136h15.817v42.482h-18.981C892.86 126.542 880.507 131.814 870.866 142.359z"/>
</svg>

Before

Width:  |  Height:  |  Size: 2.9 KiB

View File

@ -1,4 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="19" viewBox="0 0 1743 222.2">
<path d="M208 0v44.4c-3.5 0-6.5.4-9.4-.1-4.1-.8-5.5.9-6.6 4.5-13.9 45-28 89.9-42 134.8-3.2 10.3-6.3 20.7-9.8 30.9-.5 1.4-2.5 3.3-3.8 3.3-22.5.2-45 .1-67.8.1-.5-1.4-1.1-2.7-1.6-4.1-17.4-55-34.8-110.1-52-165.1-1.2-3.7-2.7-5.1-6.7-4.5-2.6.5-5.5-.1-8.3-.2V0h94v44.3H74.9c10.5 41.1 20.9 81.7 31.3 122.3.3 0 .6.1 1 .1 11.2-40.6 22.4-81.3 33.8-122.5h-18.9V0H208z M356 58.3h63.2c.6 7.4 1.2 14.7 1.9 22.2 3.8-4.4 7-8.9 11-12.4 17.9-15.4 38.5-18.4 60.2-10.4 16.4 6.1 23.4 19.6 23.7 36.5.4 24.1.2 48.3.2 72.5v6.6l12.9.6v43.7h-70.8V212v-92.5c0-8.4-2.9-12.7-9.3-14.8-6.7-2.2-13.6 0-18.2 6-1.1 1.4-1.9 3.1-2.7 4.8-.5 1.2-1 2.6-1 3.8-.1 17.9 0 35.8 0 54.2h9.7v44.1H356v-43.9h12.3v-70.8h-12.2c-.1-15.2-.1-29.7-.1-44.6zM741.9 102.4h-10.8v-44c.8-.1 1.6-.3 2.4-.3h66.6v115.6H813v43.9h-65.5v-16.5c-2.9 3.1-4.9 5.4-7.2 7.5-15.9 14.1-43.9 17.9-62.9 8.3-14.5-7.3-21.7-19.7-22.3-35.4-.9-24.3-.7-48.6-1-72.9v-6.3h-12.7v-44H712v5.6c0 29.3-.1 58.6.1 88 0 4.1.7 8.3 2 12.2 2 5.9 7 8.9 13.2 8.7 6.1-.2 10.5-3.1 12.6-8.8.8-2.2 1.7-4.5 1.7-6.7.4-18.1.3-36.3.3-54.9z M345.7 149h-98.5c-.2 9.1.1 17.6 4.5 25.4 3.6 6.5 9.6 8.9 16.8 8.6 7.2-.3 12.9-3.3 15.9-10.1 1.3-3 2.1-6.2 3.3-9.6h54.6c-2.2 17.5-8.9 32.3-22.9 43.3-9.9 7.8-21.4 12-33.8 13.8-16.7 2.5-33.2 1.8-49.4-3.4-21.7-7-36.3-21.4-43-43-7.3-23.3-7.6-47 .1-70.3 9.4-28.7 30.1-44.2 59.5-48.6 13.2-2 26.3-1.1 39.1 2.4 29.9 8.1 45.9 28.7 50.8 58.4 1.8 10.6 2 21.5 3 33.1zm-96.9-30.8H287c.5-8.5-.7-16.1-8.2-20.9-6.8-4.3-14.3-4.7-21.2-.4-7.7 4.9-8.7 12.8-8.8 21.3zM1114 148.9h-98.2c-.2 9-.2 17.6 4.3 25.4 3.8 6.7 9.9 9.1 17.3 8.7 7.4-.4 13.1-3.8 15.9-10.9 1.1-2.8 1.8-5.7 2.8-8.8h54.7c-3.5 32.1-26 53.9-59.4 57.6-15.6 1.7-30.9 1-46-3.7-22.3-7-37.2-21.7-44-44-6.9-23-7.2-46.3.3-69.3 9.5-28.9 30.3-44.5 59.9-48.8 13.9-2 27.6-.9 41 3.1 27.5 8.3 43 27.6 48.1 55.2 2.1 11.4 2.2 23.1 3.3 35.5zm-96.4-30.8h38c.1-16-7.7-24.9-20.6-23.9-11.9.9-19.2 11-17.4 23.9z M535.6 58h18c0-10.6.4-20.9-.1-31.2-.3-5.4 1.5-7.4 6.8-8.5 15.2-3.1 30.2-6.7 46-10.3v50h25.6v44.7h-25c-.2 1.8-.4 3.3-.4 4.7v51.5c0 
1.8.2 3.7.4 5.5 1.3 9.8 8.2 14.9 18 13.3 1.6-.3 3.2-.6 5.6-1v27.7c0 12.9 0 12.9-12.7 14.9-13.6 2.2-27.1 2.9-40.7-.3-19.1-4.6-27.8-15.5-27.9-35.3V103h-13.7c.1-15.3.1-29.8.1-45zM826.2 217.6v-43.9h12.7v-70.9h-12.6V58.3h62.1l1.9 25.3 2-4.4c5.1-12.9 14.4-20.7 28.3-22.2 6.7-.7 13.6-.1 20.3.3 1.2.1 3.4 2 3.4 3.1.2 15.8 0 31.6 0 47.5 0 .3-.3.6-.6 1.1-7.6 0-15.5-1-23.1.2-16.2 2.6-23.8 12-24.5 28.5-.2 5.8-.2 11.7-.3 17.5v18.2h18v44.3c-29.1-.1-58.1-.1-87.6-.1z" />
<path fill="#ED1E25" d="M1237 .3c8.5 1.4 17.1 2.2 25.4 4.3 34.3 8.6 51.7 50.6 33.5 80.3-4.4 7.2-10.5 12.4-17.7 16.5-3.2 1.8-6.4 3.5-10.3 5.5 2 .8 3.4 1.6 4.9 2 23.7 6.9 34.2 24.4 35.9 47.6 2.4 31.9-17.7 55.7-49.6 59.6-9.9 1.2-19.9 1.9-29.9 1.9-31.7.2-63.3.1-95 .1h-5.8v-43.8h18.9V44.4H1128V.2c36.3.1 72.7.1 109 .1zm-32.3 128.8c0 14.9-.1 28.5.1 42.2 0 .9 2 2.7 3 2.7 8.3 0 16.7 0 24.9-.7 6.1-.5 11.7-2.8 15.1-8.4 8-13.2.4-31.6-14.7-34.2-9-1.6-18.4-1.1-28.4-1.6zm.2-40.5c8.7-.5 16.9-.2 24.8-1.6 9.6-1.7 16.2-11 16.3-21.2 0-10.2-5.9-19.7-14.7-21.3-8.5-1.5-17.4-1.4-26.4-2v46.1z M1743 103.3c-7.5-.1-15-.4-22.4-.2-1.1 0-3.2 1.9-3.2 3-.2 18.8-.6 37.7.1 56.5.4 12.3 7.9 17.4 20 15.2 1-.2 2-.2 3.2-.3.2 1.2.5 2.3.5 3.4 0 10.8 0 21.7.1 32.5 0 2.4-.3 4.2-3.1 4.7-16.5 2.7-32.9 5.1-49.6 1.2-18.7-4.4-27.7-14.3-28.1-33.4-.5-25.5-.2-51-.3-76.5V103h-6.4c-8.3-.1-7.3.9-7.4-7.6V58.5h18.4c0-10.1-.1-19.8 0-29.4.1-10.6-1.5-8.2 8.7-10.7 14.2-3.4 28.5-6.5 43.5-10v49.9h26v45z M1569.2 119.2c0-5.4.3-10-.1-14.6-.6-8.5-6.1-14.1-13.8-14.3-7.7-.2-14.1 5.5-15.3 13.7-.3 1.8-.3 3.6-.5 5.8h-53.3c-1.9-20.2 8.6-38.7 28.2-47.2 28.5-12.3 57.2-11.2 85.1 2.2 17.1 8.2 25.9 22.7 26.2 41.7.4 20.3.2 40.7.3 61v6.6h12.8v43.8h-66.2c-.5-5.4-1-11-1.6-17.4-1.5 1.7-2.5 2.7-3.4 3.8-17.3 21.3-50.3 21.2-67.2 11.3-13.4-7.9-19.2-20.5-20.1-35.4-2-32.6 15.1-53.7 48.1-58.7 11.6-1.8 23.5-1.6 35.3-2.3 1.6-.1 3.2 0 5.5 0zm.7 28.2c-5.4 0-9.7-.6-13.9.1-12.9 2.1-19.5 11.1-18.1 24.1 1.2 10.7 10.4 16.1 20.3 11.9 5.3-2.2 8.9-6.3 9.7-11.8 1.2-7.9 1.4-16 2-24.3z M1475.6 149.2h-98.5c0 9.7.1 18.9 5.6 27 4.2 6.2 10.6 7.7 17.6 7 6.8-.7 11.9-4.1 14.6-10.5 1.2-2.7 1.8-5.7 2.8-9h54.4c-2.2 17.5-8.9 32.5-23.3 43.3-17 12.8-36.8 15.8-57.3 14.4-8.4-.5-16.9-2-25-4.5-21.4-6.5-36-20.6-42.8-41.9-8-25-8.2-50.2 1.1-74.9 10.3-27.1 31.1-41 59.2-44.8 13.7-1.8 27.3-.7 40.5 3.4 28.2 8.7 43.2 28.8 47.9 57 2.1 10.8 2.3 21.8 3.2 33.5zm-58.1-30.5c.1-9-.9-17.2-9.5-21.8-7.3-3.9-14.9-4-21.6 1.2-6.6 5.1-7.8 12.5-7.3 20.6h38.4z" />
</svg>

Before

Width:  |  Height:  |  Size: 4.4 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 8.8 KiB

View File

@ -38,7 +38,10 @@ function remarkCodeBlocks(userOptions = {}) {
}
// Overwrite the code text with the rest of the lines
node.value = lines.slice(1).join('\n')
} else if (firstLine && /^https:\/\/github.com/.test(firstLine)) {
} else if (
(firstLine && /^https:\/\/github.com/.test(firstLine)) ||
firstLine.startsWith('%%GITHUB_')
) {
// GitHub URL
attrs.github = 'true'
}

View File

@ -76,7 +76,7 @@
.banner
background: var(--color-theme)
color: var(--color-back)
padding: 5rem
padding: 1rem 5rem
margin-bottom: var(--spacing-md)
background-size: cover
@ -143,21 +143,3 @@
.banner-button-element
background: var(--color-theme)
.logos
text-align: center
padding-bottom: 1rem
& + &
padding-bottom: 7.5rem
.logos-content
display: flex
justify-content: center
align-items: center
flex-flow: row wrap
margin-bottom: 0
.logo
display: inline-block
padding: 1.8rem

View File

@ -84,7 +84,6 @@
--syntax-function: hsl(195, 70%, 54%)
--syntax-keyword: hsl(343, 100%, 68%)
--syntax-operator: var(--syntax-keyword)
--syntax-regex: hsl(45, 90%, 55%)
// Other
--color-inline-code-text: var(--color-dark)
@ -345,9 +344,6 @@ body [id]:target
&.atrule, &.function, &.selector
color: var(--syntax-function)
&.regex, &.important
color: var(--syntax-regex)
&.keyword
color: var(--syntax-keyword)
@ -528,11 +524,14 @@ body [id]:target
.gatsby-resp-image-figure
margin-bottom: var(--spacing-md)
.gatsby-resp-image-figcaption
.gatsby-resp-image-figcaption, .caption
font: var(--font-size-xs)/var(--line-height-md) var(--font-primary)
color: var(--color-subtle-dark)
padding-top: 0.75rem
padding-left: 3rem
code
p
font: inherit
code, a
color: inherit

View File

@ -6,6 +6,9 @@
margin-bottom: var(--spacing-md)
max-width: 100%
figure > .root
margin-bottom: var(--spacing-sm)
.fixed
table-layout: fixed
@ -54,6 +57,21 @@
border-bottom: 2px solid var(--color-theme)
vertical-align: bottom
// Rotated table header styles: a fixed 100px header height whose .tx labels are shifted 15px right and rotated by 315deg.
.th-rotated
height: 100px
.tx
transform: translate(15px, 0) rotate(315deg) // 315 = 360 - 45, i.e. tilted 45deg counter-clockwise
width: 30px
white-space: nowrap
& > span
padding-left: 0.5rem
[data-tooltip]
display: inline-block
transform: rotate(-316deg) // NOTE(review): assuming this sits inside .tx, -315deg would exactly undo the 315deg rotation; -316deg leaves it 1deg off - confirm whether intentional
.divider
height: 0
border-bottom: 1px solid var(--color-subtle)

View File

@ -19,7 +19,7 @@ import Footer from '../components/footer'
import SEO from '../components/seo'
import Link from '../components/link'
import Section, { Hr } from '../components/section'
import { Table, Tr, Th, Td } from '../components/table'
import { Table, Tr, Th, Tx, Td } from '../components/table'
import { Pre, Code, InlineCode, TypeAnnotation } from '../components/code'
import { Ol, Ul, Li } from '../components/list'
import { H2, H3, H4, H5, P, Abbr, Help } from '../components/typography'
@ -64,6 +64,7 @@ const scopeComponents = {
Infobox,
Table,
Tr,
Tx,
Th,
Td,
Help,

View File

@ -12,16 +12,21 @@ import {
LandingDemo,
LandingBannerGrid,
LandingBanner,
LandingLogos,
} from '../components/landing'
import { H2 } from '../components/typography'
import { Ul, Li } from '../components/list'
import { InlineCode } from '../components/code'
import Button from '../components/button'
import Link from '../components/link'
import QuickstartTraining from './quickstart-training'
import Project from './project'
import courseImage from '../../docs/images/course.jpg'
import prodigyImage from '../../docs/images/prodigy_overview.jpg'
import projectsImage from '../../docs/images/projects.png'
import irlBackground from '../images/spacy-irl.jpg'
import BenchmarksChoi from 'usage/_benchmarks-choi.md'
import Benchmarks from 'usage/_benchmarks-models.md'
const CODE_EXAMPLE = `# pip install spacy
# python -m spacy download en_core_web_sm
@ -82,8 +87,7 @@ const Landing = ({ data }) => {
<LandingCard title="Get things done" url="/usage/spacy-101" button="Get started">
spaCy is designed to help you do real work to build real products, or gather
real insights. The library respects your time, and tries to avoid wasting it.
It's easy to install, and its API is simple and productive. We like to think of
spaCy as the Ruby on Rails of Natural Language Processing.
It's easy to install, and its API is simple and productive.
</LandingCard>
<LandingCard
title="Blazing fast"
@ -91,16 +95,14 @@ const Landing = ({ data }) => {
button="Facts &amp; Figures"
>
spaCy excels at large-scale information extraction tasks. It's written from the
ground up in carefully memory-managed Cython. Independent research in 2015 found
spaCy to be the fastest in the world. If your application needs to process
entire web dumps, spaCy is the library you want to be using.
ground up in carefully memory-managed Cython. If your application needs to
process entire web dumps, spaCy is the library you want to be using.
</LandingCard>
<LandingCard title="Deep learning" url="/usage/training" button="Read more">
spaCy is the best way to prepare text for deep learning. It interoperates
seamlessly with TensorFlow, PyTorch, scikit-learn, Gensim and the rest of
Python's awesome AI ecosystem. With spaCy, you can easily construct
linguistically sophisticated statistical models for a variety of NLP problems.
<LandingCard title="Awesome ecosystem" url="/usage/projects" button="Read more">
In the five years since its release, spaCy has become an industry standard with
a huge ecosystem. Choose from a variety of plugins, integrate with your machine
learning stack and build custom components and workflows.
</LandingCard>
</LandingGrid>
@ -110,43 +112,154 @@ const Landing = ({ data }) => {
<LandingCol>
<H2>Features</H2>
<Ul>
<Li>
Non-destructive <strong>tokenization</strong>
</Li>
<Li>
<strong>Named entity</strong> recognition
</Li>
<Li>
Support for <strong>{counts.langs}+ languages</strong>
</Li>
<Li>
<strong>{counts.models} statistical models</strong> for{' '}
<strong>{counts.models} trained pipelines</strong> for{' '}
{counts.modelLangs} languages
</Li>
<Li>
Multi-task learning with pretrained <strong>transformers</strong> like
BERT
</Li>
<Li>
Pretrained <strong>word vectors</strong>
</Li>
<Li>State-of-the-art speed</Li>
<Li>
Easy <strong>deep learning</strong> integration
Production-ready <strong>training system</strong>
</Li>
<Li>
Linguistically-motivated <strong>tokenization</strong>
</Li>
<Li>
Components for <strong>named entity</strong> recognition,
part-of-speech tagging, dependency parsing, sentence segmentation,{' '}
<strong>text classification</strong>, lemmatization, morphological
analysis, entity linking and more
</Li>
<Li>
Easily extensible with <strong>custom components</strong> and attributes
</Li>
<Li>
Support for custom models in <strong>PyTorch</strong>,{' '}
<strong>TensorFlow</strong> and other frameworks
</Li>
<Li>Part-of-speech tagging</Li>
<Li>Labelled dependency parsing</Li>
<Li>Syntax-driven sentence segmentation</Li>
<Li>
Built-in <strong>visualizers</strong> for syntax and NER
</Li>
<Li>Convenient string-to-hash mapping</Li>
<Li>Export to numpy data arrays</Li>
<Li>Efficient binary serialization</Li>
<Li>
Easy <strong>model packaging</strong> and deployment
Easy <strong>model packaging</strong>, deployment and workflow
management
</Li>
<Li>Robust, rigorously evaluated accuracy</Li>
</Ul>
</LandingCol>
</LandingGrid>
<LandingBannerGrid>
<LandingBanner
label="New in v3.0"
title="Transformer-based pipelines, new training system, project templates &amp; more"
to="/usage/v3"
button="See what's new"
small
>
spaCy v3.0 features all-new <strong>transformer-based pipelines</strong> that
bring spaCy's accuracy right up to the current <strong>state-of-the-art</strong>
. You can use any pretrained transformer to train your own pipelines, and even
share one transformer between multiple components with{' '}
<strong>multi-task learning</strong>. Training is now fully configurable and
extensible, and you can define your own custom models using{' '}
<strong>PyTorch</strong>, <strong>TensorFlow</strong> and other frameworks. The
new spaCy projects system lets you describe whole{' '}
<strong>end-to-end workflows</strong> in a single file, giving you an easy path
from prototype to production, and making it easy to clone and adapt
best-practice projects for your own use cases.
</LandingBanner>
<LandingBanner
title="Prodigy: Radically efficient machine teaching"
label="From the makers of spaCy"
to="https://prodi.gy"
button="Try it out"
background="#f6f6f6"
color="#000"
small
>
<Link to="https://prodi.gy" hidden>
{/** Update image */}
<img
src={prodigyImage}
alt="Prodigy: Radically efficient machine teaching"
/>
</Link>
<br />
<br />
Prodigy is an <strong>annotation tool</strong> so efficient that data scientists
can do the annotation themselves, enabling a new level of rapid iteration.
Whether you're working on entity recognition, intent detection or image
classification, Prodigy can help you <strong>train and evaluate</strong> your
models faster.
</LandingBanner>
</LandingBannerGrid>
<LandingGrid cols={2} style={{ gridTemplateColumns: '1fr calc(80ch + 14rem)' }}>
<LandingCol>
<H2>Reproducible training for custom pipelines</H2>
<p>
spaCy v3.0 introduces a comprehensive and extensible system for{' '}
<strong>configuring your training runs</strong>. Your configuration file
will describe every detail of your training run, with no hidden defaults,
making it easy to <strong>rerun your experiments</strong> and track changes.
You can use the quickstart widget or the{' '}
<Link to="/api/cli#init-config">
<InlineCode>init config</InlineCode>
</Link>{' '}
command to get started, or clone a project template for an end-to-end
workflow.
</p>
<p>
<Button to="/usage/training">Get started</Button>
</p>
</LandingCol>
<LandingCol>
<QuickstartTraining />
</LandingCol>
</LandingGrid>
<LandingGrid cols={2}>
<LandingCol>
<Link to="/usage/projects" hidden>
<img src={projectsImage} />
</Link>
<br />
<br />
<br />
{/** TODO: update with actual example */}
<Project id="some_example">
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum
sodales lectus.
</Project>
</LandingCol>
<LandingCol>
<H2>End-to-end workflows from prototype to production</H2>
<p>
spaCy's new project system gives you a smooth path from prototype to
production. It lets you keep track of all those{' '}
<strong>data transformation</strong>, preprocessing and{' '}
<strong>training steps</strong>, so you can make sure your project is always
ready to hand over for automation. It features source asset download,
command execution, checksum verification, and caching with a variety of
backends and integrations.
</p>
<p>
<Button to="/usage/projects">Try it out</Button>
</p>
</LandingCol>
</LandingGrid>
<LandingBannerGrid>
<LandingBanner
to="https://course.spacy.io"
@ -169,68 +282,41 @@ const Landing = ({ data }) => {
<strong>55 exercises</strong> featuring videos, slide decks, multiple-choice
questions and interactive coding practice in the browser.
</LandingBanner>
<LandingBanner
title="Prodigy: Radically efficient machine teaching"
label="From the makers of spaCy"
to="https://prodi.gy"
button="Try it out"
background="#eee"
color="#000"
title="spaCy IRL: Two days of NLP"
label="Watch the videos"
to="https://www.youtube.com/playlist?list=PLBmcuObd5An4UC6jvK_-eSl6jCvP1gwXc"
button="Watch the videos"
background="#ffc194"
backgroundImage={irlBackground}
color="#1a1e23"
small
>
Prodigy is an <strong>annotation tool</strong> so efficient that data scientists
can do the annotation themselves, enabling a new level of rapid iteration.
Whether you're working on entity recognition, intent detection or image
classification, Prodigy can help you <strong>train and evaluate</strong> your
models faster. Stream in your own examples or real-world data from live APIs,
update your model in real-time and chain models together to build more complex
systems.
We were pleased to invite the spaCy community and other folks working on NLP to
Berlin for a small and intimate event. We booked a beautiful venue, hand-picked
an awesome lineup of speakers and scheduled plenty of social time to get to know
each other. The YouTube playlist includes 12 talks about NLP research,
development and applications, with keynotes by Sebastian Ruder (DeepMind) and
Yoav Goldberg (Allen AI).
</LandingBanner>
</LandingBannerGrid>
<LandingLogos title="spaCy is trusted by" logos={data.logosUsers}>
<Button to={`https://github.com/${data.repo}/stargazers`}>and many more</Button>
</LandingLogos>
<LandingLogos title="Featured on" logos={data.logosPublications} />
<LandingBanner
title="BERT-style language model pretraining"
label="New in v2.1"
to="/usage/v2-1"
button="Read more"
>
Learn more from small training corpora by initializing your models with{' '}
<strong>knowledge from raw text</strong>. The new pretrain command teaches spaCy's
CNN model to predict words based on their context, producing representations of
words in contexts. If you've seen Google's BERT system or fast.ai's ULMFiT, spaCy's
pretraining is similar but much more efficient. It's still experimental, but users
are already reporting good results, so give it a try!
</LandingBanner>
<LandingGrid cols={2}>
<LandingGrid cols={2} style={{ gridTemplateColumns: '1fr 60%' }}>
<LandingCol>
<H2>Benchmarks</H2>
<p>
In 2015, independent researchers from Emory University and Yahoo! Labs
showed that spaCy offered the{' '}
<strong>fastest syntactic parser in the world</strong> and that its accuracy
was <strong>within 1% of the best</strong> available (
<Link to="https://aclweb.org/anthology/P/P15/P15-1038.pdf">
Choi et al., 2015
</Link>
). spaCy v2.0, released in 2017, is more accurate than any of the systems
Choi et al. evaluated.
spaCy v3.0 introduces transformer-based pipelines that bring spaCy's
accuracy right up to the current <strong>state-of-the-art</strong>. You can
also use a CPU-optimized pipeline, which is less accurate but much cheaper
to run.
</p>
<p>
<Button to="/usage/facts-figures#benchmarks" large>
See details
</Button>
<Button to="/usage/facts-figures#benchmarks">See details</Button>
</p>
</LandingCol>
<LandingCol>
<BenchmarksChoi />
<Benchmarks />
</LandingCol>
</LandingGrid>
</>
@ -245,18 +331,6 @@ Landing.propTypes = {
models: PropTypes.arrayOf(PropTypes.string),
})
),
logosUsers: PropTypes.arrayOf(
PropTypes.shape({
id: PropTypes.string.isRequired,
url: PropTypes.string.isRequired,
})
),
logosPublications: PropTypes.arrayOf(
PropTypes.shape({
id: PropTypes.string.isRequired,
url: PropTypes.string.isRequired,
})
),
}),
}
@ -274,14 +348,6 @@ const landingQuery = graphql`
models
starters
}
logosUsers {
id
url
}
logosPublications {
id
url
}
}
}
}

View File

@ -58,7 +58,8 @@ const QuickstartInstall = ({ id, title }) => (
<StaticQuery
query={query}
render={({ site }) => {
const models = site.siteMetadata.languages.filter(({ models }) => models !== null)
const { nightly, languages } = site.siteMetadata
const models = languages.filter(({ models }) => models !== null)
const data = [
...DATA,
{
@ -82,7 +83,10 @@ const QuickstartInstall = ({ id, title }) => (
</QS>
<QS package="pip">pip install -U spacy</QS>
<QS package="conda">conda install -c conda-forge spacy</QS>
<QS package="source">git clone https://github.com/{repo}</QS>
<QS package="source">
git clone https://github.com/{repo}
{nightly ? ` --branch develop` : ''}
</QS>
<QS package="source">cd spaCy</QS>
<QS package="source" os="linux">
export PYTHONPATH=`pwd`
@ -127,6 +131,7 @@ const query = graphql`
query QuickstartInstallQuery {
site {
siteMetadata {
nightly
languages {
code
name

View File

@ -47,7 +47,7 @@ const DATA = [
},
]
export default function QuickstartTraining({ id, title, download = 'config.cfg' }) {
export default function QuickstartTraining({ id, title, download = 'base_config.cfg' }) {
const [lang, setLang] = useState(DEFAULT_LANG)
const [components, setComponents] = useState([])
const [[hardware], setHardware] = useState([DEFAULT_HARDWARE])