From 458bc5f45c5e371bdbef43d58d078436ee496e43 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Tue, 8 Aug 2023 15:04:13 +0200
Subject: [PATCH 1/8] Set version to v3.6.1 (#12892)

---
 spacy/about.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/about.py b/spacy/about.py
index cad6158da..0f8eee0ff 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.6.0"
+__version__ = "3.6.1"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
 __projects__ = "https://github.com/explosion/projects"

From d50b8d51e20f4c66ac111e94fdc589e98769c03d Mon Sep 17 00:00:00 2001
From: denizcodeyaa <141595121+denizcodeyaa@users.noreply.github.com>
Date: Fri, 11 Aug 2023 09:38:06 -0400
Subject: [PATCH 2/8] Update examples.py (#12895)

Add: example sentences to improve the Turkish model. Let's get the tr_web_core_sm out in the world yaa
---
 spacy/lang/tr/examples.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/spacy/lang/tr/examples.py b/spacy/lang/tr/examples.py
index dfb324a4e..c912c950d 100644
--- a/spacy/lang/tr/examples.py
+++ b/spacy/lang/tr/examples.py
@@ -15,4 +15,7 @@ sentences = [
     "Türkiye'nin başkenti neresi?",
     "Bakanlar Kurulu 180 günlük eylem planını açıkladı.",
     "Merkez Bankası, beklentiler doğrultusunda faizlerde değişikliğe gitmedi.",
+    "Cemal Sureya kimdir?",
+    "Bunlari Biliyor muydunuz?",
+    "Altinoluk Turkiye haritasinin neresinde yer alir?",
 ]

From 64b8ee2dbe07ad70321a87cc55b653ef335f5c66 Mon Sep 17 00:00:00 2001
From: William Mattingly <62964060+wjbmattingly@users.noreply.github.com>
Date: Mon, 14 Aug 2023 17:44:14 +0300
Subject: [PATCH 3/8] Update universe.json (#12904)

* Update universe.json

added hobbit-spacy to the universe json

* Update universe.json

removed displacy from hobbit-spacy and added a default text.
---
 website/meta/universe.json | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index 2ed8b4b41..ec380f847 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -4444,6 +4444,31 @@
             },
             "category": ["pipeline", "standalone", "scientific"],
             "tags": ["ner"]
+        },
+        {
+            "id": "hobbit-spacy",
+            "title": "Hobbit spaCy",
+            "slogan": "NLP for Middle Earth",
+            "description": "Hobbit spaCy is a custom spaCy pipeline designed specifically for working with Middle Earth and texts from the world of J.R.R. Tolkien.",
+            "github": "wjbmattingly/hobbit-spacy",
+            "pip": "en-hobbit",
+            "code_example": [
+                "import spacy",
+                "",
+                "nlp = spacy.load('en_hobbit')",
+                "doc = nlp('Frodo saw Glorfindel and Glóin; and in a corner alone Strider was sitting, clad in his old travel - worn clothes again')"
+            ],
+            "code_language": "python",
+            "thumb": "https://github.com/wjbmattingly/hobbit-spacy/blob/main/images/hobbit-thumbnail.png?raw=true",
+            "image": "https://github.com/wjbmattingly/hobbit-spacy/raw/main/images/hobbitspacy.png",
+            "author": "W.J.B. Mattingly",
+            "author_links": {
+                "twitter": "wjb_mattingly",
+                "github": "wjbmattingly",
+                "website": "https://wjbmattingly.com"
+            },
+            "category": ["pipeline", "standalone"],
+            "tags": ["spans", "rules", "ner"]
         }
     ],
 

From 76a9f9c6c6546ec50cb00fab70dbf5f8ac6e0929 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 16 Aug 2023 17:28:34 +0200
Subject: [PATCH 4/8] Docs: clarify abstract spacy.load examples (#12889)

---
 website/docs/api/top-level.mdx              | 2 +-
 website/docs/usage/processing-pipelines.mdx | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/website/docs/api/top-level.mdx b/website/docs/api/top-level.mdx
index 37e86a4bc..9cdc0c8ab 100644
--- a/website/docs/api/top-level.mdx
+++ b/website/docs/api/top-level.mdx
@@ -68,7 +68,7 @@ weights, and returns it.
 cls = spacy.util.get_lang_class(lang)  # 1. Get Language class, e.g. English
 nlp = cls()                            # 2. Initialize it
 for name in pipeline:
-    nlp.add_pipe(name)                 # 3. Add the component to the pipeline
+    nlp.add_pipe(name, config={...})   # 3. Add the component to the pipeline
 nlp.from_disk(data_path)               # 4. Load in the binary data
 ```
 
diff --git a/website/docs/usage/processing-pipelines.mdx b/website/docs/usage/processing-pipelines.mdx
index 307cb9dcb..6ec8a0513 100644
--- a/website/docs/usage/processing-pipelines.mdx
+++ b/website/docs/usage/processing-pipelines.mdx
@@ -244,7 +244,7 @@ tagging pipeline. This is also why the pipeline state is always held by the
 together and returns an instance of `Language` with a pipeline set and access to
 the binary data:
 
-```python {title="spacy.load under the hood"}
+```python {title="spacy.load under the hood (abstract example)"}
 lang = "en"
 pipeline = ["tok2vec", "tagger", "parser", "ner", "attribute_ruler", "lemmatizer"]
 data_path = "path/to/en_core_web_sm/en_core_web_sm-3.0.0"
@@ -252,7 +252,7 @@ data_path = "path/to/en_core_web_sm/en_core_web_sm-3.0.0"
 cls = spacy.util.get_lang_class(lang)  # 1. Get Language class, e.g. English
 nlp = cls()                            # 2. Initialize it
 for name in pipeline:
-    nlp.add_pipe(name)                 # 3. Add the component to the pipeline
+    nlp.add_pipe(name, config={...})   # 3. Add the component to the pipeline
 nlp.from_disk(data_path)               # 4. Load in the binary data
 ```
 

From 6dd56868de3c5e8308ef2ad31d7b63e40a87fe01 Mon Sep 17 00:00:00 2001
From: Connor Brinton <connor@brintonium.com>
Date: Mon, 21 Aug 2023 04:52:32 -0400
Subject: [PATCH 5/8] =?UTF-8?q?=F0=9F=93=9D=20Fix=20formula=20for=20recept?=
 =?UTF-8?q?ive=20field=20in=20docs=20(#12918)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

spaCy's HashEmbedCNN layer performs convolutions over tokens to produce
contextualized embeddings using a `MaxoutWindowEncoder` layer. These
convolutions are implemented using Thinc's `expand_window` layer, which
concatenates `window_size` neighboring sequence items on either side of
the sequence item being processed. This is repeated across `depth`
convolutional layers.

For example, consider the sequence "ABCDE" and a `MaxoutWindowEncoder`
layer with a context window of 1 and a depth of 2. We'll focus on the
token "C". We can visually represent the contextual embedding produced
for "C" as:
```mermaid
flowchart LR
A0(A<sub>0</sub>)
B0(B<sub>0</sub>)
C0(C<sub>0</sub>)
D0(D<sub>0</sub>)
E0(E<sub>0</sub>)
B1(B<sub>1</sub>)
C1(C<sub>1</sub>)
D1(D<sub>1</sub>)
C2(C<sub>2</sub>)
A0 --> B1
B0 --> B1
C0 --> B1
B0 --> C1
C0 --> C1
D0 --> C1
C0 --> D1
D0 --> D1
E0 --> D1
B1 --> C2
C1 --> C2
D1 --> C2
```

Described in words, this graph shows that before the first layer of the
convolution, the "receptive field" centered at each token consists only
of that same token. That is to say, we have a receptive field of 1.
The first layer of the convolution adds one neighboring token on either
side to the receptive field. Since this is done on both sides, the
receptive field increases by 2, giving the first layer a receptive field
of 3. The second layer of the convolution adds an _additional_
neighboring token on either side to the receptive field, giving a final
receptive field of 5.

However, this doesn't match the formula currently given in the docs,
which read:
> The receptive field of the CNN will be
> `depth * (window_size * 2 + 1)`, so a 4-layer network with a window
> size of `2` will be sensitive to 20 words at a time.

Substituting in our depth of 2 and window size of 1, this formula gives
us a receptive field of:
```
depth * (window_size * 2 + 1)
= 2 * (1 * 2 + 1)
= 2 * (2 + 1)
= 2 * 3
= 6
```

This not only doesn't match our computations from above, it's also an
even number! This is suspicious, since the receptive field is supposed
to be centered on a token, and not between tokens. Generally, this
formula results in an even number for any even value of `depth`.

The error in this formula is that the adjustment for the center token
is multiplied by the depth, when it should occur only once. The
corrected formula, `depth * window_size * 2 + 1`, gives the correct
value for our small example from above:
```
depth * window_size * 2 + 1
= 2 * 1 * 2 + 1
= 4 + 1
= 5
```

These changes update the docs to correct the receptive field formula and
the example receptive field size.
---
 spacy/ml/models/tok2vec.py         | 4 ++--
 website/docs/api/architectures.mdx | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index 2e9d21ef4..0edc89991 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -67,8 +67,8 @@ def build_hash_embed_cnn_tok2vec(
         are between 2 and 8.
     window_size (int): The number of tokens on either side to concatenate during
         the convolutions. The receptive field of the CNN will be
-        depth * (window_size * 2 + 1), so a 4-layer network with window_size of
-        2 will be sensitive to 20 words at a time. Recommended value is 1.
+        depth * window_size * 2 + 1, so a 4-layer network with window_size of
+        2 will be sensitive to 17 words at a time. Recommended value is 1.
     embed_size (int): The number of rows in the hash embedding tables. This can
         be surprisingly small, due to the use of the hash embeddings. Recommended
         values are between 2000 and 10000.
diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx
index bab24f13b..a292194e9 100644
--- a/website/docs/api/architectures.mdx
+++ b/website/docs/api/architectures.mdx
@@ -83,7 +83,7 @@ consisting of a CNN and a layer-normalized maxout activation function.
 | `width`              | The width of the input and output. These are required to be the same, so that residual connections can be used. Recommended values are `96`, `128` or `300`. ~~int~~                                                                                                          |
 | `depth`              | The number of convolutional layers to use. Recommended values are between `2` and `8`. ~~int~~                                                                                                                                                                                |
 | `embed_size`         | The number of rows in the hash embedding tables. This can be surprisingly small, due to the use of the hash embeddings. Recommended values are between `2000` and `10000`. ~~int~~                                                                                            |
-| `window_size`        | The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be `depth * (window_size * 2 + 1)`, so a 4-layer network with a window size of `2` will be sensitive to 20 words at a time. Recommended value is `1`. ~~int~~ |
+| `window_size`        | The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be `depth * window_size * 2 + 1`, so a 4-layer network with a window size of `2` will be sensitive to 17 words at a time. Recommended value is `1`. ~~int~~ |
 | `maxout_pieces`      | The number of pieces to use in the maxout non-linearity. If `1`, the [`Mish`](https://thinc.ai/docs/api-layers#mish) non-linearity is used instead. Recommended values are `1`-`3`. ~~int~~                                                                                   |
 | `subword_features`   | Whether to also embed subword features, specifically the prefix, suffix and word shape. This is recommended for alphabetic languages like English, but not if single-character tokens are used for a language such as Chinese. ~~bool~~                                       |
 | `pretrained_vectors` | Whether to also use static vectors. ~~bool~~                                                                                                                                                                                                                                  |

From d8a32c1050d2acb4fd121968d7e8780aae0b1382 Mon Sep 17 00:00:00 2001
From: PD Hall <20580126+pdhall99@users.noreply.github.com>
Date: Tue, 29 Aug 2023 10:10:58 +0100
Subject: [PATCH 6/8] docs: fix ngram_range_suggester max_size description
 (#12939)

---
 website/docs/api/spancategorizer.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/api/spancategorizer.mdx b/website/docs/api/spancategorizer.mdx
index 2b63d31ce..bfe33dfb9 100644
--- a/website/docs/api/spancategorizer.mdx
+++ b/website/docs/api/spancategorizer.mdx
@@ -521,7 +521,7 @@ has two columns, indicating the start and end position.
 | Name        | Description                                                                  |
 | ----------- | ---------------------------------------------------------------------------- |
 | `min_size`  | The minimal phrase lengths to suggest (inclusive). ~~[int]~~                 |
-| `max_size`  | The maximal phrase lengths to suggest (exclusive). ~~[int]~~                 |
+| `max_size`  | The maximal phrase lengths to suggest (inclusive). ~~[int]~~                 |
 | **CREATES** | The suggester function. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ |
 
 ### spacy.preset_spans_suggester.v1 {id="preset_spans_suggester"}

From 52758e1afaa99b2ac47e0ae825f0a86d209952f4 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Wed, 30 Aug 2023 11:55:23 +0200
Subject: [PATCH 7/8] Add headers to netlify.toml [ci skip]

---
 website/netlify.toml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/website/netlify.toml b/website/netlify.toml
index db7ae27c4..a99395918 100644
--- a/website/netlify.toml
+++ b/website/netlify.toml
@@ -16,3 +16,9 @@ NETLIFY_NEXT_PLUGIN_SKIP = "true"
 
 [[plugins]]
 package = "@netlify/plugin-nextjs"
+
+[[headers]]
+  for = "/*"
+  [headers.values]
+    X-Frame-Options = "DENY"
+    X-XSS-Protection = "1; mode=block"

From 3e4264899c3b12f8eabc5cd700146177a34824d0 Mon Sep 17 00:00:00 2001
From: vincent d warmerdam <vincentwarmerdam@gmail.com>
Date: Wed, 30 Aug 2023 11:58:14 +0200
Subject: [PATCH 8/8] Update large-language-models.mdx (#12944)

---
 website/docs/api/large-language-models.mdx | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/website/docs/api/large-language-models.mdx b/website/docs/api/large-language-models.mdx
index cc8328790..94b426cc8 100644
--- a/website/docs/api/large-language-models.mdx
+++ b/website/docs/api/large-language-models.mdx
@@ -893,7 +893,7 @@ OpenAI's `davinci` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Davinci.v1 "
+> @llm_models = "spacy.Davinci.v1"
 > name = "davinci"
 > config = {"temperature": 0.3}
 > ```
@@ -914,7 +914,7 @@ OpenAI's `curie` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Curie.v1 "
+> @llm_models = "spacy.Curie.v1"
 > name = "curie"
 > config = {"temperature": 0.3}
 > ```
@@ -935,7 +935,7 @@ OpenAI's `babbage` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Babbage.v1 "
+> @llm_models = "spacy.Babbage.v1"
 > name = "babbage"
 > config = {"temperature": 0.3}
 > ```
@@ -956,7 +956,7 @@ OpenAI's `ada` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Ada.v1 "
+> @llm_models = "spacy.Ada.v1"
 > name = "ada"
 > config = {"temperature": 0.3}
 > ```
@@ -977,7 +977,7 @@ Cohere's `command` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Command.v1 "
+> @llm_models = "spacy.Command.v1"
 > name = "command"
 > config = {"temperature": 0.3}
 > ```
@@ -998,7 +998,7 @@ Anthropic's `claude-2` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Claude-2.v1 "
+> @llm_models = "spacy.Claude-2.v1"
 > name = "claude-2"
 > config = {"temperature": 0.3}
 > ```
@@ -1019,7 +1019,7 @@ Anthropic's `claude-1` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Claude-1.v1 "
+> @llm_models = "spacy.Claude-1.v1"
 > name = "claude-1"
 > config = {"temperature": 0.3}
 > ```
@@ -1040,7 +1040,7 @@ Anthropic's `claude-instant-1` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Claude-instant-1.v1 "
+> @llm_models = "spacy.Claude-instant-1.v1"
 > name = "claude-instant-1"
 > config = {"temperature": 0.3}
 > ```
@@ -1061,7 +1061,7 @@ Anthropic's `claude-instant-1.1` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Claude-instant-1-1.v1 "
+> @llm_models = "spacy.Claude-instant-1-1.v1"
 > name = "claude-instant-1.1"
 > config = {"temperature": 0.3}
 > ```
@@ -1082,7 +1082,7 @@ Anthropic's `claude-1.0` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Claude-1-0.v1 "
+> @llm_models = "spacy.Claude-1-0.v1"
 > name = "claude-1.0"
 > config = {"temperature": 0.3}
 > ```
@@ -1124,7 +1124,7 @@ Anthropic's `claude-1.3` model family.
 >
 > ```ini
 > [components.llm.model]
-> @llm_models = "spacy.Claude-1-3.v1 "
+> @llm_models = "spacy.Claude-1-3.v1"
 > name = "claude-1.3"
 > config = {"temperature": 0.3}
 > ```