diff --git a/spacy/about.py b/spacy/about.py index cad6158da..0f8eee0ff 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,6 +1,6 @@ # fmt: off __title__ = "spacy" -__version__ = "3.6.0" +__version__ = "3.6.1" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" __projects__ = "https://github.com/explosion/projects" diff --git a/spacy/lang/tr/examples.py b/spacy/lang/tr/examples.py index dfb324a4e..c912c950d 100644 --- a/spacy/lang/tr/examples.py +++ b/spacy/lang/tr/examples.py @@ -15,4 +15,7 @@ sentences = [ "Türkiye'nin başkenti neresi?", "Bakanlar Kurulu 180 günlük eylem planını açıkladı.", "Merkez Bankası, beklentiler doğrultusunda faizlerde değişikliğe gitmedi.", + "Cemal Süreya kimdir?", + "Bunları Biliyor muydunuz?", + "Altınoluk Türkiye haritasının neresinde yer alır?", ] diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py index 2e9d21ef4..0edc89991 100644 --- a/spacy/ml/models/tok2vec.py +++ b/spacy/ml/models/tok2vec.py @@ -67,8 +67,8 @@ def build_hash_embed_cnn_tok2vec( are between 2 and 8. window_size (int): The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be - depth * (window_size * 2 + 1), so a 4-layer network with window_size of - 2 will be sensitive to 20 words at a time. Recommended value is 1. + depth * window_size * 2 + 1, so a 4-layer network with window_size of + 2 will be sensitive to 17 words at a time. Recommended value is 1. embed_size (int): The number of rows in the hash embedding tables. This can be surprisingly small, due to the use of the hash embeddings. Recommended values are between 2000 and 10000. 
diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx index bab24f13b..a292194e9 100644 --- a/website/docs/api/architectures.mdx +++ b/website/docs/api/architectures.mdx @@ -83,7 +83,7 @@ consisting of a CNN and a layer-normalized maxout activation function. | `width` | The width of the input and output. These are required to be the same, so that residual connections can be used. Recommended values are `96`, `128` or `300`. ~~int~~ | | `depth` | The number of convolutional layers to use. Recommended values are between `2` and `8`. ~~int~~ | | `embed_size` | The number of rows in the hash embedding tables. This can be surprisingly small, due to the use of the hash embeddings. Recommended values are between `2000` and `10000`. ~~int~~ | -| `window_size` | The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be `depth * (window_size * 2 + 1)`, so a 4-layer network with a window size of `2` will be sensitive to 20 words at a time. Recommended value is `1`. ~~int~~ | +| `window_size` | The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be `depth * window_size * 2 + 1`, so a 4-layer network with a window size of `2` will be sensitive to 17 words at a time. Recommended value is `1`. ~~int~~ | | `maxout_pieces` | The number of pieces to use in the maxout non-linearity. If `1`, the [`Mish`](https://thinc.ai/docs/api-layers#mish) non-linearity is used instead. Recommended values are `1`-`3`. ~~int~~ | | `subword_features` | Whether to also embed subword features, specifically the prefix, suffix and word shape. This is recommended for alphabetic languages like English, but not if single-character tokens are used for a language such as Chinese. ~~bool~~ | | `pretrained_vectors` | Whether to also use static vectors. 
~~bool~~ | diff --git a/website/docs/api/large-language-models.mdx b/website/docs/api/large-language-models.mdx index 0edaaf946..b9788ed8f 100644 --- a/website/docs/api/large-language-models.mdx +++ b/website/docs/api/large-language-models.mdx @@ -893,7 +893,7 @@ OpenAI's `davinci` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Davinci.v1 " +> @llm_models = "spacy.Davinci.v1" > name = "davinci" > config = {"temperature": 0.3} > ``` @@ -914,7 +914,7 @@ OpenAI's `curie` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Curie.v1 " +> @llm_models = "spacy.Curie.v1" > name = "curie" > config = {"temperature": 0.3} > ``` @@ -935,7 +935,7 @@ OpenAI's `babbage` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Babbage.v1 " +> @llm_models = "spacy.Babbage.v1" > name = "babbage" > config = {"temperature": 0.3} > ``` @@ -956,7 +956,7 @@ OpenAI's `ada` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Ada.v1 " +> @llm_models = "spacy.Ada.v1" > name = "ada" > config = {"temperature": 0.3} > ``` @@ -977,7 +977,7 @@ Cohere's `command` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Command.v1 " +> @llm_models = "spacy.Command.v1" > name = "command" > config = {"temperature": 0.3} > ``` @@ -998,7 +998,7 @@ Anthropic's `claude-2` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Claude-2.v1 " +> @llm_models = "spacy.Claude-2.v1" > name = "claude-2" > config = {"temperature": 0.3} > ``` @@ -1019,7 +1019,7 @@ Anthropic's `claude-1` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Claude-1.v1 " +> @llm_models = "spacy.Claude-1.v1" > name = "claude-1" > config = {"temperature": 0.3} > ``` @@ -1040,7 +1040,7 @@ Anthropic's `claude-instant-1` model family. 
> > ```ini > [components.llm.model] -> @llm_models = "spacy.Claude-instant-1.v1 " +> @llm_models = "spacy.Claude-instant-1.v1" > name = "claude-instant-1" > config = {"temperature": 0.3} > ``` @@ -1061,7 +1061,7 @@ Anthropic's `claude-instant-1.1` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Claude-instant-1-1.v1 " +> @llm_models = "spacy.Claude-instant-1-1.v1" > name = "claude-instant-1.1" > config = {"temperature": 0.3} > ``` @@ -1082,7 +1082,7 @@ Anthropic's `claude-1.0` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Claude-1-0.v1 " +> @llm_models = "spacy.Claude-1-0.v1" > name = "claude-1.0" > config = {"temperature": 0.3} > ``` @@ -1124,7 +1124,7 @@ Anthropic's `claude-1.3` model family. > > ```ini > [components.llm.model] -> @llm_models = "spacy.Claude-1-3.v1 " +> @llm_models = "spacy.Claude-1-3.v1" > name = "claude-1.3" > config = {"temperature": 0.3} > ``` diff --git a/website/docs/api/spancategorizer.mdx b/website/docs/api/spancategorizer.mdx index 2b63d31ce..bfe33dfb9 100644 --- a/website/docs/api/spancategorizer.mdx +++ b/website/docs/api/spancategorizer.mdx @@ -521,7 +521,7 @@ has two columns, indicating the start and end position. | Name | Description | | ----------- | ---------------------------------------------------------------------------- | | `min_size` | The minimal phrase lengths to suggest (inclusive). ~~[int]~~ | -| `max_size` | The maximal phrase lengths to suggest (exclusive). ~~[int]~~ | +| `max_size` | The maximal phrase lengths to suggest (inclusive). ~~[int]~~ | | **CREATES** | The suggester function. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ | ### spacy.preset_spans_suggester.v1 {id="preset_spans_suggester"} diff --git a/website/docs/api/top-level.mdx b/website/docs/api/top-level.mdx index 37e86a4bc..9cdc0c8ab 100644 --- a/website/docs/api/top-level.mdx +++ b/website/docs/api/top-level.mdx @@ -68,7 +68,7 @@ weights, and returns it. 
cls = spacy.util.get_lang_class(lang) # 1. Get Language class, e.g. English nlp = cls() # 2. Initialize it for name in pipeline: - nlp.add_pipe(name) # 3. Add the component to the pipeline + nlp.add_pipe(name, config={...}) # 3. Add the component to the pipeline nlp.from_disk(data_path) # 4. Load in the binary data ``` diff --git a/website/docs/usage/processing-pipelines.mdx b/website/docs/usage/processing-pipelines.mdx index 307cb9dcb..6ec8a0513 100644 --- a/website/docs/usage/processing-pipelines.mdx +++ b/website/docs/usage/processing-pipelines.mdx @@ -244,7 +244,7 @@ tagging pipeline. This is also why the pipeline state is always held by the together and returns an instance of `Language` with a pipeline set and access to the binary data: -```python {title="spacy.load under the hood"} +```python {title="spacy.load under the hood (abstract example)"} lang = "en" pipeline = ["tok2vec", "tagger", "parser", "ner", "attribute_ruler", "lemmatizer"] data_path = "path/to/en_core_web_sm/en_core_web_sm-3.0.0" @@ -252,7 +252,7 @@ data_path = "path/to/en_core_web_sm/en_core_web_sm-3.0.0" cls = spacy.util.get_lang_class(lang) # 1. Get Language class, e.g. English nlp = cls() # 2. Initialize it for name in pipeline: - nlp.add_pipe(name) # 3. Add the component to the pipeline + nlp.add_pipe(name, config={...}) # 3. Add the component to the pipeline nlp.from_disk(data_path) # 4. Load in the binary data ``` diff --git a/website/meta/universe.json b/website/meta/universe.json index 2ed8b4b41..ec380f847 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -4444,6 +4444,31 @@ }, "category": ["pipeline", "standalone", "scientific"], "tags": ["ner"] + }, + { + "id": "hobbit-spacy", + "title": "Hobbit spaCy", + "slogan": "NLP for Middle Earth", + "description": "Hobbit spaCy is a custom spaCy pipeline designed specifically for working with Middle Earth and texts from the world of J.R.R. 
Tolkien.", + "github": "wjbmattingly/hobbit-spacy", + "pip": "en-hobbit", + "code_example": [ + "import spacy", + "", + "nlp = spacy.load('en_hobbit')", + "doc = nlp('Frodo saw Glorfindel and Glóin; and in a corner alone Strider was sitting, clad in his old travel - worn clothes again')" + ], + "code_language": "python", + "thumb": "https://github.com/wjbmattingly/hobbit-spacy/blob/main/images/hobbit-thumbnail.png?raw=true", + "image": "https://github.com/wjbmattingly/hobbit-spacy/raw/main/images/hobbitspacy.png", + "author": "W.J.B. Mattingly", + "author_links": { + "twitter": "wjb_mattingly", + "github": "wjbmattingly", + "website": "https://wjbmattingly.com" + }, + "category": ["pipeline", "standalone"], + "tags": ["spans", "rules", "ner"] } ], diff --git a/website/netlify.toml b/website/netlify.toml index db7ae27c4..a99395918 100644 --- a/website/netlify.toml +++ b/website/netlify.toml @@ -16,3 +16,9 @@ NETLIFY_NEXT_PLUGIN_SKIP = "true" [[plugins]] package = "@netlify/plugin-nextjs" + +[[headers]] + for = "/*" + [headers.values] + X-Frame-Options = "DENY" + X-XSS-Protection = "1; mode=block"