Commit 14513f82da (mirror of https://github.com/explosion/spaCy.git, synced 2025-01-23 07:44:12 +03:00)
@@ -699,9 +699,7 @@ def _get_examples_without_label(data: Sequence[Example], label: str) -> int:
     return count
 
 
-def _get_labels_from_model(
-    nlp: Language, factory_name: str
-) -> Set[str]:
+def _get_labels_from_model(nlp: Language, factory_name: str) -> Set[str]:
     pipe_names = [
         pipe_name
         for pipe_name in nlp.pipe_names
@@ -714,9 +712,7 @@ def _get_labels_from_model(
     return labels
 
 
-def _get_labels_from_spancat(
-    nlp: Language
-) -> Dict[str, Set[str]]:
+def _get_labels_from_spancat(nlp: Language) -> Dict[str, Set[str]]:
     pipe_names = [
         pipe_name
         for pipe_name in nlp.pipe_names
@@ -6,6 +6,11 @@ can help generate the best possible configuration, given a user's requirements.
 [paths]
 train = null
 dev = null
+{% if use_transformer or optimize == "efficiency" or not word_vectors -%}
+vectors = null
+{% else -%}
+vectors = "{{ word_vectors }}"
+{% endif -%}
 
 [system]
 {% if use_transformer -%}
@@ -421,8 +426,4 @@ compound = 1.001
 {% endif %}
 
 [initialize]
-{% if use_transformer or optimize == "efficiency" or not word_vectors -%}
 vectors = ${paths.vectors}
-{% else -%}
-vectors = "{{ word_vectors }}"
-{% endif -%}
@@ -90,7 +90,7 @@ _eleven_to_beyond = [
     "अड़सठ",
     "उनहत्तर",
     "सत्तर",
-    "इकहत्तर"
+    "इकहत्तर",
     "बहत्तर",
     "तिहत्तर",
     "चौहत्तर",
@@ -59,7 +59,7 @@ sentences = [
     "Czy w ciągu ostatnich 48 godzin spożyłeś leki zawierające paracetamol?",
     "Kto ma ochotę zapoznać się z innymi niż w książkach przygodami Muminków i ich przyjaciół, temu polecam komiks Tove Jansson „Muminki i morze”.",
     "Apple está querendo comprar uma startup do Reino Unido por 100 milhões de dólares.",
-    "Carros autônomos empurram a responsabilidade do seguro para os fabricantes.."
+    "Carros autônomos empurram a responsabilidade do seguro para os fabricantes..",
     "São Francisco considera banir os robôs de entrega que andam pelas calçadas.",
     "Londres é a maior cidade do Reino Unido.",
     # Translations from English:
@@ -354,12 +354,15 @@ class Language:
     @property
     def pipe_labels(self) -> Dict[str, List[str]]:
         """Get the labels set by the pipeline components, if available (if
-        the component exposes a labels property).
+        the component exposes a labels property and the labels are not
+        hidden).
 
         RETURNS (Dict[str, List[str]]): Labels keyed by component name.
         """
         labels = {}
         for name, pipe in self._components:
+            if hasattr(pipe, "hide_labels") and pipe.hide_labels is True:
+                continue
             if hasattr(pipe, "labels"):
                 labels[name] = list(pipe.labels)
         return SimpleFrozenDict(labels)
@@ -522,7 +525,7 @@ class Language:
         requires: Iterable[str] = SimpleFrozenList(),
         retokenizes: bool = False,
         func: Optional["Pipe"] = None,
-    ) -> Callable:
+    ) -> Callable[..., Any]:
         """Register a new pipeline component. Can be used for stateless function
         components that don't require a separate factory. Can be used as a
         decorator on a function or classmethod, or called as a function with the
spacy/matcher/dependencymatcher.pyi (new file, 66 lines)
@@ -0,0 +1,66 @@
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from .matcher import Matcher
+from ..vocab import Vocab
+from ..tokens.doc import Doc
+from ..tokens.span import Span
+
+class DependencyMatcher:
+    """Match dependency parse tree based on pattern rules."""
+
+    _patterns: Dict[str, List[Any]]
+    _raw_patterns: Dict[str, List[Any]]
+    _tokens_to_key: Dict[str, List[Any]]
+    _root: Dict[str, List[Any]]
+    _tree: Dict[str, List[Any]]
+    _callbacks: Dict[
+        Any, Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any]
+    ]
+    _ops: Dict[str, Any]
+    vocab: Vocab
+    _matcher: Matcher
+    def __init__(self, vocab: Vocab, *, validate: bool = ...) -> None: ...
+    def __reduce__(
+        self,
+    ) -> Tuple[
+        Callable[
+            [Vocab, Dict[str, Any], Dict[str, Callable[..., Any]]], DependencyMatcher
+        ],
+        Tuple[
+            Vocab,
+            Dict[str, List[Any]],
+            Dict[
+                str,
+                Callable[
+                    [DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any
+                ],
+            ],
+        ],
+        None,
+        None,
+    ]: ...
+    def __len__(self) -> int: ...
+    def __contains__(self, key: Union[str, int]) -> bool: ...
+    def add(
+        self,
+        key: Union[str, int],
+        patterns: List[List[Dict[str, Any]]],
+        *,
+        on_match: Optional[
+            Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any]
+        ] = ...
+    ) -> None: ...
+    def has_key(self, key: Union[str, int]) -> bool: ...
+    def get(
+        self, key: Union[str, int], default: Optional[Any] = ...
+    ) -> Tuple[
+        Optional[
+            Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any]
+        ],
+        List[List[Dict[str, Any]]],
+    ]: ...
+    def remove(self, key: Union[str, int]) -> None: ...
+    def __call__(self, doclike: Union[Doc, Span]) -> List[Tuple[int, List[int]]]: ...
+
+def unpickle_matcher(
+    vocab: Vocab, patterns: Dict[str, Any], callbacks: Dict[str, Callable[..., Any]]
+) -> DependencyMatcher: ...
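Not part of the diff above, just orientation: a minimal sketch of how the `DependencyMatcher` whose stub is typed here is used. The pattern, match key, sentence, and the `en_core_web_sm` model are illustrative assumptions, not taken from this commit.

```python
import spacy
from spacy.matcher import DependencyMatcher

nlp = spacy.load("en_core_web_sm")  # assumes a pipeline with a parser is installed
matcher = DependencyMatcher(nlp.vocab)

# Anchor on a verb, then require a nominal subject attached to it.
pattern = [
    {"RIGHT_ID": "anchor", "RIGHT_ATTRS": {"POS": "VERB"}},
    {
        "LEFT_ID": "anchor",
        "REL_OP": ">",
        "RIGHT_ID": "subject",
        "RIGHT_ATTRS": {"DEP": "nsubj"},
    },
]
matcher.add("VERB_SUBJECT", [pattern])

doc = nlp("Smith founded a healthcare company in 2005.")
# Each match is (match_id, [token_id, ...]), matching the __call__ return type above.
for match_id, token_ids in matcher(doc):
    print([doc[i].text for i in token_ids])
```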
@@ -1,4 +1,6 @@
-from typing import Any, List, Dict, Tuple, Optional, Callable, Union, Iterator, Iterable
+from typing import Any, List, Dict, Tuple, Optional, Callable, Union
+from typing import Iterator, Iterable, overload
+from ..compat import Literal
 from ..vocab import Vocab
 from ..tokens import Doc, Span
 
@@ -31,12 +33,22 @@ class Matcher:
     ) -> Union[
         Iterator[Tuple[Tuple[Doc, Any], Any]], Iterator[Tuple[Doc, Any]], Iterator[Doc]
     ]: ...
+    @overload
     def __call__(
         self,
         doclike: Union[Doc, Span],
         *,
-        as_spans: bool = ...,
+        as_spans: Literal[False] = ...,
         allow_missing: bool = ...,
         with_alignments: bool = ...
-    ) -> Union[List[Tuple[int, int, int]], List[Span]]: ...
+    ) -> List[Tuple[int, int, int]]: ...
+    @overload
+    def __call__(
+        self,
+        doclike: Union[Doc, Span],
+        *,
+        as_spans: Literal[True],
+        allow_missing: bool = ...,
+        with_alignments: bool = ...
+    ) -> List[Span]: ...
     def _normalize_key(self, key: Any) -> Any: ...
@@ -1,6 +1,6 @@
-from typing import List, Tuple, Union, Optional, Callable, Any, Dict
-
-from . import Matcher
+from typing import List, Tuple, Union, Optional, Callable, Any, Dict, overload
+from ..compat import Literal
+from .matcher import Matcher
 from ..vocab import Vocab
 from ..tokens import Doc, Span
 
@@ -21,9 +21,17 @@ class PhraseMatcher:
         ] = ...,
     ) -> None: ...
     def remove(self, key: str) -> None: ...
+    @overload
     def __call__(
         self,
         doclike: Union[Doc, Span],
         *,
-        as_spans: bool = ...,
-    ) -> Union[List[Tuple[int, int, int]], List[Span]]: ...
+        as_spans: Literal[False] = ...,
+    ) -> List[Tuple[int, int, int]]: ...
+    @overload
+    def __call__(
+        self,
+        doclike: Union[Doc, Span],
+        *,
+        as_spans: Literal[True],
+    ) -> List[Span]: ...
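For orientation, a minimal sketch of the two call signatures that the `Literal` overloads above let type checkers distinguish, for both `Matcher` and `PhraseMatcher`; the pattern and texts are illustrative assumptions.

```python
from spacy.lang.en import English
from spacy.matcher import Matcher

nlp = English()
matcher = Matcher(nlp.vocab)
matcher.add("HELLO_WORLD", [[{"LOWER": "hello"}, {"LOWER": "world"}]])
doc = nlp("hello world, Hello World!")

# Default call: List[Tuple[int, int, int]] of (match_id, start, end).
tuples = matcher(doc)
print([(doc.vocab.strings[m_id], start, end) for m_id, start, end in tuples])

# as_spans=True: List[Span]; the overload on Literal[True] narrows the return type.
spans = matcher(doc, as_spans=True)
print([span.text for span in spans])
```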
@@ -26,6 +26,8 @@ class Pipe:
     @property
     def labels(self) -> Tuple[str, ...]: ...
     @property
+    def hide_labels(self) -> bool: ...
+    @property
     def label_data(self) -> Any: ...
     def _require_labels(self) -> None: ...
     def set_error_handler(
@@ -102,6 +102,10 @@ cdef class Pipe:
     def labels(self) -> Tuple[str, ...]:
         return tuple()
 
+    @property
+    def hide_labels(self) -> bool:
+        return False
+
     @property
     def label_data(self):
         """Optional JSON-serializable data that would be sufficient to recreate
@@ -99,6 +99,10 @@ class SentenceRecognizer(Tagger):
         # are 0
         return tuple(["I", "S"])
 
+    @property
+    def hide_labels(self):
+        return True
+
     @property
     def label_data(self):
         return None
@@ -413,7 +413,7 @@ class SpanCategorizer(TrainablePipe):
         self._require_labels()
         if subbatch:
             docs = [eg.x for eg in subbatch]
-            spans = self.suggester(docs)
+            spans = build_ngram_suggester(sizes=[1])(docs)
             Y = self.model.ops.alloc2f(spans.dataXd.shape[0], len(self.labels))
             self.model.initialize(X=(docs, spans), Y=Y)
         else:
@@ -97,3 +97,7 @@ def test_overfitting_IO():
     ]
     assert_equal(batch_deps_1, batch_deps_2)
     assert_equal(batch_deps_1, no_batch_deps)
+
+    # test internal pipe labels vs. Language.pipe_labels with hidden labels
+    assert nlp.get_pipe("senter").labels == ("I", "S")
+    assert "senter" not in nlp.pipe_labels
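A minimal sketch of the behaviour the new `hide_labels` property and the test above describe, assuming a blank English pipeline; the textcat component and its label are illustrative.

```python
import spacy

nlp = spacy.blank("en")
nlp.add_pipe("senter")
nlp.add_pipe("textcat").add_label("POSITIVE")

# The senter's internal "I"/"S" labels are an implementation detail, so the
# component sets hide_labels = True and Language.pipe_labels skips it.
print(nlp.get_pipe("senter").labels)   # ('I', 'S')
print("senter" in nlp.pipe_labels)     # False
print(nlp.pipe_labels.get("textcat"))  # ['POSITIVE']
```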
@@ -79,7 +79,8 @@ def test_explicit_labels():
     nlp.initialize()
     assert spancat.labels == ("PERSON", "LOC")
 
 
-#TODO figure out why this is flaky
+# TODO figure out why this is flaky
+@pytest.mark.skip(reason="Test is unreliable for unknown reason")
 def test_doc_gc():
     # If the Doc object is garbage collected, the spans won't be functional afterwards
@@ -9,6 +9,7 @@ from spacy.tokenizer import Tokenizer
 from spacy.tokens import Doc
 from spacy.training import Example
 from spacy.util import compile_prefix_regex, compile_suffix_regex, ensure_path
+from spacy.util import compile_infix_regex
 from spacy.vocab import Vocab
 from spacy.symbols import ORTH
 
@@ -503,3 +504,20 @@ def test_tokenizer_prefix_suffix_overlap_lookbehind(en_vocab):
     assert tokens == ["a", "10", "."]
     explain_tokens = [t[1] for t in tokenizer.explain("a10.")]
     assert tokens == explain_tokens
+
+
+def test_tokenizer_infix_prefix(en_vocab):
+    # the prefix and suffix matches overlap in the suffix lookbehind
+    infixes = ["±"]
+    suffixes = ["%"]
+    infix_re = compile_infix_regex(infixes)
+    suffix_re = compile_suffix_regex(suffixes)
+    tokenizer = Tokenizer(
+        en_vocab,
+        infix_finditer=infix_re.finditer,
+        suffix_search=suffix_re.search,
+    )
+    tokens = [t.text for t in tokenizer("±10%")]
+    assert tokens == ["±10", "%"]
+    explain_tokens = [t[1] for t in tokenizer.explain("±10%")]
+    assert tokens == explain_tokens
@@ -683,6 +683,8 @@ cdef class Tokenizer:
             infixes = infix_finditer(substring)
             offset = 0
             for match in infixes:
+                if offset == 0 and match.start() == 0:
+                    continue
                 if substring[offset : match.start()]:
                     tokens.append(("TOKEN", substring[offset : match.start()]))
                 if substring[match.start() : match.end()]:
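A standalone sketch of the check the new test above performs: with an infix match at the start of a substring skipped, `Tokenizer.explain` stays aligned with the actual tokenizer output. The `±` infix and `%` suffix come from the test itself.

```python
from spacy.lang.en import English
from spacy.tokenizer import Tokenizer
from spacy.util import compile_infix_regex, compile_suffix_regex

nlp = English()
infix_re = compile_infix_regex(["±"])
suffix_re = compile_suffix_regex(["%"])
tokenizer = Tokenizer(
    nlp.vocab,
    infix_finditer=infix_re.finditer,
    suffix_search=suffix_re.search,
)

# Both views of "±10%" should agree after this change.
print([t.text for t in tokenizer("±10%")])               # ['±10', '%']
print([text for _, text in tokenizer.explain("±10%")])   # ['±10', '%']
```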
@@ -10,7 +10,7 @@ from ..lexeme import Lexeme
 from ..vocab import Vocab
 from .underscore import Underscore
 from pathlib import Path
-import numpy
+import numpy as np
 
 class DocMethod(Protocol):
     def __call__(self: Doc, *args: Any, **kwargs: Any) -> Any: ...  # type: ignore[misc]
@@ -26,7 +26,7 @@ class Doc:
     user_hooks: Dict[str, Callable[..., Any]]
     user_token_hooks: Dict[str, Callable[..., Any]]
     user_span_hooks: Dict[str, Callable[..., Any]]
-    tensor: numpy.ndarray
+    tensor: np.ndarray[Any, np.dtype[np.float_]]
     user_data: Dict[str, Any]
     has_unknown_spaces: bool
     _context: Any
@@ -144,7 +144,7 @@ class Doc:
     ) -> Doc: ...
     def to_array(
         self, py_attr_ids: Union[int, str, List[Union[int, str]]]
-    ) -> numpy.ndarray: ...
+    ) -> np.ndarray[Any, np.dtype[np.float_]]: ...
     @staticmethod
     def from_docs(
         docs: List[Doc],
@@ -459,8 +459,8 @@ cdef class Span:
 
     @property
     def ents(self):
-        """The named entities in the span. Returns a tuple of named entity
-        `Span` objects, if the entity recognizer has been applied.
+        """The named entities that fall completely within the span. Returns
+        a tuple of `Span` objects.
 
         RETURNS (tuple): Entities in the span, one `Span` per entity.
 
@@ -1,17 +1,31 @@
-from typing import Dict, Any
+from typing import Dict, Any, List, Optional, Tuple, Union, TYPE_CHECKING
 import functools
 import copy
 
 from ..errors import Errors
 
+if TYPE_CHECKING:
+    from .doc import Doc
+    from .span import Span
+    from .token import Token
 
 
 class Underscore:
     mutable_types = (dict, list, set)
     doc_extensions: Dict[Any, Any] = {}
     span_extensions: Dict[Any, Any] = {}
     token_extensions: Dict[Any, Any] = {}
+    _extensions: Dict[str, Any]
+    _obj: Union["Doc", "Span", "Token"]
+    _start: Optional[int]
+    _end: Optional[int]
 
-    def __init__(self, extensions, obj, start=None, end=None):
+    def __init__(
+        self,
+        extensions: Dict[str, Any],
+        obj: Union["Doc", "Span", "Token"],
+        start: Optional[int] = None,
+        end: Optional[int] = None,
+    ):
         object.__setattr__(self, "_extensions", extensions)
         object.__setattr__(self, "_obj", obj)
         # Assumption is that for doc values, _start and _end will both be None
@@ -23,12 +37,12 @@ class Underscore:
         object.__setattr__(self, "_start", start)
         object.__setattr__(self, "_end", end)
 
-    def __dir__(self):
+    def __dir__(self) -> List[str]:
         # Hack to enable autocomplete on custom extensions
         extensions = list(self._extensions.keys())
         return ["set", "get", "has"] + extensions
 
-    def __getattr__(self, name):
+    def __getattr__(self, name: str) -> Any:
         if name not in self._extensions:
             raise AttributeError(Errors.E046.format(name=name))
         default, method, getter, setter = self._extensions[name]
@@ -56,7 +70,7 @@ class Underscore:
                 return new_default
         return default
 
-    def __setattr__(self, name, value):
+    def __setattr__(self, name: str, value: Any):
         if name not in self._extensions:
             raise AttributeError(Errors.E047.format(name=name))
         default, method, getter, setter = self._extensions[name]
@@ -65,28 +79,30 @@ class Underscore:
         else:
             self._doc.user_data[self._get_key(name)] = value
 
-    def set(self, name, value):
+    def set(self, name: str, value: Any):
         return self.__setattr__(name, value)
 
-    def get(self, name):
+    def get(self, name: str) -> Any:
        return self.__getattr__(name)
 
-    def has(self, name):
+    def has(self, name: str) -> bool:
         return name in self._extensions
 
-    def _get_key(self, name):
+    def _get_key(self, name: str) -> Tuple[str, str, Optional[int], Optional[int]]:
         return ("._.", name, self._start, self._end)
 
     @classmethod
-    def get_state(cls):
+    def get_state(cls) -> Tuple[Dict[Any, Any], Dict[Any, Any], Dict[Any, Any]]:
         return cls.token_extensions, cls.span_extensions, cls.doc_extensions
 
     @classmethod
-    def load_state(cls, state):
+    def load_state(
+        cls, state: Tuple[Dict[Any, Any], Dict[Any, Any], Dict[Any, Any]]
+    ) -> None:
         cls.token_extensions, cls.span_extensions, cls.doc_extensions = state
 
 
-def get_ext_args(**kwargs):
+def get_ext_args(**kwargs: Any):
     """Validate and convert arguments. Reused in Doc, Token and Span."""
     default = kwargs.get("default")
     getter = kwargs.get("getter")
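`Underscore` backs the `._.` extension attributes, so the annotations above describe the objects behind code like the following minimal sketch (the extension name and value are illustrative):

```python
from spacy.lang.en import English
from spacy.tokens import Doc

# Register a custom extension; its value is stored per Doc through Underscore.
Doc.set_extension("source_url", default=None)

nlp = English()
doc = nlp("Custom extensions live under doc._.")
doc._.source_url = "https://example.com"

print(doc._.source_url)         # 'https://example.com'
print(doc._.has("source_url"))  # True
```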
website/Dockerfile (new file, 16 lines)
@@ -0,0 +1,16 @@
+FROM node:11.15.0
+
+WORKDIR /spacy-io
+
+RUN npm install -g gatsby-cli@2.7.4
+
+COPY package.json .
+COPY package-lock.json .
+
+RUN npm install
+
+# This is so the installed node_modules will be up one directory
+# from where a user mounts files, so that they don't accidentally mount
+# their own node_modules from a different build
+# https://nodejs.org/api/modules.html#modules_loading_from_node_modules_folders
+WORKDIR /spacy-io/website/
@@ -554,6 +554,42 @@ extensions for your code editor. The
 [`.prettierrc`](https://github.com/explosion/spaCy/tree/master/website/.prettierrc)
 file in the root defines the settings used in this codebase.
 
+## Building & developing the site with Docker {#docker}
+
+Sometimes it's hard to get a local environment working due to rapid updates to node dependencies,
+so it may be easier to use docker for building the docs.
+
+If you'd like to do this,
+**be sure you do *not* include your local `node_modules` folder**,
+since there are some dependencies that need to be built for the image system.
+Rename it before using.
+
+```bash
+docker run -it \
+  -v $(pwd):/spacy-io/website \
+  -p 8000:8000 \
+  ghcr.io/explosion/spacy-io \
+  gatsby develop -H 0.0.0.0
+```
+
+This will allow you to access the built website at http://0.0.0.0:8000/
+in your browser, and still edit code in your editor while having the site
+reflect those changes.
+
+**Note**: If you're working on a Mac with an M1 processor,
+you might see segfault errors from `qemu` if you use the default image.
+To fix this use the `arm64` tagged image in the `docker run` command
+(ghcr.io/explosion/spacy-io:arm64).
+
+### Building the Docker image {#docker-build}
+
+If you'd like to build the image locally, you can do so like this:
+
+```bash
+docker build -t spacy-io .
+```
+
+This will take some time, so if you want to use the prebuilt image you'll save a bit of time.
+
 ## Markdown reference {#markdown}
 
 All page content and page meta lives in the `.md` files in the `/docs`
@@ -257,8 +257,8 @@ shape (N, M), where N is the length of the document. The values will be
 
 ## Span.ents {#ents tag="property" new="2.0.13" model="ner"}
 
-The named entities in the span. Returns a tuple of named entity `Span` objects,
-if the entity recognizer has been applied.
+The named entities that fall completely within the span. Returns a tuple of
+`Span` objects.
 
 > #### Example
 >
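An illustrative sketch of the clarified behaviour: entities are only included if they fall completely within the span. The sentence and hand-set entities are assumptions, used here instead of running an entity recognizer.

```python
import spacy
from spacy.tokens import Span

nlp = spacy.blank("en")
doc = nlp("Apple is opening an office in San Francisco")
# Annotate two entities by hand instead of running an NER model.
doc.ents = [Span(doc, 0, 1, label="ORG"), Span(doc, 6, 8, label="GPE")]

span = doc[0:7]  # "Apple is opening an office in San"
# "San Francisco" only partially overlaps the span, so it is excluded.
print([(e.text, e.label_) for e in span.ents])  # [('Apple', 'ORG')]
```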
@@ -831,6 +831,8 @@ def tokenizer_pseudo_code(
         infixes = infix_finditer(substring)
         offset = 0
         for match in infixes:
+            if offset == 0 and match.start() == 0:
+                continue
             tokens.append(substring[offset : match.start()])
             tokens.append(substring[match.start() : match.end()])
             offset = match.end()
@@ -141,7 +141,8 @@
         "website": "https://www.nr.no/~plison"
     },
     "category": ["pipeline", "standalone", "research", "training"],
-    "tags": []
+    "tags": [],
+    "spacy_version": 3
 },
 {
     "id": "numerizer",
@@ -977,6 +978,48 @@
     "category": ["pipeline"],
     "tags": ["pipeline", "danish"]
 },
+{
+    "id": "spacy-wrap",
+    "title": "spaCy-wrap",
+    "slogan": "For Wrapping fine-tuned transformers in spaCy pipelines",
+    "description": "spaCy-wrap is a wrapper library for spaCy for including fine-tuned transformers from Huggingface in your spaCy pipeline allowing inclusion of existing models within existing workflows.",
+    "github": "kennethenevoldsen/spacy-wrap",
+    "pip": "spacy_wrap",
+    "code_example": [
+        "import spacy",
+        "import spacy_wrap",
+        "",
+        "nlp = spacy.blank('en')",
+        "config = {",
+        " 'doc_extension_trf_data': 'clf_trf_data', # document extention for the forward pass",
+        " 'doc_extension_prediction': 'sentiment', # document extention for the prediction",
+        " 'labels': ['negative', 'neutral', 'positive'],",
+        " 'model': {",
+        " 'name': 'cardiffnlp/twitter-roberta-base-sentiment', # the model name or path of huggingface model",
+        "},",
+        "}",
+        "",
+        "transformer = nlp.add_pipe('classification_transformer', config=config)",
+        "transformer.model.initialize()",
+        "",
+        "doc = nlp('spaCy is a wonderful tool')",
+        "",
+        "print(doc._.clf_trf_data)",
+        "# TransformerData(wordpieces=...",
+        "print(doc._.sentiment)",
+        "# 'positive'",
+        "print(doc._.sentiment_prob)",
+        "# {'prob': array([0.004, 0.028, 0.969], dtype=float32), 'labels': ['negative', 'neutral', 'positive']}"
+    ],
+    "thumb": "https://raw.githubusercontent.com/KennethEnevoldsen/spacy-wrap/main/docs/_static/icon.png",
+    "author": "Kenneth Enevoldsen",
+    "author_links": {
+        "github": "KennethEnevoldsen",
+        "website": "https://www.kennethenevoldsen.com"
+    },
+    "category": ["pipeline", "models", "training"],
+    "tags": ["pipeline", "models", "transformers"]
+},
 {
     "id": "textdescriptives",
     "title": "TextDescriptives",
@@ -8,10 +8,11 @@ import Title from '../components/title'
 import Grid from '../components/grid'
 import Button from '../components/button'
 import Icon from '../components/icon'
+import Tag from '../components/tag'
 import CodeBlock, { InlineCode } from '../components/code'
 import Aside from '../components/aside'
 import Sidebar from '../components/sidebar'
-import Section from '../components/section'
+import Section, { Hr } from '../components/section'
 import Main from '../components/main'
 import Footer from '../components/footer'
 import { H3, H5, Label, InlineList } from '../components/typography'
@@ -121,6 +122,18 @@ const UniverseContent = ({ content = [], categories, theme, pageContext, mdxComp
                 </Grid>
             </Section>
         )}
+        <section className="search-exclude">
+            <H3>Found a mistake or something isn't working?</H3>
+            <p>
+                If you've come across a universe project that isn't working or is
+                incompatible with the reported spaCy version, let us know by{' '}
+                <Link to="https://github.com/explosion/spaCy/discussions/new">
+                    opening a discussion thread
+                </Link>
+                .
+            </p>
+        </section>
+        <Hr />
         <section className="search-exclude">
             <H3>Submit your project</H3>
             <p>
@@ -168,25 +181,41 @@ UniverseContent.propTypes = {
     mdxComponents: PropTypes.object,
 }
 
+const SpaCyVersion = ({ version }) => {
+    const versions = !Array.isArray(version) ? [version] : version
+    return versions.map((v, i) => (
+        <>
+            <Tag tooltip={`This project is compatible with spaCy v${v}`}>spaCy v{v}</Tag>{' '}
+        </>
+    ))
+}
+
 const Project = ({ data, components }) => (
     <>
         <Title title={data.title || data.id} teaser={data.slogan} image={data.thumb}>
-            {data.github && (
+            {(data.github || data.spacy_version) && (
                 <p>
-                    <Link to={`https://github.com/${data.github}`} hidden>
-                        {[
-                            `release/${data.github}/all.svg?style=flat-square`,
-                            `license/${data.github}.svg?style=flat-square`,
-                            `stars/${data.github}.svg?style=social&label=Stars`,
-                        ].map((url, i) => (
-                            <img
-                                style={{ borderRadius: '1em', marginRight: '0.5rem' }}
-                                key={i}
-                                src={`https://img.shields.io/github/${url}`}
-                                alt=""
-                            />
-                        ))}
-                    </Link>
+                    {data.spacy_version && <SpaCyVersion version={data.spacy_version} />}
+                    {data.github && (
+                        <Link to={`https://github.com/${data.github}`} hidden>
+                            {[
+                                `release/${data.github}/all.svg?style=flat-square`,
+                                `license/${data.github}.svg?style=flat-square`,
+                                `stars/${data.github}.svg?style=social&label=Stars`,
+                            ].map((url, i) => (
+                                <img
+                                    style={{
+                                        borderRadius: '1em',
+                                        marginRight: '0.5rem',
+                                        verticalAlign: 'middle',
+                                    }}
+                                    key={i}
+                                    src={`https://img.shields.io/github/${url}`}
+                                    alt=""
+                                />
+                            ))}
+                        </Link>
+                    )}
                 </p>
             )}
         </Title>
@@ -335,6 +364,7 @@ const query = graphql`
             url
             github
             description
+            spacy_version
             pip
             cran
             category