Mirror of https://github.com/explosion/spaCy.git, synced 2025-08-07 13:44:55 +03:00

Merge branch 'master' into fix/windows-quoting
This commit is contained in: commit fb1d671ed4

.github/ISSUE_TEMPLATE/01_bugs.md (vendored, 2 changed lines)
@@ -10,7 +10,7 @@ about: Use this template if you came across a bug or unexpected behaviour differ
 <!-- Include a code example or the steps that led to the problem. Please try to be as specific as possible. -->

 ## Your Environment
-<!-- Include details of your environment. If you're using spaCy 1.7+, you can also type `python -m spacy info --markdown` and copy-paste the result here.-->
+<!-- Include details of your environment. You can also type `python -m spacy info --markdown` and copy-paste the result here.-->
 * Operating System:
 * Python Version Used:
 * spaCy Version Used:
.github/azure-steps.yml (vendored, 2 changed lines)

@@ -27,7 +27,7 @@ steps:

  - script: python -m mypy spacy
    displayName: 'Run mypy'
-   condition: ne(variables['python_version'], '3.10')
+   condition: ne(variables['python_version'], '3.6')

  - task: DeleteFiles@1
    inputs:
@@ -6,7 +6,7 @@ repos:
         language_version: python3.7
         additional_dependencies: ['click==8.0.4']
   - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.2
+    rev: 5.0.4
     hooks:
       - id: flake8
         args:
@@ -31,7 +31,7 @@ jobs:

       inputs:
         versionSpec: "3.7"
     - script: |
-        pip install flake8==3.9.2
+        pip install flake8==5.0.4
         python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
       displayName: "flake8"
@@ -15,7 +15,7 @@ pathy>=0.3.5
 numpy>=1.15.0
 requests>=2.13.0,<3.0.0
 tqdm>=4.38.0,<5.0.0
-pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
+pydantic>=1.7.4,!=1.8,!=1.8.1,<1.11.0
 jinja2
 langcodes>=3.2.0,<4.0.0
 # Official Python utilities
@@ -28,9 +28,9 @@ cython>=0.25,<3.0
 pytest>=5.2.0,!=7.1.0
 pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
-flake8>=3.8.0,<3.10.0
+flake8>=3.8.0,<6.0.0
 hypothesis>=3.27.0,<7.0.0
-mypy>=0.910,<0.970; platform_machine!='aarch64'
+mypy>=0.980,<0.990; platform_machine != "aarch64" and python_version >= "3.7"
 types-dataclasses>=0.1.3; python_version < "3.7"
 types-mock>=0.1.1
 types-setuptools>=57.0.0
@@ -56,7 +56,7 @@ install_requires =
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0
     requests>=2.13.0,<3.0.0
-    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
+    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.11.0
     jinja2
     # Official Python utilities
     setuptools
@@ -573,3 +573,12 @@ def setup_gpu(use_gpu: int, silent=None) -> None:
         local_msg.info("Using CPU")
         if gpu_is_available():
             local_msg.info("To switch to GPU 0, use the option: --gpu-id 0")
+
+
+def _format_number(number: Union[int, float], ndigits: int = 2) -> str:
+    """Formats a number (float or int) rounding to `ndigits`, without truncating trailing 0s,
+    as happens with `round(number, ndigits)`"""
+    if isinstance(number, float):
+        return f"{number:.{ndigits}f}"
+    else:
+        return str(number)
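For orientation, a small self-contained sketch (not part of the diff) of why the helper exists: `str(round(4.5, 2))` gives "4.5", while the new formatter pads floats to a fixed number of decimals. The function below is a local copy for illustration only.

from typing import Union

def format_number(number: Union[int, float], ndigits: int = 2) -> str:
    """Local mirror of the `_format_number` helper added above."""
    if isinstance(number, float):
        return f"{number:.{ndigits}f}"
    return str(number)

print(str(round(4.5, 2)))   # "4.5"  (trailing zero lost)
print(format_number(4.5))   # "4.50" (fixed width, trailing zero kept)
print(format_number(7))     # "7"    (ints pass through unchanged)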
@@ -9,7 +9,7 @@ import typer
 import math

 from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
-from ._util import import_code, debug_cli
+from ._util import import_code, debug_cli, _format_number
 from ..training import Example, remove_bilu_prefix
 from ..training.initialize import get_sourced_components
 from ..schemas import ConfigSchemaTraining
@@ -989,7 +989,8 @@ def _get_kl_divergence(p: Counter, q: Counter) -> float:

 def _format_span_row(span_data: List[Dict], labels: List[str]) -> List[Any]:
     """Compile into one list for easier reporting"""
     d = {
-        label: [label] + list(round(d[label], 2) for d in span_data) for label in labels
+        label: [label] + list(_format_number(d[label]) for d in span_data)
+        for label in labels
     }
     return list(d.values())
@@ -1004,6 +1005,10 @@ def _get_span_characteristics(
         label: _gmean(l)
         for label, l in compiled_gold["spans_length"][spans_key].items()
     }
+    spans_per_type = {
+        label: len(spans)
+        for label, spans in compiled_gold["spans_per_type"][spans_key].items()
+    }
     min_lengths = [min(l) for l in compiled_gold["spans_length"][spans_key].values()]
     max_lengths = [max(l) for l in compiled_gold["spans_length"][spans_key].values()]
@@ -1031,6 +1036,7 @@ def _get_span_characteristics(
     return {
         "sd": span_distinctiveness,
         "bd": sb_distinctiveness,
+        "spans_per_type": spans_per_type,
         "lengths": span_length,
         "min_length": min(min_lengths),
         "max_length": max(max_lengths),
@@ -1045,12 +1051,15 @@ def _get_span_characteristics(

 def _print_span_characteristics(span_characteristics: Dict[str, Any]):
     """Print all span characteristics into a table"""
-    headers = ("Span Type", "Length", "SD", "BD")
+    headers = ("Span Type", "Length", "SD", "BD", "N")
+    # Wasabi has this at 30 by default, but we might have some long labels
+    max_col = max(30, max(len(label) for label in span_characteristics["labels"]))
     # Prepare table data with all span characteristics
     table_data = [
         span_characteristics["lengths"],
         span_characteristics["sd"],
         span_characteristics["bd"],
+        span_characteristics["spans_per_type"],
     ]
     table = _format_span_row(
         span_data=table_data, labels=span_characteristics["labels"]
@@ -1061,8 +1070,18 @@ def _print_span_characteristics(span_characteristics: Dict[str, Any]):
         span_characteristics["avg_sd"],
         span_characteristics["avg_bd"],
     ]
-    footer = ["Wgt. Average"] + [str(round(f, 2)) for f in footer_data]
-    msg.table(table, footer=footer, header=headers, divider=True)
+
+    footer = (
+        ["Wgt. Average"] + ["{:.2f}".format(round(f, 2)) for f in footer_data] + ["-"]
+    )
+    msg.table(
+        table,
+        footer=footer,
+        header=headers,
+        divider=True,
+        aligns=["l"] + ["r"] * (len(footer_data) + 1),
+        max_col=max_col,
+    )


 def _get_spans_length_freq_dist(
@@ -299,8 +299,8 @@ def get_meta(
     }
     nlp = util.load_model_from_path(Path(model_path))
     meta.update(nlp.meta)
-    meta.update(existing_meta)
     meta["spacy_version"] = util.get_minor_version_range(about.__version__)
+    meta.update(existing_meta)
     meta["vectors"] = {
         "width": nlp.vocab.vectors_length,
         "vectors": len(nlp.vocab.vectors),
@@ -110,9 +110,6 @@ def update_dvc_config(
     config_commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}

     # some flags that apply to every command
-    if verbose and quiet:
-        # don't allow contradictions
-        msg.fail("Can't set both --verbose and --quiet", exits=1)
     flags = []
     if verbose:
         flags.append("--verbose")
@@ -23,7 +23,7 @@ class RussianLemmatizer(Lemmatizer):
         overwrite: bool = False,
         scorer: Optional[Callable] = lemmatizer_score,
     ) -> None:
-        if mode == "pymorphy2":
+        if mode in {"pymorphy2", "pymorphy2_lookup"}:
             try:
                 from pymorphy2 import MorphAnalyzer
             except ImportError:
@@ -18,7 +18,7 @@ class UkrainianLemmatizer(RussianLemmatizer):
         overwrite: bool = False,
         scorer: Optional[Callable] = lemmatizer_score,
     ) -> None:
-        if mode == "pymorphy2":
+        if mode in {"pymorphy2", "pymorphy2_lookup"}:
             try:
                 from pymorphy2 import MorphAnalyzer
             except ImportError:
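To show what the relaxed mode check enables, a minimal sketch of selecting the lookup mode through the pipe config, mirroring the test fixtures added later in this diff; it assumes pymorphy2 (and pymorphy2-dicts-uk for Ukrainian) is installed, and the blank pipeline is only a placeholder.

import spacy

# Russian lemmatizer in dictionary-lookup mode; previously only mode == "pymorphy2"
# reached the pymorphy2 import branch.
nlp = spacy.blank("ru")
nlp.add_pipe("lemmatizer", config={"mode": "pymorphy2_lookup"})
doc = nlp("мама мыла раму")
print([token.lemma_ for token in doc])

# The Ukrainian lemmatizer accepts the same config:
# spacy.blank("uk").add_pipe("lemmatizer", config={"mode": "pymorphy2_lookup"})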
@@ -1,7 +1,6 @@
 from typing import cast, Any, Callable, Dict, Iterable, List, Optional
-from typing import Sequence, Tuple, Union
+from typing import Tuple
 from collections import Counter
 from copy import deepcopy
 from itertools import islice
 import numpy as np
@@ -149,9 +148,7 @@ class EditTreeLemmatizer(TrainablePipe):
         if not any(len(doc) for doc in docs):
             # Handle cases where there are no tokens in any docs.
             n_labels = len(self.cfg["labels"])
-            guesses: List[Ints2d] = [
-                self.model.ops.alloc((0, n_labels), dtype="i") for doc in docs
-            ]
+            guesses: List[Ints2d] = [self.model.ops.alloc2i(0, n_labels) for _ in docs]
             assert len(guesses) == n_docs
             return guesses
         scores = self.model.predict(docs)
@@ -1,6 +1,5 @@
 import warnings
 from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
 from typing import cast
-import warnings
 from collections import defaultdict
 from pathlib import Path
 import srsly
@@ -317,7 +316,7 @@ class EntityRuler(Pipe):
                     phrase_pattern["id"] = ent_id
                 phrase_patterns.append(phrase_pattern)
         for entry in token_patterns + phrase_patterns:  # type: ignore[operator]
-            label = entry["label"]
+            label = entry["label"]  # type: ignore
             if "id" in entry:
                 ent_label = label
                 label = self._create_label(label, entry["id"])
@@ -133,6 +133,9 @@ def make_spancat(
     spans_key (str): Key of the doc.spans dict to save the spans under. During
         initialization and training, the component will look for spans on the
         reference document under the same key.
+    scorer (Optional[Callable]): The scoring method. Defaults to
+        Scorer.score_spans for the Doc.spans[spans_key] with overlapping
+        spans allowed.
     threshold (float): Minimum probability to consider a prediction positive.
         Spans with a positive prediction will be saved on the Doc. Defaults to
         0.5.
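Since the docstring now lists `scorer` between `spans_key` and `threshold`, here is a short hedged sketch of how those settings are usually supplied when the component is added (the values shown are the documented defaults, not new behaviour):

import spacy

nlp = spacy.blank("en")
# spans_key selects the doc.spans entry to write to; threshold is the cutoff
# for treating a predicted span as positive. The default scorer is used here.
nlp.add_pipe("spancat", config={"spans_key": "sc", "threshold": 0.5})
print(nlp.pipe_names)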
@@ -96,8 +96,8 @@ def make_multilabel_textcat(
     model: Model[List[Doc], List[Floats2d]],
     threshold: float,
     scorer: Optional[Callable],
-) -> "TextCategorizer":
-    """Create a TextCategorizer component. The text categorizer predicts categories
+) -> "MultiLabel_TextCategorizer":
+    """Create a MultiLabel_TextCategorizer component. The text categorizer predicts categories
     over a whole document. It can learn one or more labels, and the labels are considered
     to be non-mutually exclusive, which means that there can be zero or more labels
     per doc).
@@ -105,6 +105,7 @@ def make_multilabel_textcat(
     model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
         scores for each category.
     threshold (float): Cutoff to consider a prediction "positive".
+    scorer (Optional[Callable]): The scoring method.
     """
     return MultiLabel_TextCategorizer(
         nlp.vocab, model, name, threshold=threshold, scorer=scorer
@@ -147,6 +148,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
         name (str): The component instance name, used to add entries to the
             losses during training.
         threshold (float): Cutoff to consider a prediction "positive".
+        scorer (Optional[Callable]): The scoring method.

         DOCS: https://spacy.io/api/textcategorizer#init
         """
@@ -181,12 +181,12 @@ class TokenPatternNumber(BaseModel):
     IS_SUBSET: Optional[List[StrictInt]] = Field(None, alias="is_subset")
     IS_SUPERSET: Optional[List[StrictInt]] = Field(None, alias="is_superset")
     INTERSECTS: Optional[List[StrictInt]] = Field(None, alias="intersects")
-    EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
-    NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
-    GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
-    LEQ: Union[StrictInt, StrictFloat] = Field(None, alias="<=")
-    GT: Union[StrictInt, StrictFloat] = Field(None, alias=">")
-    LT: Union[StrictInt, StrictFloat] = Field(None, alias="<")
+    EQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="==")
+    NEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="!=")
+    GEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">=")
+    LEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<=")
+    GT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">")
+    LT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<")

     class Config:
         extra = "forbid"
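These schema fields validate the numeric comparison operators in token patterns; adding `Optional` only changes the type annotations, not matching behaviour. A small sketch of a pattern that goes through this validation (the pattern itself is illustrative):

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab, validate=True)  # validation uses TokenPatternNumber
# Match tokens whose length is at least 10 characters, via the ">=" alias.
matcher.add("LONG_WORD", [[{"LENGTH": {">=": 10}}]])
doc = nlp("unquestionably short")
print([doc[start:end].text for _, start, end in matcher(doc)])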
@@ -430,7 +430,7 @@ class ProjectConfigAssetURL(BaseModel):
     # fmt: off
     dest: StrictStr = Field(..., title="Destination of downloaded asset")
     url: Optional[StrictStr] = Field(None, title="URL of asset")
-    checksum: str = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
+    checksum: Optional[str] = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
     description: StrictStr = Field("", title="Description of asset")
     # fmt: on
@@ -438,7 +438,7 @@ class ProjectConfigAssetURL(BaseModel):

 class ProjectConfigAssetGit(BaseModel):
     # fmt: off
     git: ProjectConfigAssetGitItem = Field(..., title="Git repo information")
-    checksum: str = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
+    checksum: Optional[str] = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
     description: Optional[StrictStr] = Field(None, title="Description of asset")
     # fmt: on
@@ -508,9 +508,9 @@ class DocJSONSchema(BaseModel):
         None, title="Indices of sentences' start and end indices"
     )
     text: StrictStr = Field(..., title="Document text")
-    spans: Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]] = Field(
-        None, title="Span information - end/start indices, label, KB ID"
-    )
+    spans: Optional[
+        Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]]
+    ] = Field(None, title="Span information - end/start indices, label, KB ID")
     tokens: List[Dict[StrictStr, Union[StrictStr, StrictInt]]] = Field(
         ..., title="Token information - ID, start, annotations"
     )
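The `spans` field already defaulted to `None`; wrapping it in `Optional` brings the annotation in line with that default. For orientation, a hedged sketch of the `Doc.to_json` / `Doc.from_json` round trip this schema validates; dropping the `spans` key is done here only to show it is not required:

import spacy
from spacy.tokens import Doc

nlp = spacy.blank("en")
data = nlp("Hello world").to_json()
data.pop("spans", None)  # a payload without "spans" should still validate
restored = Doc(nlp.vocab).from_json(data, validate=True)
print([token.text for token in restored])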
@@ -343,6 +343,14 @@ def ru_lemmatizer():
     return get_lang_class("ru")().add_pipe("lemmatizer")


+@pytest.fixture
+def ru_lookup_lemmatizer():
+    pytest.importorskip("pymorphy2")
+    return get_lang_class("ru")().add_pipe(
+        "lemmatizer", config={"mode": "pymorphy2_lookup"}
+    )
+
+
 @pytest.fixture(scope="session")
 def sa_tokenizer():
     return get_lang_class("sa")().tokenizer
@@ -422,6 +430,15 @@ def uk_lemmatizer():
     return get_lang_class("uk")().add_pipe("lemmatizer")


+@pytest.fixture
+def uk_lookup_lemmatizer():
+    pytest.importorskip("pymorphy2")
+    pytest.importorskip("pymorphy2_dicts_uk")
+    return get_lang_class("uk")().add_pipe(
+        "lemmatizer", config={"mode": "pymorphy2_lookup"}
+    )
+
+
 @pytest.fixture(scope="session")
 def ur_tokenizer():
     return get_lang_class("ur")().tokenizer
@@ -78,3 +78,17 @@ def test_ru_lemmatizer_punct(ru_lemmatizer):
     assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
     doc = Doc(ru_lemmatizer.vocab, words=["»"], pos=["PUNCT"])
     assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
+
+
+def test_ru_doc_lookup_lemmatization(ru_lookup_lemmatizer):
+    words = ["мама", "мыла", "раму"]
+    pos = ["NOUN", "VERB", "NOUN"]
+    morphs = [
+        "Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing",
+        "Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act",
+        "Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing",
+    ]
+    doc = Doc(ru_lookup_lemmatizer.vocab, words=words, pos=pos, morphs=morphs)
+    doc = ru_lookup_lemmatizer(doc)
+    lemmas = [token.lemma_ for token in doc]
+    assert lemmas == ["мама", "мыла", "раму"]
@@ -9,3 +9,11 @@ def test_uk_lemmatizer(uk_lemmatizer):
     """Check that the default uk lemmatizer runs."""
     doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
     uk_lemmatizer(doc)
+    assert [token.lemma for token in doc]
+
+
+def test_uk_lookup_lemmatizer(uk_lookup_lemmatizer):
+    """Check that the lookup uk lemmatizer runs."""
+    doc = Doc(uk_lookup_lemmatizer.vocab, words=["a", "b", "c"])
+    uk_lookup_lemmatizer(doc)
+    assert [token.lemma for token in doc]
@@ -164,6 +164,9 @@ examples, see the
 Apply the pipeline to some text. The text can span multiple sentences, and can
 contain arbitrary whitespace. Alignment into the original string is preserved.

+Instead of text, a `Doc` can be passed as input, in which case tokenization is
+skipped, but the rest of the pipeline is run.
+
 > #### Example
 >
 > ```python
|
|||
|
||||
| Name | Description |
|
||||
| --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `text` | The text to be processed. ~~str~~ |
|
||||
| `text` | The text to be processed, or a Doc. ~~Union[str, Doc]~~ |
|
||||
| _keyword-only_ | |
|
||||
| `disable` | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). ~~List[str]~~ |
|
||||
| `component_cfg` | Optional dictionary of keyword arguments for components, keyed by component names. Defaults to `None`. ~~Optional[Dict[str, Dict[str, Any]]]~~ |
|
||||
|
@@ -184,6 +187,9 @@ contain arbitrary whitespace. Alignment into the original string is preserved.
 Process texts as a stream, and yield `Doc` objects in order. This is usually
 more efficient than processing texts one-by-one.

+Instead of text, a `Doc` object can be passed as input. In this case
+tokenization is skipped but the rest of the pipeline is run.
+
 > #### Example
 >
 > ```python
@@ -194,7 +200,7 @@ more efficient than processing texts one-by-one.

 | Name                                       | Description |
 | ------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `texts`                                    | A sequence of strings. ~~Iterable[str]~~ |
+| `texts`                                    | A sequence of strings (or `Doc` objects). ~~Iterable[Union[str, Doc]]~~ |
 | _keyword-only_                             | |
 | `as_tuples`                                | If set to `True`, inputs should be a sequence of `(text, context)` tuples. Output will then be a sequence of `(doc, context)` tuples. Defaults to `False`. ~~bool~~ |
 | `batch_size`                               | The number of texts to buffer. ~~Optional[int]~~ |
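To make the documented behaviour concrete, a small sketch of handing pre-constructed `Doc` objects to `nlp()` and `nlp.pipe()`, in which case tokenization is skipped; the blank pipeline and example words are placeholders:

import spacy
from spacy.tokens import Doc

nlp = spacy.blank("en")  # a trained pipeline works the same way
doc = Doc(nlp.vocab, words=["Berlin", "is", "a", "city"])

processed = nlp(doc)  # tokenization is skipped; remaining components run
docs = list(nlp.pipe([doc, "Plain strings are still accepted."]))
print([t.text for t in processed], len(docs))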
@@ -1,5 +1,62 @@
 {
     "resources": [
+        {
+            "id": "Zshot",
+            "title": "Zshot",
+            "slogan": "Zero and Few shot named entity & relationships recognition",
+            "github": "ibm/zshot",
+            "pip": "zshot",
+            "code_example": [
+                "import spacy",
+                "from zshot import PipelineConfig, displacy",
+                "from zshot.linker import LinkerRegen",
+                "from zshot.mentions_extractor import MentionsExtractorSpacy",
+                "from zshot.utils.data_models import Entity",
+                "",
+                "nlp = spacy.load('en_core_web_sm')",
+                "# zero shot definition of entities",
+                "nlp_config = PipelineConfig(",
+                "    mentions_extractor=MentionsExtractorSpacy(),",
+                "    linker=LinkerRegen(),",
+                "    entities=[",
+                "        Entity(name='Paris',",
+                "               description='Paris is located in northern central France, in a north-bending arc of the river Seine'),",
+                "        Entity(name='IBM',",
+                "               description='International Business Machines Corporation (IBM) is an American multinational technology corporation headquartered in Armonk, New York'),",
+                "        Entity(name='New York', description='New York is a city in U.S. state'),",
+                "        Entity(name='Florida', description='southeasternmost U.S. state'),",
+                "        Entity(name='American',",
+                "               description='American, something of, from, or related to the United States of America, commonly known as the United States or America'),",
+                "        Entity(name='Chemical formula',",
+                "               description='In chemistry, a chemical formula is a way of presenting information about the chemical proportions of atoms that constitute a particular chemical compound or molecul'),",
+                "        Entity(name='Acetamide',",
+                "               description='Acetamide (systematic name: ethanamide) is an organic compound with the formula CH3CONH2. It is the simplest amide derived from acetic acid. It finds some use as a plasticizer and as an industrial solvent.'),",
+                "        Entity(name='Armonk',",
+                "               description='Armonk is a hamlet and census-designated place (CDP) in the town of North Castle, located in Westchester County, New York, United States.'),",
+                "        Entity(name='Acetic Acid',",
+                "               description='Acetic acid, systematically named ethanoic acid, is an acidic, colourless liquid and organic compound with the chemical formula CH3COOH'),",
+                "        Entity(name='Industrial solvent',",
+                "               description='Acetamide (systematic name: ethanamide) is an organic compound with the formula CH3CONH2. It is the simplest amide derived from acetic acid. It finds some use as a plasticizer and as an industrial solvent.'),",
+                "    ]",
+                ")",
+                "nlp.add_pipe('zshot', config=nlp_config, last=True)",
+                "",
+                "text = 'International Business Machines Corporation (IBM) is an American multinational technology corporation' \\",
+                "       ' headquartered in Armonk, New York, with operations in over 171 countries.'",
+                "",
+                "doc = nlp(text)",
+                "displacy.serve(doc, style='ent')"
+            ],
+            "thumb": "https://ibm.github.io/zshot/img/graph.png",
+            "url": "https://ibm.github.io/zshot/",
+            "author": "IBM Research",
+            "author_links": {
+                "github": "ibm",
+                "twitter": "IBMResearch",
+                "website": "https://research.ibm.com/labs/ireland/"
+            },
+            "category": ["scientific", "models", "research"]
+        },
         {
             "id": "concepcy",
             "title": "concepCy",
@@ -2403,20 +2460,20 @@
                 "import spacy",
                 "from spacy_wordnet.wordnet_annotator import WordnetAnnotator ",
                 "",
-                "# Load an spacy model (supported models are \"es\" and \"en\") ",
-                "nlp = spacy.load('en')",
-                "# Spacy 3.x",
-                "nlp.add_pipe(\"spacy_wordnet\", after='tagger', config={'lang': nlp.lang})",
-                "# Spacy 2.x",
+                "# Load a spaCy model (supported languages are \"es\" and \"en\") ",
+                "nlp = spacy.load('en_core_web_sm')",
+                "# spaCy 3.x",
+                "nlp.add_pipe(\"spacy_wordnet\", after='tagger')",
+                "# spaCy 2.x",
                 "# nlp.add_pipe(WordnetAnnotator(nlp.lang), after='tagger')",
                 "token = nlp('prices')[0]",
                 "",
-                "# wordnet object link spacy token with nltk wordnet interface by giving acces to",
+                "# WordNet object links spaCy token with NLTK WordNet interface by giving access to",
                 "# synsets and lemmas ",
                 "token._.wordnet.synsets()",
                 "token._.wordnet.lemmas()",
                 "",
-                "# And automatically tags with wordnet domains",
+                "# And automatically add info about WordNet domains",
                 "token._.wordnet.wordnet_domains()"
             ],
             "author": "recognai",