Simplify pipe analysis

- remove unused code
- don't print by default
- integrate attrs info into analysis output
Ines Montani 2020-08-01 13:40:06 +02:00
parent 98c6a85c8b
commit b40f44419b
6 changed files with 171 additions and 170 deletions
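
In practice, this means `analyze_pipes` no longer prints by default and always returns the structured analysis. A minimal sketch of the new call pattern (assuming a spaCy build that includes this commit; the `tagger`/`entity_linker` setup mirrors the docs updated below):

```python
import spacy

nlp = spacy.blank("en")
nlp.add_pipe("tagger")
nlp.add_pipe("entity_linker")  # problem: requires entities that nothing assigns yet

# No console output by default anymore; the structured dict is returned
analysis = nlp.analyze_pipes()
print(analysis["problems"]["entity_linker"])

# Opt in to the formatted table
nlp.analyze_pipes(pretty=True)
```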

spacy/errors.py

@@ -63,8 +63,6 @@ class Warnings:
             "have the spacy-lookups-data package installed.")
     W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in "
             "the Knowledge Base.")
-    W025 = ("'{name}' requires '{attr}' to be assigned, but none of the "
-            "previous components in the pipeline declare that they assign it.")
     W026 = ("Unable to set all sentence boundaries from dependency parses.")
     W027 = ("Found a large training file of {size} bytes. Note that it may "
             "be more efficient to split your training data into multiple "

spacy/language.py

@@ -18,7 +18,7 @@ from timeit import default_timer as timer
 from .tokens.underscore import Underscore
 from .vocab import Vocab, create_vocab
-from .pipe_analysis import validate_attrs, print_summary
+from .pipe_analysis import validate_attrs, analyze_pipes, print_pipe_analysis
 from .gold import Example
 from .scorer import Scorer
 from .util import create_default_optimizer, registry
@@ -524,19 +524,20 @@ class Language:
         self,
         *,
         keys: List[str] = ["assigns", "requires", "scores", "retokenizes"],
-        pretty: bool = True,
-        no_print: bool = False,
+        pretty: bool = False,
     ) -> Optional[Dict[str, Any]]:
         """Analyze the current pipeline components, print a summary of what
         they assign or require and check that all requirements are met.

         keys (List[str]): The meta values to display in the table. Corresponds
             to values in FactoryMeta, defined by @Language.factory decorator.
-        pretty (bool): Pretty-print the results with colors and icons.
-        no_print (bool): Don't print anything and return structured dict instead.
-        RETURNS (dict): The data, if no_print is set to True.
+        pretty (bool): Pretty-print the results.
+        RETURNS (dict): The data.
         """
-        return print_summary(self, keys=keys, pretty=pretty, no_print=no_print)
+        analysis = analyze_pipes(self, keys=keys)
+        if pretty:
+            print_pipe_analysis(analysis, keys=keys)
+        return analysis

     def get_pipe(self, name: str) -> Callable[[Doc], Doc]:
         """Get a pipeline component for a given component name.

spacy/pipe_analysis.py

@@ -1,9 +1,8 @@
 from typing import List, Dict, Iterable, Optional, Union, TYPE_CHECKING
-from wasabi import Printer
-import warnings
+from wasabi import msg

 from .tokens import Doc, Token, Span
-from .errors import Errors, Warnings
+from .errors import Errors
 from .util import dot_to_dict

 if TYPE_CHECKING:
@@ -11,35 +10,7 @@ if TYPE_CHECKING:
     from .language import Language  # noqa: F401


-def analyze_pipes(
-    nlp: "Language", name: str, index: int, warn: bool = True
-) -> List[str]:
-    """Analyze a pipeline component with respect to its position in the current
-    pipeline and the other components. Will check whether requirements are
-    fulfilled (e.g. if previous components assign the attributes).
-
-    nlp (Language): The current nlp object.
-    name (str): The name of the pipeline component to analyze.
-    index (int): The index of the component in the pipeline.
-    warn (bool): Show user warning if problem is found.
-    RETURNS (List[str]): The problems found for the given pipeline component.
-    """
-    assert nlp.pipeline[index][0] == name
-    prev_pipes = nlp.pipeline[:index]
-    meta = nlp.get_pipe_meta(name)
-    requires = {annot: False for annot in meta.requires}
-    if requires:
-        for prev_name, prev_pipe in prev_pipes:
-            prev_meta = nlp.get_pipe_meta(prev_name)
-            for annot in prev_meta.assigns:
-                requires[annot] = True
-    problems = []
-    for annot, fulfilled in requires.items():
-        if not fulfilled:
-            problems.append(annot)
-            if warn:
-                warnings.warn(Warnings.W025.format(name=name, attr=annot))
-    return problems
+DEFAULT_KEYS = ["requires", "assigns", "scores", "retokenizes"]


 def validate_attrs(values: Iterable[str]) -> Iterable[str]:
@@ -88,97 +59,77 @@ def validate_attrs(values: Iterable[str]) -> Iterable[str]:
     return values


-def _get_feature_for_attr(nlp: "Language", attr: str, feature: str) -> List[str]:
-    assert feature in ["assigns", "requires"]
-    result = []
+def get_attr_info(nlp: "Language", attr: str) -> Dict[str, List[str]]:
+    """Check which components in the pipeline assign or require an attribute.
+
+    nlp (Language): The current nlp object.
+    attr (str): The attribute, e.g. "doc.tensor".
+    RETURNS (Dict[str, List[str]]): A dict keyed by "assigns" and "requires",
+        mapped to a list of component names.
+    """
+    result = {"assigns": [], "requires": []}
     for pipe_name in nlp.pipe_names:
         meta = nlp.get_pipe_meta(pipe_name)
-        pipe_assigns = getattr(meta, feature, [])
-        if attr in pipe_assigns:
-            result.append(pipe_name)
+        if attr in meta.assigns:
+            result["assigns"].append(pipe_name)
+        if attr in meta.requires:
+            result["requires"].append(pipe_name)
     return result


-def get_assigns_for_attr(nlp: "Language", attr: str) -> List[str]:
-    """Get all pipeline components that assign an attr, e.g. "doc.tensor".
-
-    pipeline (Language): The current nlp object.
-    attr (str): The attribute to check.
-    RETURNS (List[str]): Names of components that require the attr.
-    """
-    return _get_feature_for_attr(nlp, attr, "assigns")
-
-
-def get_requires_for_attr(nlp: "Language", attr: str) -> List[str]:
-    """Get all pipeline components that require an attr, e.g. "doc.tensor".
-
-    pipeline (Language): The current nlp object.
-    attr (str): The attribute to check.
-    RETURNS (List[str]): Names of components that require the attr.
-    """
-    return _get_feature_for_attr(nlp, attr, "requires")
-
-
-def print_summary(
-    nlp: "Language",
-    *,
-    keys: List[str] = ["requires", "assigns", "scores", "retokenizes"],
-    pretty: bool = True,
-    no_print: bool = False,
-) -> Optional[Dict[str, Union[List[str], Dict[str, List[str]]]]]:
+def analyze_pipes(
+    nlp: "Language", *, keys: List[str] = DEFAULT_KEYS,
+) -> Dict[str, Union[List[str], Dict[str, List[str]]]]:
     """Print a formatted summary for the current nlp object's pipeline. Shows
     a table with the pipeline components and why they assign and require, as
     well as any problems if available.

     nlp (Language): The nlp object.
     keys (List[str]): The meta keys to show in the table.
-    pretty (bool): Pretty-print the results (color etc).
-    no_print (bool): Don't print anything, just return the data.
-    RETURNS (dict): A dict with "overview" and "problems".
+    RETURNS (dict): A dict with "summary" and "problems".
     """
-    msg = Printer(pretty=pretty, no_print=no_print)
-    overview = {}
-    problems = {}
+    result = {"summary": {}, "problems": {}}
+    all_attrs = set()
     for i, name in enumerate(nlp.pipe_names):
         meta = nlp.get_pipe_meta(name)
-        overview[name] = {"i": i, "name": name}
-        for key in keys:
-            overview[name][key] = getattr(meta, key, None)
-        problems[name] = analyze_pipes(nlp, name, i, warn=False)
+        all_attrs.update(meta.assigns)
+        all_attrs.update(meta.requires)
+        result["summary"][name] = {key: getattr(meta, key, None) for key in keys}
+        prev_pipes = nlp.pipeline[:i]
+        requires = {annot: False for annot in meta.requires}
+        if requires:
+            for prev_name, prev_pipe in prev_pipes:
+                prev_meta = nlp.get_pipe_meta(prev_name)
+                for annot in prev_meta.assigns:
+                    requires[annot] = True
+        result["problems"][name] = []
+        for annot, fulfilled in requires.items():
+            if not fulfilled:
+                result["problems"][name].append(annot)
+    result["attrs"] = {attr: get_attr_info(nlp, attr) for attr in all_attrs}
+    return result


+def print_pipe_analysis(
+    analysis: Dict[str, Union[List[str], Dict[str, List[str]]]],
+    *,
+    keys: List[str] = DEFAULT_KEYS,
+) -> Optional[Dict[str, Union[List[str], Dict[str, List[str]]]]]:
+    """Print a formatted version of the pipe analysis produced by analyze_pipes.
+
+    analysis (Dict[str, Union[List[str], Dict[str, List[str]]]]): The analysis.
+    keys (List[str]): The meta keys to show in the table.
+    """
     msg.divider("Pipeline Overview")
     header = ["#", "Component", *[key.capitalize() for key in keys]]
-    body = [[info for info in entry.values()] for entry in overview.values()]
+    summary = analysis["summary"].items()
+    body = [[i, n, *[v for v in m.values()]] for i, (n, m) in enumerate(summary)]
     msg.table(body, header=header, divider=True, multiline=True)
-    n_problems = sum(len(p) for p in problems.values())
-    if any(p for p in problems.values()):
+    n_problems = sum(len(p) for p in analysis["problems"].values())
+    if any(p for p in analysis["problems"].values()):
         msg.divider(f"Problems ({n_problems})")
-        for name, problem in problems.items():
+        for name, problem in analysis["problems"].items():
             if problem:
                 msg.warn(f"'{name}' requirements not met: {', '.join(problem)}")
     else:
         msg.good("No problems found.")
-    if no_print:
-        return {"overview": overview, "problems": problems}
-
-
-def count_pipeline_interdependencies(nlp: "Language") -> List[int]:
-    """Count how many subsequent components require an annotation set by each
-    component in the pipeline.
-
-    nlp (Language): The current nlp object.
-    RETURNS (List[int]): The interdependency counts.
-    """
-    pipe_assigns = []
-    pipe_requires = []
-    for name in nlp.pipe_names:
-        meta = nlp.get_pipe_meta(name)
-        pipe_assigns.append(set(meta.assigns))
-        pipe_requires.append(set(meta.requires))
-    counts = []
-    for i, assigns in enumerate(pipe_assigns):
-        count = 0
-        for requires in pipe_requires[i + 1 :]:
-            if assigns.intersection(requires):
-                count += 1
-        counts.append(count)
-    return counts
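
The two former helpers collapse into one call. A quick sketch of the new `get_attr_info` (assuming the `tagger` factory assigns `token.tag`, as in the example output in the docs below):

```python
import spacy
from spacy.pipe_analysis import get_attr_info

nlp = spacy.blank("en")
nlp.add_pipe("tagger")

info = get_attr_info(nlp, "token.tag")
assert info["assigns"] == ["tagger"]  # was get_assigns_for_attr(...)
assert info["requires"] == []         # was get_requires_for_attr(...)
```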

spacy/tests/pipeline/test_analysis.py

@@ -1,6 +1,5 @@
 from spacy.language import Language
-from spacy.pipe_analysis import get_assigns_for_attr, get_requires_for_attr
-from spacy.pipe_analysis import validate_attrs, count_pipeline_interdependencies
+from spacy.pipe_analysis import get_attr_info, validate_attrs
 from mock import Mock
 import pytest

@@ -29,10 +28,10 @@ def test_component_decorator_assigns():
     nlp = Language()
     nlp.add_pipe("c1")
     nlp.add_pipe("c2")
-    problems = nlp.analyze_pipes(no_print=True)["problems"]
+    problems = nlp.analyze_pipes()["problems"]
     assert problems["c2"] == ["token.pos"]
     nlp.add_pipe("c3")
-    assert get_assigns_for_attr(nlp, "doc.tensor") == ["c1", "c2"]
+    assert get_attr_info(nlp, "doc.tensor")["assigns"] == ["c1", "c2"]
     nlp.add_pipe("c1", name="c4")
     test_component4_meta = nlp.get_pipe_meta("c1")
     assert test_component4_meta.factory == "c1"
@@ -40,8 +39,8 @@ def test_component_decorator_assigns():
     assert not Language.has_factory("c4")
     assert nlp.pipe_factories["c1"] == "c1"
     assert nlp.pipe_factories["c4"] == "c1"
-    assert get_assigns_for_attr(nlp, "doc.tensor") == ["c1", "c2", "c4"]
-    assert get_requires_for_attr(nlp, "token.pos") == ["c2"]
+    assert get_attr_info(nlp, "doc.tensor")["assigns"] == ["c1", "c2", "c4"]
+    assert get_attr_info(nlp, "token.pos")["requires"] == ["c2"]
     assert nlp("hello world")

@@ -108,26 +107,8 @@ def test_analysis_validate_attrs_remove_pipe():
     nlp = Language()
     nlp.add_pipe("pipe_analysis_c6")
     nlp.add_pipe("pipe_analysis_c7")
-    problems = nlp.analyze_pipes(no_print=True)["problems"]
+    problems = nlp.analyze_pipes()["problems"]
     assert problems["pipe_analysis_c7"] == ["token.pos"]
     nlp.remove_pipe("pipe_analysis_c7")
-    problems = nlp.analyze_pipes(no_print=True)["problems"]
+    problems = nlp.analyze_pipes()["problems"]
     assert all(p == [] for p in problems.values())
-
-
-def test_pipe_interdependencies():
-    prefix = "test_pipe_interdependencies"
-
-    @Language.component(f"{prefix}.fancifier", assigns=("doc._.fancy",))
-    def fancifier(doc):
-        return doc
-
-    @Language.component(f"{prefix}.needer", requires=("doc._.fancy",))
-    def needer(doc):
-        return doc
-
-    nlp = Language()
-    nlp.add_pipe(f"{prefix}.fancifier")
-    nlp.add_pipe(f"{prefix}.needer")
-    counts = count_pipeline_interdependencies(nlp)
-    assert counts == [1, 0]
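
Note that `count_pipeline_interdependencies` is removed without a direct replacement. If the counts are still needed, a rough stand-in can be rebuilt from the component meta; a sketch using only `get_pipe_meta` as in the code above (the helper name here is our own, not part of spaCy):

```python
def count_interdependencies(nlp):
    """For each component, count how many later components require
    an attribute it assigns (approximates the removed helper)."""
    assigns = [set(nlp.get_pipe_meta(n).assigns) for n in nlp.pipe_names]
    requires = [set(nlp.get_pipe_meta(n).requires) for n in nlp.pipe_names]
    return [
        sum(1 for req in requires[i + 1:] if a & req)
        for i, a in enumerate(assigns)
    ]
```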

website/docs/api/language.md

@@ -98,10 +98,10 @@ decorator. For more details and examples, see the
 | ----------------------- | -------------------- | ----------- |
 | `name` | str | The name of the component factory. |
 | _keyword-only_ | | |
-| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. |
+| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
-| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. |
+| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
-| `retokenizes` | bool | Whether the component changes tokenization. Used for pipeline analysis. |
+| `retokenizes` | bool | Whether the component changes tokenization. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
-| `scores` | `Iterable[str]` | All scores set by the components if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. |
+| `scores` | `Iterable[str]` | All scores set by the components if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
 | `default_score_weights` | `Dict[str, float]` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. |
 | `func` | `Optional[Callable]` | Optional function if not used as a decorator. |
@@ -146,10 +146,10 @@ examples, see the
 | `name` | str | The name of the component factory. |
 | _keyword-only_ | | |
 | `default_config` | `Dict[str, any]` | The default config, describing the default values of the factory arguments. |
-| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. |
+| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
-| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. |
+| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
-| `retokenizes` | bool | Whether the component changes tokenization. Used for pipeline analysis. |
+| `retokenizes` | bool | Whether the component changes tokenization. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
-| `scores` | `Iterable[str]` | All scores set by the components if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. |
+| `scores` | `Iterable[str]` | All scores set by the components if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
 | `default_score_weights` | `Dict[str, float]` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. |
 | `func` | `Optional[Callable]` | Optional function if not used as a decorator. |
@@ -622,12 +622,45 @@ doesn't, the pipeline analysis won't catch that.
 > nlp = spacy.blank("en")
 > nlp.add_pipe("tagger")
 > nlp.add_pipe("entity_linker")
-> nlp.analyze_pipes()
+> analysis = nlp.analyze_pipes()
 > ```

 <Accordion title="Example output" spaced>

+```json
+### Structured
+{
+  "summary": {
+    "tagger": {
+      "assigns": ["token.tag"],
+      "requires": [],
+      "scores": ["tag_acc", "pos_acc", "lemma_acc"],
+      "retokenizes": false
+    },
+    "entity_linker": {
+      "assigns": ["token.ent_kb_id"],
+      "requires": ["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"],
+      "scores": [],
+      "retokenizes": false
+    }
+  },
+  "problems": {
+    "tagger": [],
+    "entity_linker": ["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"]
+  },
+  "attrs": {
+    "token.ent_iob": { "assigns": [], "requires": ["entity_linker"] },
+    "doc.ents": { "assigns": [], "requires": ["entity_linker"] },
+    "token.ent_kb_id": { "assigns": ["entity_linker"], "requires": [] },
+    "doc.sents": { "assigns": [], "requires": ["entity_linker"] },
+    "token.tag": { "assigns": ["tagger"], "requires": [] },
+    "token.ent_type": { "assigns": [], "requires": ["entity_linker"] }
+  }
+}
 ```
+
+```
+### Pretty

 ============================= Pipeline Overview =============================

 #   Component       Assigns     Requires    Scores     Retokenizes
@@ -649,13 +682,12 @@ token.ent_iob, token.ent_type

 </Accordion>

 | Name | Type | Description |
 | -------------- | ----------- | ----------- |
 | _keyword-only_ | | |
 | `keys` | `List[str]` | The values to display in the table. Corresponds to attributes of the [`FactoryMeta`](/api/language#factorymeta). Defaults to `["assigns", "requires", "scores", "retokenizes"]`. |
-| `pretty` | bool | Pretty-print the results with colors and icons. Defaults to `True`. |
-| `no_print` | bool | Don't print anything and return a structured dict instead. Defaults to `False`. |
-| **RETURNS** | dict | Optional dict, if `no_print` is set to `True`. |
+| `pretty` | bool | Pretty-print the results as a table. Defaults to `False`. |
+| **RETURNS** | dict | Dictionary containing the pipe analysis, keyed by `"summary"` (component meta by pipe), `"problems"` (attribute names by pipe) and `"attrs"` (pipes that assign and require an attribute, keyed by attribute). |

 ## Language.meta {#meta tag="property"}
@@ -892,8 +924,8 @@ instance and factory instance.
 | ----------------------- | ------------------ | ----------- |
 | `factory` | str | The name of the registered component factory. |
 | `default_config` | `Dict[str, Any]` | The default config, describing the default values of the factory arguments. |
-| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. |
+| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
-| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. |
+| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
-| `retokenizes` | bool | Whether the component changes tokenization. Used for pipeline analysis. |
+| `retokenizes` | bool | Whether the component changes tokenization. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
-| `scores` | `Iterable[str]` | All scores set by the components if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. |
+| `scores` | `Iterable[str]` | All scores set by the components if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. Used for [pipe analysis](/usage/processing-pipelines#analysis). |
 | `default_score_weights` | `Dict[str, float]` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. |
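
The new `"attrs"` section documented above also makes dependency audits straightforward. A small, self-contained sketch (attribute and component names follow the example output):

```python
import spacy

nlp = spacy.blank("en")
nlp.add_pipe("tagger")
nlp.add_pipe("entity_linker")

analysis = nlp.analyze_pipes()
for attr, info in analysis["attrs"].items():
    if info["requires"] and not info["assigns"]:
        print(f"{attr} is required by {info['requires']} but never assigned")
```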

website/docs/usage/processing-pipelines.md

@@ -319,17 +319,61 @@ attributes they set on the [`Doc`](/api/doc) and [`Token`](/api/token), whether
 they retokenize the `Doc` and which scores they produce during training. It will
 also show warnings if components require values that aren't set by previous
 components – for instance, if the entity linker is used but no component that
-runs before it sets named entities.
+runs before it sets named entities. Setting `pretty=True` will pretty-print a
+table instead of only returning the structured data.
+
+> #### ✏️ Things to try
+>
+> 1. Add the components `"ner"` and `"sentencizer"` _before_ the entity linker.
+>    The analysis should now show no problems, because requirements are met.

 ```python
+### {executable="true"}
+import spacy
+
 nlp = spacy.blank("en")
 nlp.add_pipe("tagger")
-nlp.add_pipe("entity_linker")  # this is a problem, because it needs entities
-nlp.analyze_pipes()
+# This is a problem because it needs entities and sentence boundaries
+nlp.add_pipe("entity_linker")
+analysis = nlp.analyze_pipes(pretty=True)
 ```

+<Accordion title="Example output">
+
+```json
+### Structured
+{
+  "summary": {
+    "tagger": {
+      "assigns": ["token.tag"],
+      "requires": [],
+      "scores": ["tag_acc", "pos_acc", "lemma_acc"],
+      "retokenizes": false
+    },
+    "entity_linker": {
+      "assigns": ["token.ent_kb_id"],
+      "requires": ["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"],
+      "scores": [],
+      "retokenizes": false
+    }
+  },
+  "problems": {
+    "tagger": [],
+    "entity_linker": ["doc.ents", "doc.sents", "token.ent_iob", "token.ent_type"]
+  },
+  "attrs": {
+    "token.ent_iob": { "assigns": [], "requires": ["entity_linker"] },
+    "doc.ents": { "assigns": [], "requires": ["entity_linker"] },
+    "token.ent_kb_id": { "assigns": ["entity_linker"], "requires": [] },
+    "doc.sents": { "assigns": [], "requires": ["entity_linker"] },
+    "token.tag": { "assigns": ["tagger"], "requires": [] },
+    "token.ent_type": { "assigns": [], "requires": ["entity_linker"] }
+  }
+}
+```
+
 ```
-### Example output
+### Pretty

 ============================= Pipeline Overview =============================

 #   Component       Assigns     Requires    Scores     Retokenizes
@@ -349,13 +393,7 @@ nlp.analyze_pipes()
 token.ent_iob, token.ent_type
 ```

-If you prefer a structured dictionary containing the component information and
-the problems, you can set `no_print=True`. This will return the data instead of
-printing it.
-
-```
-result = nlp.analyze_pipes(no_print=True)
-```
+</Accordion>

 <Infobox variant="warning" title="Important note">