Mirror of https://github.com/explosion/spaCy.git (synced 2025-11-04 01:48:04 +03:00)
* Allow output_path to be None during training
* Fix cat scoring (?)
* Improve error message for weighted None score
* Improve messages so we can call this in other places etc.
* Fix output path check
* Use latest wasabi
* Revert "Improve error message for weighted None score"
  This reverts commit 7059926763.
* Exclude None scores from the final score by default
  It's otherwise very difficult to keep track of the score weights if we modify a config programmatically, source components, etc.
* Update warnings and use logger.warning
958 lines · 37 KiB · Python

from typing import Optional, Iterable, Dict, Set, Any, Callable, TYPE_CHECKING
import numpy as np
from collections import defaultdict

from .training import Example
from .tokens import Token, Doc, Span
from .errors import Errors
from .util import get_lang_class, SimpleFrozenList
from .morphology import Morphology

if TYPE_CHECKING:
    # This lets us add type hints for mypy etc. without causing circular imports
    from .language import Language  # noqa: F401


DEFAULT_PIPELINE = ("senter", "tagger", "morphologizer", "parser", "ner", "textcat")
MISSING_VALUES = frozenset([None, 0, ""])


class PRFScore:
    """A precision / recall / F score."""

    # Accept initial counts so that __add__ can construct a combined score.
    def __init__(self, *, tp: int = 0, fp: int = 0, fn: int = 0) -> None:
        self.tp = tp
        self.fp = fp
        self.fn = fn

    def __len__(self) -> int:
        return self.tp + self.fp + self.fn

    def __iadd__(self, other):
        self.tp += other.tp
        self.fp += other.fp
        self.fn += other.fn
        return self

    def __add__(self, other):
        return PRFScore(
            tp=self.tp + other.tp, fp=self.fp + other.fp, fn=self.fn + other.fn
        )

    def score_set(self, cand: set, gold: set) -> None:
        self.tp += len(cand.intersection(gold))
        self.fp += len(cand - gold)
        self.fn += len(gold - cand)

    @property
    def precision(self) -> float:
        return self.tp / (self.tp + self.fp + 1e-100)

    @property
    def recall(self) -> float:
        return self.tp / (self.tp + self.fn + 1e-100)

    @property
    def fscore(self) -> float:
        p = self.precision
        r = self.recall
        return 2 * ((p * r) / (p + r + 1e-100))

    def to_dict(self) -> Dict[str, float]:
        return {"p": self.precision, "r": self.recall, "f": self.fscore}
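
# Usage sketch (illustrative, not part of the original module): accumulate
# counts from candidate/gold sets and read off precision, recall and F.
#
#     prf = PRFScore()
#     prf.score_set({"a", "b", "c"}, {"b", "c", "d"})  # tp=2, fp=1, fn=1
#     print(prf.precision, prf.recall, prf.fscore)     # all ~0.667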


class ROCAUCScore:
    """An AUC ROC score. This is only defined for binary classification.
    Use the method is_binary before calculating the score, otherwise it
    may throw an error."""

    def __init__(self) -> None:
        self.golds = []
        self.cands = []
        self.saved_score = 0.0
        self.saved_score_at_len = 0

    def score_set(self, cand, gold) -> None:
        self.cands.append(cand)
        self.golds.append(gold)

    def is_binary(self):
        return len(np.unique(self.golds)) == 2

    @property
    def score(self):
        if not self.is_binary():
            raise ValueError(Errors.E165.format(label=set(self.golds)))
        if len(self.golds) == self.saved_score_at_len:
            return self.saved_score
        self.saved_score = _roc_auc_score(self.golds, self.cands)
        self.saved_score_at_len = len(self.golds)
        return self.saved_score
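
# Usage sketch (illustrative, not part of the original module): collect
# (prediction score, gold label) pairs, check that the golds are binary,
# then read the cached .score property.
#
#     auc = ROCAUCScore()
#     for pred, gold in [(0.9, 1), (0.2, 0), (0.7, 1), (0.4, 0)]:
#         auc.score_set(pred, gold)
#     if auc.is_binary():
#         print(auc.score)  # 1.0: all positives outrank all negatives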


class Scorer:
    """Compute evaluation scores."""

    def __init__(
        self,
        nlp: Optional["Language"] = None,
        default_lang: str = "xx",
        default_pipeline: Iterable[str] = DEFAULT_PIPELINE,
        **cfg,
    ) -> None:
        """Initialize the Scorer.

        DOCS: https://nightly.spacy.io/api/scorer#init
        """
        self.nlp = nlp
        self.cfg = cfg
        if not nlp:
            nlp = get_lang_class(default_lang)()
            for pipe in default_pipeline:
                nlp.add_pipe(pipe)
            self.nlp = nlp

    def score(self, examples: Iterable[Example]) -> Dict[str, Any]:
        """Evaluate a list of Examples.

        examples (Iterable[Example]): The predicted annotations + correct annotations.
        RETURNS (Dict): A dictionary of scores.

        DOCS: https://nightly.spacy.io/api/scorer#score
        """
        scores = {}
        if hasattr(self.nlp.tokenizer, "score"):
            scores.update(self.nlp.tokenizer.score(examples, **self.cfg))
        for name, component in self.nlp.pipeline:
            if hasattr(component, "score"):
                scores.update(component.score(examples, **self.cfg))
        return scores
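
    # Usage sketch (illustrative, not part of the original module; assumes a
    # spaCy install importable as `spacy` and the standard Example API):
    #
    #     import spacy
    #     from spacy.training import Example
    #     from spacy.scorer import Scorer
    #
    #     nlp = spacy.blank("en")
    #     pred_doc = nlp("Berlin is a city")
    #     example = Example.from_dict(pred_doc, {"words": ["Berlin", "is", "a", "city"]})
    #     scores = Scorer(nlp).score([example])
    #     # Every component (and the tokenizer) that exposes a `score` method
    #     # contributes keys to `scores`.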

    @staticmethod
    def score_tokenization(examples: Iterable[Example], **cfg) -> Dict[str, Any]:
        """Returns accuracy and PRF scores for tokenization.
        * token_acc: # correct tokens / # gold tokens
        * token_p/r/f: PRF for token character spans

        examples (Iterable[Example]): Examples to score
        RETURNS (Dict[str, Any]): A dictionary containing the scores
            token_acc/p/r/f.

        DOCS: https://nightly.spacy.io/api/scorer#score_tokenization
        """
        acc_score = PRFScore()
        prf_score = PRFScore()
        for example in examples:
            gold_doc = example.reference
            pred_doc = example.predicted
            if gold_doc.has_unknown_spaces:
                continue
            align = example.alignment
            gold_spans = set()
            pred_spans = set()
            for token in gold_doc:
                if token.orth_.isspace():
                    continue
                gold_spans.add((token.idx, token.idx + len(token)))
            for token in pred_doc:
                if token.orth_.isspace():
                    continue
                pred_spans.add((token.idx, token.idx + len(token)))
                if align.x2y.lengths[token.i] != 1:
                    acc_score.fp += 1
                else:
                    acc_score.tp += 1
            prf_score.score_set(pred_spans, gold_spans)
        if len(acc_score) > 0:
            return {
                "token_acc": acc_score.fscore,
                "token_p": prf_score.precision,
                "token_r": prf_score.recall,
                "token_f": prf_score.fscore,
            }
        else:
            return {
                "token_acc": None,
                "token_p": None,
                "token_r": None,
                "token_f": None,
            }
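
    # Usage sketch (illustrative, not part of the original module): compare a
    # predicted tokenization against gold tokens with different boundaries.
    #
    #     import spacy
    #     from spacy.training import Example
    #     from spacy.scorer import Scorer
    #
    #     nlp = spacy.blank("en")
    #     pred_doc = nlp("AB CD")  # predicted tokens: ["AB", "CD"]
    #     example = Example.from_dict(
    #         pred_doc, {"words": ["A", "B", "CD"], "spaces": [False, True, False]}
    #     )
    #     print(Scorer.score_tokenization([example]))  # token_p/r/f reflect the mismatch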

    @staticmethod
    def score_token_attr(
        examples: Iterable[Example],
        attr: str,
        *,
        getter: Callable[[Token, str], Any] = getattr,
        missing_values: Set[Any] = MISSING_VALUES,
        **cfg,
    ) -> Dict[str, Any]:
        """Returns an accuracy score for a token-level attribute.

        examples (Iterable[Example]): Examples to score
        attr (str): The attribute to score.
        getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
            getter(token, attr) should return the value of the attribute for an
            individual token.
        RETURNS (Dict[str, Any]): A dictionary containing the accuracy score
            under the key attr_acc.

        DOCS: https://nightly.spacy.io/api/scorer#score_token_attr
        """
        tag_score = PRFScore()
        for example in examples:
            gold_doc = example.reference
            pred_doc = example.predicted
            align = example.alignment
            gold_tags = set()
            missing_indices = set()
            for gold_i, token in enumerate(gold_doc):
                value = getter(token, attr)
                if value not in missing_values:
                    gold_tags.add((gold_i, getter(token, attr)))
                else:
                    missing_indices.add(gold_i)
            pred_tags = set()
            for token in pred_doc:
                if token.orth_.isspace():
                    continue
                if align.x2y.lengths[token.i] == 1:
                    gold_i = align.x2y[token.i].dataXd[0, 0]
                    if gold_i not in missing_indices:
                        pred_tags.add((gold_i, getter(token, attr)))
            tag_score.score_set(pred_tags, gold_tags)
        score_key = f"{attr}_acc"
        if len(tag_score) == 0:
            return {score_key: None}
        else:
            return {score_key: tag_score.fscore}
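
    # Usage sketch (illustrative, not part of the original module): score
    # coarse-grained POS tags by comparing token.pos on predicted vs. reference.
    #
    #     import spacy
    #     from spacy.training import Example
    #     from spacy.scorer import Scorer
    #
    #     nlp = spacy.blank("en")
    #     pred_doc = nlp("They sail")
    #     pred_doc[0].pos_ = "PRON"
    #     pred_doc[1].pos_ = "NOUN"  # wrong on purpose
    #     example = Example.from_dict(pred_doc, {"pos": ["PRON", "VERB"]})
    #     print(Scorer.score_token_attr([example], "pos"))  # {"pos_acc": 0.5}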

    @staticmethod
    def score_token_attr_per_feat(
        examples: Iterable[Example],
        attr: str,
        *,
        getter: Callable[[Token, str], Any] = getattr,
        missing_values: Set[Any] = MISSING_VALUES,
        **cfg,
    ) -> Dict[str, Any]:
        """Return PRF scores per feat for a token attribute in UFEATS format.

        examples (Iterable[Example]): Examples to score
        attr (str): The attribute to score.
        getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
            getter(token, attr) should return the value of the attribute for an
            individual token.
        RETURNS (dict): A dictionary containing the per-feat PRF scores under
            the key attr_per_feat.
        """
        per_feat = {}
        for example in examples:
            pred_doc = example.predicted
            gold_doc = example.reference
            align = example.alignment
            gold_per_feat = {}
            missing_indices = set()
            for gold_i, token in enumerate(gold_doc):
                value = getter(token, attr)
                morph = gold_doc.vocab.strings[value]
                if value not in missing_values and morph != Morphology.EMPTY_MORPH:
                    for feat in morph.split(Morphology.FEATURE_SEP):
                        field, values = feat.split(Morphology.FIELD_SEP)
                        if field not in per_feat:
                            per_feat[field] = PRFScore()
                        if field not in gold_per_feat:
                            gold_per_feat[field] = set()
                        gold_per_feat[field].add((gold_i, feat))
                else:
                    missing_indices.add(gold_i)
            pred_per_feat = {}
            for token in pred_doc:
                if token.orth_.isspace():
                    continue
                if align.x2y.lengths[token.i] == 1:
                    gold_i = align.x2y[token.i].dataXd[0, 0]
                    if gold_i not in missing_indices:
                        value = getter(token, attr)
                        morph = gold_doc.vocab.strings[value]
                        if (
                            value not in missing_values
                            and morph != Morphology.EMPTY_MORPH
                        ):
                            for feat in morph.split(Morphology.FEATURE_SEP):
                                field, values = feat.split(Morphology.FIELD_SEP)
                                if field not in per_feat:
                                    per_feat[field] = PRFScore()
                                if field not in pred_per_feat:
                                    pred_per_feat[field] = set()
                                pred_per_feat[field].add((gold_i, feat))
            for field in per_feat:
                per_feat[field].score_set(
                    pred_per_feat.get(field, set()), gold_per_feat.get(field, set())
                )
        score_key = f"{attr}_per_feat"
        if any([len(v) for v in per_feat.values()]):
            result = {k: v.to_dict() for k, v in per_feat.items()}
            return {score_key: result}
        else:
            return {score_key: None}

    @staticmethod
    def score_spans(
        examples: Iterable[Example],
        attr: str,
        *,
        getter: Callable[[Doc, str], Iterable[Span]] = getattr,
        has_annotation: Optional[Callable[[Doc], bool]] = None,
        **cfg,
    ) -> Dict[str, Any]:
        """Returns PRF scores for labeled spans.

        examples (Iterable[Example]): Examples to score
        attr (str): The attribute to score.
        getter (Callable[[Doc, str], Iterable[Span]]): Defaults to getattr. If
            provided, getter(doc, attr) should return the spans for the
            individual doc.
        RETURNS (Dict[str, Any]): A dictionary containing the PRF scores under
            the keys attr_p/r/f and the per-type PRF scores under attr_per_type.

        DOCS: https://nightly.spacy.io/api/scorer#score_spans
        """
        score = PRFScore()
        score_per_type = dict()
        for example in examples:
            pred_doc = example.predicted
            gold_doc = example.reference
            # Option to handle docs without sents
            if has_annotation is not None:
                if not has_annotation(gold_doc):
                    continue
            # Find all labels in gold and doc
            labels = set(
                [k.label_ for k in getter(gold_doc, attr)]
                + [k.label_ for k in getter(pred_doc, attr)]
            )
            # Set up all labels for per type scoring and prepare gold per type
            gold_per_type = {label: set() for label in labels}
            for label in labels:
                if label not in score_per_type:
                    score_per_type[label] = PRFScore()
            # Find all predicted labels, for all and per type
            gold_spans = set()
            pred_spans = set()
            for span in getter(gold_doc, attr):
                gold_span = (span.label_, span.start, span.end - 1)
                gold_spans.add(gold_span)
                gold_per_type[span.label_].add((span.label_, span.start, span.end - 1))
            pred_per_type = {label: set() for label in labels}
            for span in example.get_aligned_spans_x2y(getter(pred_doc, attr)):
                pred_spans.add((span.label_, span.start, span.end - 1))
                pred_per_type[span.label_].add((span.label_, span.start, span.end - 1))
            # Scores per label
            for k, v in score_per_type.items():
                if k in pred_per_type:
                    v.score_set(pred_per_type[k], gold_per_type[k])
            # Score for all labels
            score.score_set(pred_spans, gold_spans)
        if len(score) > 0:
            return {
                f"{attr}_p": score.precision,
                f"{attr}_r": score.recall,
                f"{attr}_f": score.fscore,
                f"{attr}_per_type": {k: v.to_dict() for k, v in score_per_type.items()},
            }
        else:
            return {
                f"{attr}_p": None,
                f"{attr}_r": None,
                f"{attr}_f": None,
                f"{attr}_per_type": None,
            }
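
    # Usage sketch (illustrative, not part of the original module): score
    # entity spans by passing attr="ents", so getattr(doc, "ents") is used.
    #
    #     import spacy
    #     from spacy.tokens import Span
    #     from spacy.training import Example
    #     from spacy.scorer import Scorer
    #
    #     nlp = spacy.blank("en")
    #     pred_doc = nlp("Apple opened a store in Paris")
    #     pred_doc.ents = [Span(pred_doc, 0, 1, label="ORG")]
    #     example = Example.from_dict(
    #         pred_doc, {"entities": [(0, 5, "ORG"), (24, 29, "GPE")]}
    #     )
    #     print(Scorer.score_spans([example], "ents"))  # ents_p=1.0, ents_r=0.5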

    @staticmethod
    def score_cats(
        examples: Iterable[Example],
        attr: str,
        *,
        getter: Callable[[Doc, str], Any] = getattr,
        labels: Iterable[str] = SimpleFrozenList(),
        multi_label: bool = True,
        positive_label: Optional[str] = None,
        threshold: Optional[float] = None,
        **cfg,
    ) -> Dict[str, Any]:
        """Returns PRF and ROC AUC scores for a doc-level attribute with a
        dict with scores for each label like Doc.cats. The reported overall
        score depends on the scorer settings.

        examples (Iterable[Example]): Examples to score
        attr (str): The attribute to score.
        getter (Callable[[Doc, str], Any]): Defaults to getattr. If provided,
            getter(doc, attr) should return the values for the individual doc.
        labels (Iterable[str]): The set of possible labels. Defaults to [].
        multi_label (bool): Whether the attribute allows multiple labels.
            Defaults to True.
        positive_label (str): The positive label for a binary task with
            exclusive classes. Defaults to None.
        threshold (float): Cutoff to consider a prediction "positive". Defaults
            to 0.5 for multi-label, and 0.0 (i.e. whatever's highest scoring)
            otherwise.
        RETURNS (Dict[str, Any]): A dictionary containing the scores, with
            inapplicable scores as None:
            for all:
                attr_score (one of attr_micro_f / attr_macro_f / attr_macro_auc),
                attr_score_desc (text description of the overall score),
                attr_micro_p,
                attr_micro_r,
                attr_micro_f,
                attr_macro_p,
                attr_macro_r,
                attr_macro_f,
                attr_macro_auc,
                attr_f_per_type,
                attr_auc_per_type

        DOCS: https://nightly.spacy.io/api/scorer#score_cats
        """
        if threshold is None:
            threshold = 0.5 if multi_label else 0.0
        f_per_type = {label: PRFScore() for label in labels}
        auc_per_type = {label: ROCAUCScore() for label in labels}
        labels = set(labels)
        if labels:
            for eg in examples:
                labels.update(eg.predicted.cats.keys())
                labels.update(eg.reference.cats.keys())
        for example in examples:
            # Through this loop, None in the gold_cats indicates missing label.
            pred_cats = getter(example.predicted, attr)
            gold_cats = getter(example.reference, attr)

            for label in labels:
                pred_score = pred_cats.get(label, 0.0)
                gold_score = gold_cats.get(label, 0.0)
                if gold_score is not None:
                    auc_per_type[label].score_set(pred_score, gold_score)
            if multi_label:
                for label in labels:
                    pred_score = pred_cats.get(label, 0.0)
                    gold_score = gold_cats.get(label, 0.0)
                    if gold_score is not None:
                        if pred_score >= threshold and gold_score > 0:
                            f_per_type[label].tp += 1
                        elif pred_score >= threshold and gold_score == 0:
                            f_per_type[label].fp += 1
                        elif pred_score < threshold and gold_score > 0:
                            f_per_type[label].fn += 1
            elif pred_cats and gold_cats:
                # Get the highest-scoring for each.
                pred_label, pred_score = max(pred_cats.items(), key=lambda it: it[1])
                gold_label, gold_score = max(gold_cats.items(), key=lambda it: it[1])
                if gold_score is not None:
                    if pred_label == gold_label and pred_score >= threshold:
                        f_per_type[pred_label].tp += 1
                    else:
                        f_per_type[gold_label].fn += 1
                        if pred_score >= threshold:
                            f_per_type[pred_label].fp += 1
            elif gold_cats:
                gold_label, gold_score = max(gold_cats.items(), key=lambda it: it[1])
                if gold_score is not None and gold_score > 0:
                    f_per_type[gold_label].fn += 1
            elif pred_cats:
                pred_label, pred_score = max(pred_cats.items(), key=lambda it: it[1])
                if pred_score >= threshold:
                    f_per_type[pred_label].fp += 1
        micro_prf = PRFScore()
        for label_prf in f_per_type.values():
            micro_prf.tp += label_prf.tp
            micro_prf.fn += label_prf.fn
            micro_prf.fp += label_prf.fp
        n_cats = len(f_per_type) + 1e-100
        macro_p = sum(prf.precision for prf in f_per_type.values()) / n_cats
        macro_r = sum(prf.recall for prf in f_per_type.values()) / n_cats
        macro_f = sum(prf.fscore for prf in f_per_type.values()) / n_cats
        # Limit macro_auc to those labels with gold annotations,
        # but still divide by all cats to avoid artificial boosting of datasets with missing labels
        macro_auc = (
            sum(auc.score if auc.is_binary() else 0.0 for auc in auc_per_type.values())
            / n_cats
        )
        results = {
            f"{attr}_score": None,
            f"{attr}_score_desc": None,
            f"{attr}_micro_p": micro_prf.precision,
            f"{attr}_micro_r": micro_prf.recall,
            f"{attr}_micro_f": micro_prf.fscore,
            f"{attr}_macro_p": macro_p,
            f"{attr}_macro_r": macro_r,
            f"{attr}_macro_f": macro_f,
            f"{attr}_macro_auc": macro_auc,
            f"{attr}_f_per_type": {k: v.to_dict() for k, v in f_per_type.items()},
            f"{attr}_auc_per_type": {
                k: v.score if v.is_binary() else None for k, v in auc_per_type.items()
            },
        }
        if len(labels) == 2 and not multi_label and positive_label:
            positive_label_f = results[f"{attr}_f_per_type"][positive_label]["f"]
            results[f"{attr}_score"] = positive_label_f
            results[f"{attr}_score_desc"] = f"F ({positive_label})"
        elif not multi_label:
            results[f"{attr}_score"] = results[f"{attr}_macro_f"]
            results[f"{attr}_score_desc"] = "macro F"
        else:
            results[f"{attr}_score"] = results[f"{attr}_macro_auc"]
            results[f"{attr}_score_desc"] = "macro AUC"
        return results
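
    # Usage sketch (illustrative, not part of the original module): an exclusive
    # two-class textcat scored with a positive label.
    #
    #     import spacy
    #     from spacy.training import Example
    #     from spacy.scorer import Scorer
    #
    #     nlp = spacy.blank("en")
    #     pred_doc = nlp("great film")
    #     pred_doc.cats = {"POS": 0.8, "NEG": 0.2}
    #     example = Example.from_dict(pred_doc, {"cats": {"POS": 1.0, "NEG": 0.0}})
    #     scores = Scorer.score_cats(
    #         [example], "cats", labels=["POS", "NEG"],
    #         multi_label=False, positive_label="POS",
    #     )
    #     print(scores["cats_score"], scores["cats_score_desc"])  # 1.0 "F (POS)"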

    @staticmethod
    def score_links(
        examples: Iterable[Example], *, negative_labels: Iterable[str]
    ) -> Dict[str, Any]:
        """Returns PRF for predicted links on the entity level.
        To disentangle the performance of the NEL from the NER,
        this method only evaluates NEL links for entities that overlap
        between the gold reference and the predictions.

        examples (Iterable[Example]): Examples to score
        negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL")
        RETURNS (Dict[str, Any]): A dictionary containing the scores.

        DOCS: https://nightly.spacy.io/api/scorer#score_links
        """
        f_per_type = {}
        for example in examples:
            gold_ent_by_offset = {}
            for gold_ent in example.reference.ents:
                gold_ent_by_offset[(gold_ent.start_char, gold_ent.end_char)] = gold_ent

            for pred_ent in example.predicted.ents:
                gold_span = gold_ent_by_offset.get(
                    (pred_ent.start_char, pred_ent.end_char), None
                )
                if gold_span is None:
                    # Skip predicted entities without an exactly matching gold entity
                    continue
                label = gold_span.label_
                if label not in f_per_type:
                    f_per_type[label] = PRFScore()
                gold = gold_span.kb_id_
                # only evaluating entities that overlap between gold and pred,
                # to disentangle the performance of the NEL from the NER
                if gold is not None:
                    pred = pred_ent.kb_id_
                    if gold in negative_labels and pred in negative_labels:
                        # ignore true negatives
                        pass
                    elif gold == pred:
                        f_per_type[label].tp += 1
                    elif gold in negative_labels:
                        f_per_type[label].fp += 1
                    elif pred in negative_labels:
                        f_per_type[label].fn += 1
                    else:
                        # a wrong prediction (e.g. Q42 != Q3) counts as both a FP as well as a FN
                        f_per_type[label].fp += 1
                        f_per_type[label].fn += 1
        micro_prf = PRFScore()
        for label_prf in f_per_type.values():
            micro_prf.tp += label_prf.tp
            micro_prf.fn += label_prf.fn
            micro_prf.fp += label_prf.fp
        n_labels = len(f_per_type) + 1e-100
        macro_p = sum(prf.precision for prf in f_per_type.values()) / n_labels
        macro_r = sum(prf.recall for prf in f_per_type.values()) / n_labels
        macro_f = sum(prf.fscore for prf in f_per_type.values()) / n_labels
        results = {
            "nel_score": micro_prf.fscore,
            "nel_score_desc": "micro F",
            "nel_micro_p": micro_prf.precision,
            "nel_micro_r": micro_prf.recall,
            "nel_micro_f": micro_prf.fscore,
            "nel_macro_p": macro_p,
            "nel_macro_r": macro_r,
            "nel_macro_f": macro_f,
            "nel_f_per_type": {k: v.to_dict() for k, v in f_per_type.items()},
        }
        return results
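
    # Usage sketch (illustrative, not part of the original module; assumes the
    # gold "links" annotation format {(start_char, end_char): {kb_id: 1.0}}):
    #
    #     import spacy
    #     from spacy.tokens import Span
    #     from spacy.training import Example
    #     from spacy.scorer import Scorer
    #
    #     nlp = spacy.blank("en")
    #     pred_doc = nlp("Douglas Adams wrote it")
    #     pred_doc.ents = [Span(pred_doc, 0, 2, label="PERSON", kb_id="Q42")]
    #     example = Example.from_dict(
    #         pred_doc,
    #         {"entities": [(0, 13, "PERSON")], "links": {(0, 13): {"Q42": 1.0}}},
    #     )
    #     print(Scorer.score_links([example], negative_labels=["NIL"]))  # nel_micro_f=1.0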

    @staticmethod
    def score_deps(
        examples: Iterable[Example],
        attr: str,
        *,
        getter: Callable[[Token, str], Any] = getattr,
        head_attr: str = "head",
        head_getter: Callable[[Token, str], Token] = getattr,
        ignore_labels: Iterable[str] = SimpleFrozenList(),
        missing_values: Set[Any] = MISSING_VALUES,
        **cfg,
    ) -> Dict[str, Any]:
        """Returns the UAS, LAS, and LAS per type scores for dependency
        parses.

        examples (Iterable[Example]): Examples to score
        attr (str): The attribute containing the dependency label.
        getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
            getter(token, attr) should return the value of the attribute for an
            individual token.
        head_attr (str): The attribute containing the head token. Defaults to
            'head'.
        head_getter (Callable[[Token, str], Token]): Defaults to getattr. If provided,
            head_getter(token, attr) should return the value of the head for an
            individual token.
        ignore_labels (Tuple): Labels to ignore while scoring (e.g., punct).
        RETURNS (Dict[str, Any]): A dictionary containing the scores:
            attr_uas, attr_las, and attr_las_per_type.

        DOCS: https://nightly.spacy.io/api/scorer#score_deps
        """
        unlabelled = PRFScore()
        labelled = PRFScore()
        labelled_per_dep = dict()
        for example in examples:
            gold_doc = example.reference
            pred_doc = example.predicted
            align = example.alignment
            # Gold token indices are per example, so missing annotation is
            # tracked per example as well
            missing_indices = set()
            gold_deps = set()
            gold_deps_per_dep = {}
            for gold_i, token in enumerate(gold_doc):
                dep = getter(token, attr)
                head = head_getter(token, head_attr)
                if dep not in missing_values:
                    if dep not in ignore_labels:
                        gold_deps.add((gold_i, head.i, dep))
                        if dep not in labelled_per_dep:
                            labelled_per_dep[dep] = PRFScore()
                        if dep not in gold_deps_per_dep:
                            gold_deps_per_dep[dep] = set()
                        gold_deps_per_dep[dep].add((gold_i, head.i, dep))
                else:
                    missing_indices.add(gold_i)
            pred_deps = set()
            pred_deps_per_dep = {}
            for token in pred_doc:
                if token.orth_.isspace():
                    continue
                if align.x2y.lengths[token.i] != 1:
                    gold_i = None
                else:
                    gold_i = align.x2y[token.i].dataXd[0, 0]
                if gold_i not in missing_indices:
                    dep = getter(token, attr)
                    head = head_getter(token, head_attr)
                    if dep not in ignore_labels and token.orth_.strip():
                        if align.x2y.lengths[head.i] == 1:
                            gold_head = align.x2y[head.i].dataXd[0, 0]
                        else:
                            gold_head = None
                        # None is indistinct, so we can't just add it to the set
                        # Multiple (None, None) deps are possible
                        if gold_i is None or gold_head is None:
                            unlabelled.fp += 1
                            labelled.fp += 1
                        else:
                            pred_deps.add((gold_i, gold_head, dep))
                            if dep not in labelled_per_dep:
                                labelled_per_dep[dep] = PRFScore()
                            if dep not in pred_deps_per_dep:
                                pred_deps_per_dep[dep] = set()
                            pred_deps_per_dep[dep].add((gold_i, gold_head, dep))
            labelled.score_set(pred_deps, gold_deps)
            for dep in labelled_per_dep:
                labelled_per_dep[dep].score_set(
                    pred_deps_per_dep.get(dep, set()), gold_deps_per_dep.get(dep, set())
                )
            unlabelled.score_set(
                set(item[:2] for item in pred_deps), set(item[:2] for item in gold_deps)
            )
        if len(unlabelled) > 0:
            return {
                f"{attr}_uas": unlabelled.fscore,
                f"{attr}_las": labelled.fscore,
                f"{attr}_las_per_type": {
                    k: v.to_dict() for k, v in labelled_per_dep.items()
                },
            }
        else:
            return {
                f"{attr}_uas": None,
                f"{attr}_las": None,
                f"{attr}_las_per_type": None,
            }
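
    # Usage sketch (illustrative, not part of the original module): UAS/LAS for
    # a two-token parse where one label is wrong but both heads are right.
    #
    #     import spacy
    #     from spacy.training import Example
    #     from spacy.scorer import Scorer
    #
    #     nlp = spacy.blank("en")
    #     pred_doc = nlp("She sleeps")
    #     pred_doc[0].head = pred_doc[1]
    #     pred_doc[0].dep_ = "dobj"  # wrong label, right head
    #     pred_doc[1].head = pred_doc[1]
    #     pred_doc[1].dep_ = "ROOT"
    #     example = Example.from_dict(pred_doc, {"heads": [1, 1], "deps": ["nsubj", "ROOT"]})
    #     print(Scorer.score_deps([example], "dep"))  # dep_uas=1.0, dep_las=0.5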


def get_ner_prf(examples: Iterable[Example]) -> Dict[str, Any]:
    """Compute micro-PRF and per-entity PRF scores for a sequence of examples."""
    score_per_type = defaultdict(PRFScore)
    for eg in examples:
        if not eg.y.has_annotation("ENT_IOB"):
            continue
        golds = {(e.label_, e.start, e.end) for e in eg.y.ents}
        align_x2y = eg.alignment.x2y
        for pred_ent in eg.x.ents:
            if pred_ent.label_ not in score_per_type:
                score_per_type[pred_ent.label_] = PRFScore()
            indices = align_x2y[pred_ent.start : pred_ent.end].dataXd.ravel()
            if len(indices):
                g_span = eg.y[indices[0] : indices[-1] + 1]
                # Check we aren't missing annotation on this span. If so,
                # our prediction is neither right nor wrong, we just
                # ignore it.
                if all(token.ent_iob != 0 for token in g_span):
                    key = (pred_ent.label_, indices[0], indices[-1] + 1)
                    if key in golds:
                        score_per_type[pred_ent.label_].tp += 1
                        golds.remove(key)
                    else:
                        score_per_type[pred_ent.label_].fp += 1
        for label, start, end in golds:
            score_per_type[label].fn += 1
    totals = PRFScore()
    for prf in score_per_type.values():
        totals += prf
    if len(totals) > 0:
        return {
            "ents_p": totals.precision,
            "ents_r": totals.recall,
            "ents_f": totals.fscore,
            "ents_per_type": {k: v.to_dict() for k, v in score_per_type.items()},
        }
    else:
        return {
            "ents_p": None,
            "ents_r": None,
            "ents_f": None,
            "ents_per_type": None,
        }
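
# Usage sketch (illustrative, not part of the original module): the helper
# behind NER scoring, called directly on a toy example.
#
#     import spacy
#     from spacy.tokens import Span
#     from spacy.training import Example
#     from spacy.scorer import get_ner_prf
#
#     nlp = spacy.blank("en")
#     pred_doc = nlp("Alice met Bob")
#     pred_doc.ents = [Span(pred_doc, 0, 1, label="PERSON")]
#     example = Example.from_dict(
#         pred_doc, {"entities": [(0, 5, "PERSON"), (10, 13, "PERSON")]}
#     )
#     print(get_ner_prf([example]))  # ents_p=1.0, ents_r=0.5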


# The following implementation of roc_auc_score() is adapted from
# scikit-learn, which is distributed under the New BSD License.
# Copyright (c) 2007–2019 The scikit-learn developers.
# See licenses/3rd_party_licenses.txt
def _roc_auc_score(y_true, y_score):
    """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
    from prediction scores.

    Note: this implementation is restricted to the binary classification task

    Parameters
    ----------
    y_true : array, shape = [n_samples] or [n_samples, n_classes]
        True binary labels or binary label indicators.
        The multiclass case expects shape = [n_samples] and labels
        with values in ``range(n_classes)``.

    y_score : array, shape = [n_samples] or [n_samples, n_classes]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or non-thresholded measure of decisions
        (as returned by "decision_function" on some classifiers). For binary
        y_true, y_score is supposed to be the score of the class with greater
        label. The multiclass case expects shape = [n_samples, n_classes]
        where the scores correspond to probability estimates.

    Returns
    -------
    auc : float

    References
    ----------
    .. [1] `Wikipedia entry for the Receiver operating characteristic
            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_

    .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
           Letters, 2006, 27(8):861-874.

    .. [3] `Analyzing a portion of the ROC curve. McClish, 1989
            <https://www.ncbi.nlm.nih.gov/pubmed/2668680>`_
    """
    if len(np.unique(y_true)) != 2:
        raise ValueError(Errors.E165.format(label=np.unique(y_true)))
    fpr, tpr, _ = _roc_curve(y_true, y_score)
    return _auc(fpr, tpr)
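
# Worked example (illustrative, not part of the original module): with gold
# labels [1, 0, 1, 0] and scores [0.9, 0.8, 0.3, 0.2], three of the four
# positive/negative pairs are ranked correctly, so the AUC is 0.75.
#
#     print(_roc_auc_score([1, 0, 1, 0], [0.9, 0.8, 0.3, 0.2]))  # 0.75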


def _roc_curve(y_true, y_score):
    """Compute Receiver operating characteristic (ROC)

    Note: this implementation is restricted to the binary classification task.

    Parameters
    ----------

    y_true : array, shape = [n_samples]
        True binary labels. If labels are not either {-1, 1} or {0, 1}, then
        pos_label should be explicitly given.

    y_score : array, shape = [n_samples]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or non-thresholded measure of decisions
        (as returned by "decision_function" on some classifiers).

    Returns
    -------
    fpr : array, shape = [>2]
        Increasing false positive rates such that element i is the false
        positive rate of predictions with score >= thresholds[i].

    tpr : array, shape = [>2]
        Increasing true positive rates such that element i is the true
        positive rate of predictions with score >= thresholds[i].

    thresholds : array, shape = [n_thresholds]
        Decreasing thresholds on the decision function used to compute
        fpr and tpr. `thresholds[0]` represents no instances being predicted
        and is arbitrarily set to `max(y_score) + 1`.

    Notes
    -----
    Since the thresholds are sorted from low to high values, they
    are reversed upon returning them to ensure they correspond to both ``fpr``
    and ``tpr``, which are sorted in reversed order during their calculation.

    References
    ----------
    .. [1] `Wikipedia entry for the Receiver operating characteristic
            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_

    .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition
           Letters, 2006, 27(8):861-874.
    """
    fps, tps, thresholds = _binary_clf_curve(y_true, y_score)

    # Add an extra threshold position
    # to make sure that the curve starts at (0, 0)
    tps = np.r_[0, tps]
    fps = np.r_[0, fps]
    thresholds = np.r_[thresholds[0] + 1, thresholds]

    if fps[-1] <= 0:
        fpr = np.repeat(np.nan, fps.shape)
    else:
        fpr = fps / fps[-1]

    if tps[-1] <= 0:
        tpr = np.repeat(np.nan, tps.shape)
    else:
        tpr = tps / tps[-1]

    return fpr, tpr, thresholds


def _binary_clf_curve(y_true, y_score):
    """Calculate true and false positives per binary classification threshold.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True targets of binary classification

    y_score : array, shape = [n_samples]
        Estimated probabilities or decision function

    Returns
    -------
    fps : array, shape = [n_thresholds]
        A count of false positives, at index i being the number of negative
        samples assigned a score >= thresholds[i]. The total number of
        negative samples is equal to fps[-1] (thus true negatives are given by
        fps[-1] - fps).

    tps : array, shape = [n_thresholds <= len(np.unique(y_score))]
        An increasing count of true positives, at index i being the number
        of positive samples assigned a score >= thresholds[i]. The total
        number of positive samples is equal to tps[-1] (thus false negatives
        are given by tps[-1] - tps).

    thresholds : array, shape = [n_thresholds]
        Decreasing score values.
    """
    pos_label = 1.0

    y_true = np.ravel(y_true)
    y_score = np.ravel(y_score)

    # make y_true a boolean vector
    y_true = y_true == pos_label

    # sort scores and corresponding truth values
    desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1]
    y_score = y_score[desc_score_indices]
    y_true = y_true[desc_score_indices]
    weight = 1.0

    # y_score typically has many tied values. Here we extract
    # the indices associated with the distinct values. We also
    # concatenate a value for the end of the curve.
    distinct_value_indices = np.where(np.diff(y_score))[0]
    threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1]

    # accumulate the true positives with decreasing threshold
    tps = _stable_cumsum(y_true * weight)[threshold_idxs]
    fps = 1 + threshold_idxs - tps
    return fps, tps, y_score[threshold_idxs]


def _stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08):
    """Use high precision for cumsum and check that final value matches sum

    Parameters
    ----------
    arr : array-like
        To be cumulatively summed as flat
    axis : int, optional
        Axis along which the cumulative sum is computed.
        The default (None) is to compute the cumsum over the flattened array.
    rtol : float
        Relative tolerance, see ``np.allclose``
    atol : float
        Absolute tolerance, see ``np.allclose``
    """
    out = np.cumsum(arr, axis=axis, dtype=np.float64)
    expected = np.sum(arr, axis=axis, dtype=np.float64)
    if not np.all(
        np.isclose(
            out.take(-1, axis=axis), expected, rtol=rtol, atol=atol, equal_nan=True
        )
    ):
        raise ValueError(Errors.E163)
    return out


def _auc(x, y):
    """Compute Area Under the Curve (AUC) using the trapezoidal rule

    This is a general function, given points on a curve.  For computing the
    area under the ROC-curve, see :func:`roc_auc_score`.

    Parameters
    ----------
    x : array, shape = [n]
        x coordinates. These must be either monotonic increasing or monotonic
        decreasing.
    y : array, shape = [n]
        y coordinates.

    Returns
    -------
    auc : float
    """
    x = np.ravel(x)
    y = np.ravel(y)

    direction = 1
    dx = np.diff(x)
    if np.any(dx < 0):
        if np.all(dx <= 0):
            direction = -1
        else:
            raise ValueError(Errors.E164.format(x=x))

    area = direction * np.trapz(y, x)
    if isinstance(area, np.memmap):
        # Reductions such as .sum used internally in np.trapz do not return a
        # scalar by default for numpy.memmap instances contrary to
        # regular numpy.ndarray instances.
        area = area.dtype.type(area)
    return area
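
# Worked example (illustrative, not part of the original module): the area
# under y = x on [0, 1], computed with the trapezoidal rule.
#
#     print(_auc([0.0, 0.5, 1.0], [0.0, 0.5, 1.0]))  # 0.5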