mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-14 18:40:33 +03:00
Merge branch 'develop' into nightly.spacy.io
This commit is contained in:
commit
f3aba49830
|
@ -1,6 +1,6 @@
|
|||
# fmt: off
|
||||
__title__ = "spacy-nightly"
|
||||
__version__ = "3.0.0a24"
|
||||
__version__ = "3.0.0a25"
|
||||
__release__ = True
|
||||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
||||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
||||
|
|
|
@ -51,7 +51,7 @@ def project_pull(project_dir: Path, remote: str, *, verbose: bool = False):
|
|||
update_lockfile(project_dir, cmd)
|
||||
# We remove the command from the list here, and break, so that
|
||||
# we iterate over the loop again.
|
||||
commands.remove(i)
|
||||
commands.pop(i)
|
||||
break
|
||||
else:
|
||||
# If we didn't break the for loop, break the while loop.
|
||||
|
|
|
@ -401,10 +401,6 @@ class Errors:
|
|||
"Matcher or PhraseMatcher with the attribute {attr}. "
|
||||
"Try using nlp() instead of nlp.make_doc() or list(nlp.pipe()) "
|
||||
"instead of list(nlp.tokenizer.pipe()).")
|
||||
E156 = ("The pipeline needs to include a parser in order to use "
|
||||
"Matcher or PhraseMatcher with the attribute DEP. Try using "
|
||||
"nlp() instead of nlp.make_doc() or list(nlp.pipe()) instead of "
|
||||
"list(nlp.tokenizer.pipe()).")
|
||||
E157 = ("Can't render negative values for dependency arc start or end. "
|
||||
"Make sure that you're passing in absolute token indices, not "
|
||||
"relative token offsets.\nstart: {start}, end: {end}, label: "
|
||||
|
@ -517,8 +513,8 @@ class Errors:
|
|||
"instead.")
|
||||
E927 = ("Can't write to frozen list Maybe you're trying to modify a computed "
|
||||
"property or default function argument?")
|
||||
E928 = ("A 'KnowledgeBase' should be written to / read from a file, but the "
|
||||
"provided argument {loc} is an existing directory.")
|
||||
E928 = ("A 'KnowledgeBase' can only be serialized to/from from a directory, "
|
||||
"but the provided argument {loc} points to a file.")
|
||||
E929 = ("A 'KnowledgeBase' could not be read from {loc} - the path does "
|
||||
"not seem to exist.")
|
||||
E930 = ("Received invalid get_examples callback in {name}.begin_training. "
|
||||
|
|
39
spacy/kb.pyx
39
spacy/kb.pyx
|
@ -10,6 +10,8 @@ from libcpp.vector cimport vector
|
|||
from pathlib import Path
|
||||
import warnings
|
||||
|
||||
from spacy.strings import StringStore
|
||||
|
||||
from spacy import util
|
||||
|
||||
from .typedefs cimport hash_t
|
||||
|
@ -83,6 +85,9 @@ cdef class KnowledgeBase:
|
|||
DOCS: https://nightly.spacy.io/api/kb
|
||||
"""
|
||||
|
||||
contents_loc = "contents"
|
||||
strings_loc = "strings.json"
|
||||
|
||||
def __init__(self, Vocab vocab, entity_vector_length):
|
||||
"""Create a KnowledgeBase."""
|
||||
self.mem = Pool()
|
||||
|
@ -319,15 +324,29 @@ cdef class KnowledgeBase:
|
|||
|
||||
return 0.0
|
||||
|
||||
|
||||
def to_disk(self, path):
|
||||
path = util.ensure_path(path)
|
||||
if path.is_dir():
|
||||
if not path.exists():
|
||||
path.mkdir(parents=True)
|
||||
if not path.is_dir():
|
||||
raise ValueError(Errors.E928.format(loc=path))
|
||||
if not path.parent.exists():
|
||||
path.parent.mkdir(parents=True)
|
||||
self.write_contents(path / self.contents_loc)
|
||||
self.vocab.strings.to_disk(path / self.strings_loc)
|
||||
|
||||
cdef Writer writer = Writer(path)
|
||||
def from_disk(self, path):
    """Load the knowledge base from a directory on disk.

    The directory is expected to hold the KB contents under
    `self.contents_loc` and the serialized strings under
    `self.strings_loc`.

    path (str / Path): Location of the KB directory.
    RAISES (ValueError): E929 if the path does not exist, E928 if it exists
        but is not a directory.
    """
    kb_dir = util.ensure_path(path)
    if not kb_dir.exists():
        raise ValueError(Errors.E929.format(loc=kb_dir))
    elif not kb_dir.is_dir():
        raise ValueError(Errors.E928.format(loc=kb_dir))
    self.read_contents(kb_dir / self.contents_loc)
    # Read the stored strings into a scratch store first, then copy each one
    # into the vocab this KB is attached to.
    stored_strings = StringStore()
    stored_strings.from_disk(kb_dir / self.strings_loc)
    add_to_vocab = self.vocab.strings.add
    for text in stored_strings:
        add_to_vocab(text)
|
||||
|
||||
def write_contents(self, file_path):
|
||||
cdef Writer writer = Writer(file_path)
|
||||
writer.write_header(self.get_size_entities(), self.entity_vector_length)
|
||||
|
||||
# dumping the entity vectors in their original order
|
||||
|
@ -366,13 +385,7 @@ cdef class KnowledgeBase:
|
|||
|
||||
writer.close()
|
||||
|
||||
def from_disk(self, path):
|
||||
path = util.ensure_path(path)
|
||||
if path.is_dir():
|
||||
raise ValueError(Errors.E928.format(loc=path))
|
||||
if not path.exists():
|
||||
raise ValueError(Errors.E929.format(loc=path))
|
||||
|
||||
def read_contents(self, file_path):
|
||||
cdef hash_t entity_hash
|
||||
cdef hash_t alias_hash
|
||||
cdef int64_t entry_index
|
||||
|
@ -382,7 +395,7 @@ cdef class KnowledgeBase:
|
|||
cdef AliasC alias
|
||||
cdef float vector_element
|
||||
|
||||
cdef Reader reader = Reader(path)
|
||||
cdef Reader reader = Reader(file_path)
|
||||
|
||||
# STEP 0: load header and initialize KB
|
||||
cdef int64_t nr_entities
|
||||
|
|
|
@ -17,6 +17,7 @@ from ..vocab cimport Vocab
|
|||
from ..tokens.doc cimport Doc, get_token_attr_for_matcher
|
||||
from ..tokens.span cimport Span
|
||||
from ..tokens.token cimport Token
|
||||
from ..tokens.morphanalysis cimport MorphAnalysis
|
||||
from ..attrs cimport ID, attr_id_t, NULL_ATTR, ORTH, POS, TAG, DEP, LEMMA, MORPH
|
||||
|
||||
from ..schemas import validate_token_pattern
|
||||
|
@ -124,7 +125,7 @@ cdef class Matcher:
|
|||
key = self._normalize_key(key)
|
||||
for pattern in patterns:
|
||||
try:
|
||||
specs = _preprocess_pattern(pattern, self.vocab.strings,
|
||||
specs = _preprocess_pattern(pattern, self.vocab,
|
||||
self._extensions, self._extra_predicates)
|
||||
self.patterns.push_back(init_pattern(self.mem, key, specs))
|
||||
for spec in specs:
|
||||
|
@ -195,7 +196,7 @@ cdef class Matcher:
|
|||
else:
|
||||
yield doc
|
||||
|
||||
def __call__(self, object doclike, *, as_spans=False):
|
||||
def __call__(self, object doclike, *, as_spans=False, allow_missing=False):
|
||||
"""Find all token sequences matching the supplied pattern.
|
||||
|
||||
doclike (Doc or Span): The document to match over.
|
||||
|
@ -215,16 +216,19 @@ cdef class Matcher:
|
|||
else:
|
||||
raise ValueError(Errors.E195.format(good="Doc or Span", got=type(doclike).__name__))
|
||||
cdef Pool tmp_pool = Pool()
|
||||
if TAG in self._seen_attrs and not doc.has_annotation("TAG"):
|
||||
raise ValueError(Errors.E155.format(pipe="tagger", attr="TAG"))
|
||||
if POS in self._seen_attrs and not doc.has_annotation("POS"):
|
||||
raise ValueError(Errors.E155.format(pipe="morphologizer", attr="POS"))
|
||||
if MORPH in self._seen_attrs and not doc.has_annotation("MORPH"):
|
||||
raise ValueError(Errors.E155.format(pipe="morphologizer", attr="MORPH"))
|
||||
if LEMMA in self._seen_attrs and not doc.has_annotation("LEMMA"):
|
||||
raise ValueError(Errors.E155.format(pipe="lemmatizer", attr="LEMMA"))
|
||||
if DEP in self._seen_attrs and not doc.has_annotation("DEP"):
|
||||
raise ValueError(Errors.E156.format())
|
||||
if not allow_missing:
|
||||
for attr in (TAG, POS, MORPH, LEMMA, DEP):
|
||||
if attr in self._seen_attrs and not doc.has_annotation(attr):
|
||||
if attr == TAG:
|
||||
pipe = "tagger"
|
||||
elif attr in (POS, MORPH):
|
||||
pipe = "morphologizer"
|
||||
elif attr == LEMMA:
|
||||
pipe = "lemmatizer"
|
||||
elif attr == DEP:
|
||||
pipe = "parser"
|
||||
error_msg = Errors.E155.format(pipe=pipe, attr=self.vocab.strings.as_string(attr))
|
||||
raise ValueError(error_msg)
|
||||
matches = find_matches(&self.patterns[0], self.patterns.size(), doclike, length,
|
||||
extensions=self._extensions, predicates=self._extra_predicates)
|
||||
final_matches = []
|
||||
|
@ -660,7 +664,7 @@ cdef attr_t get_ent_id(const TokenPatternC* pattern) nogil:
|
|||
return id_attr.value
|
||||
|
||||
|
||||
def _preprocess_pattern(token_specs, string_store, extensions_table, extra_predicates):
|
||||
def _preprocess_pattern(token_specs, vocab, extensions_table, extra_predicates):
|
||||
"""This function interprets the pattern, converting the various bits of
|
||||
syntactic sugar before we compile it into a struct with init_pattern.
|
||||
|
||||
|
@ -675,6 +679,7 @@ def _preprocess_pattern(token_specs, string_store, extensions_table, extra_predi
|
|||
extra_predicates.
|
||||
"""
|
||||
tokens = []
|
||||
string_store = vocab.strings
|
||||
for spec in token_specs:
|
||||
if not spec:
|
||||
# Signifier for 'any token'
|
||||
|
@ -685,7 +690,7 @@ def _preprocess_pattern(token_specs, string_store, extensions_table, extra_predi
|
|||
ops = _get_operators(spec)
|
||||
attr_values = _get_attr_values(spec, string_store)
|
||||
extensions = _get_extensions(spec, string_store, extensions_table)
|
||||
predicates = _get_extra_predicates(spec, extra_predicates)
|
||||
predicates = _get_extra_predicates(spec, extra_predicates, vocab)
|
||||
for op in ops:
|
||||
tokens.append((op, list(attr_values), list(extensions), list(predicates)))
|
||||
return tokens
|
||||
|
@ -729,7 +734,7 @@ def _get_attr_values(spec, string_store):
|
|||
class _RegexPredicate:
|
||||
operators = ("REGEX",)
|
||||
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False):
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None):
|
||||
self.i = i
|
||||
self.attr = attr
|
||||
self.value = re.compile(value)
|
||||
|
@ -747,13 +752,18 @@ class _RegexPredicate:
|
|||
return bool(self.value.search(value))
|
||||
|
||||
|
||||
class _SetMemberPredicate:
|
||||
operators = ("IN", "NOT_IN")
|
||||
class _SetPredicate:
|
||||
operators = ("IN", "NOT_IN", "IS_SUBSET", "IS_SUPERSET")
|
||||
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False):
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None):
|
||||
self.i = i
|
||||
self.attr = attr
|
||||
self.value = set(get_string_id(v) for v in value)
|
||||
self.vocab = vocab
|
||||
if self.attr == MORPH:
|
||||
# normalize morph strings
|
||||
self.value = set(self.vocab.morphology.add(v) for v in value)
|
||||
else:
|
||||
self.value = set(get_string_id(v) for v in value)
|
||||
self.predicate = predicate
|
||||
self.is_extension = is_extension
|
||||
self.key = (attr, self.predicate, srsly.json_dumps(value, sort_keys=True))
|
||||
|
@ -765,19 +775,32 @@ class _SetMemberPredicate:
|
|||
value = get_string_id(token._.get(self.attr))
|
||||
else:
|
||||
value = get_token_attr_for_matcher(token.c, self.attr)
|
||||
|
||||
if self.predicate in ("IS_SUBSET", "IS_SUPERSET"):
|
||||
if self.attr == MORPH:
|
||||
# break up MORPH into individual Feat=Val values
|
||||
value = set(get_string_id(v) for v in MorphAnalysis.from_id(self.vocab, value))
|
||||
else:
|
||||
# IS_SUBSET for other attrs will be equivalent to "IN"
|
||||
# IS_SUPERSET will only match for other attrs with 0 or 1 values
|
||||
value = set([value])
|
||||
if self.predicate == "IN":
|
||||
return value in self.value
|
||||
else:
|
||||
elif self.predicate == "NOT_IN":
|
||||
return value not in self.value
|
||||
elif self.predicate == "IS_SUBSET":
|
||||
return value <= self.value
|
||||
elif self.predicate == "IS_SUPERSET":
|
||||
return value >= self.value
|
||||
|
||||
def __repr__(self):
|
||||
return repr(("SetMemberPredicate", self.i, self.attr, self.value, self.predicate))
|
||||
return repr(("SetPredicate", self.i, self.attr, self.value, self.predicate))
|
||||
|
||||
|
||||
class _ComparisonPredicate:
|
||||
operators = ("==", "!=", ">=", "<=", ">", "<")
|
||||
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False):
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None):
|
||||
self.i = i
|
||||
self.attr = attr
|
||||
self.value = value
|
||||
|
@ -806,11 +829,13 @@ class _ComparisonPredicate:
|
|||
return value < self.value
|
||||
|
||||
|
||||
def _get_extra_predicates(spec, extra_predicates):
|
||||
def _get_extra_predicates(spec, extra_predicates, vocab):
|
||||
predicate_types = {
|
||||
"REGEX": _RegexPredicate,
|
||||
"IN": _SetMemberPredicate,
|
||||
"NOT_IN": _SetMemberPredicate,
|
||||
"IN": _SetPredicate,
|
||||
"NOT_IN": _SetPredicate,
|
||||
"IS_SUBSET": _SetPredicate,
|
||||
"IS_SUPERSET": _SetPredicate,
|
||||
"==": _ComparisonPredicate,
|
||||
"!=": _ComparisonPredicate,
|
||||
">=": _ComparisonPredicate,
|
||||
|
@ -838,7 +863,7 @@ def _get_extra_predicates(spec, extra_predicates):
|
|||
value_with_upper_keys = {k.upper(): v for k, v in value.items()}
|
||||
for type_, cls in predicate_types.items():
|
||||
if type_ in value_with_upper_keys:
|
||||
predicate = cls(len(extra_predicates), attr, value_with_upper_keys[type_], type_)
|
||||
predicate = cls(len(extra_predicates), attr, value_with_upper_keys[type_], type_, vocab=vocab)
|
||||
# Don't create redundant predicates.
|
||||
# This helps with efficiency, as we're caching the results.
|
||||
if predicate.key in seen_predicates:
|
||||
|
|
|
@ -186,16 +186,18 @@ cdef class PhraseMatcher:
|
|||
if isinstance(doc, Doc):
|
||||
attrs = (TAG, POS, MORPH, LEMMA, DEP)
|
||||
has_annotation = {attr: doc.has_annotation(attr) for attr in attrs}
|
||||
if self.attr == TAG and not has_annotation[TAG]:
|
||||
raise ValueError(Errors.E155.format(pipe="tagger", attr="TAG"))
|
||||
if self.attr == POS and not has_annotation[POS]:
|
||||
raise ValueError(Errors.E155.format(pipe="morphologizer", attr="POS"))
|
||||
if self.attr == MORPH and not has_annotation[MORPH]:
|
||||
raise ValueError(Errors.E155.format(pipe="morphologizer", attr="MORPH"))
|
||||
if self.attr == LEMMA and not has_annotation[LEMMA]:
|
||||
raise ValueError(Errors.E155.format(pipe="lemmatizer", attr="LEMMA"))
|
||||
if self.attr == DEP and not has_annotation[DEP]:
|
||||
raise ValueError(Errors.E156.format())
|
||||
for attr in attrs:
|
||||
if self.attr == attr and not has_annotation[attr]:
|
||||
if attr == TAG:
|
||||
pipe = "tagger"
|
||||
elif attr in (POS, MORPH):
|
||||
pipe = "morphologizer"
|
||||
elif attr == LEMMA:
|
||||
pipe = "lemmatizer"
|
||||
elif attr == DEP:
|
||||
pipe = "parser"
|
||||
error_msg = Errors.E155.format(pipe=pipe, attr=self.vocab.strings.as_string(attr))
|
||||
raise ValueError(error_msg)
|
||||
if self._validate and any(has_annotation.values()) \
|
||||
and self.attr not in attrs:
|
||||
string_attr = self.vocab.strings[self.attr]
|
||||
|
|
|
@ -79,7 +79,7 @@ class AttributeRuler(Pipe):
|
|||
|
||||
DOCS: https://nightly.spacy.io/api/attributeruler#call
|
||||
"""
|
||||
matches = sorted(self.matcher(doc))
|
||||
matches = sorted(self.matcher(doc, allow_missing=True))
|
||||
|
||||
for match_id, start, end in matches:
|
||||
span = Span(doc, start, end, label=match_id)
|
||||
|
@ -126,8 +126,12 @@ class AttributeRuler(Pipe):
|
|||
for tag, attrs in tag_map.items():
|
||||
pattern = [{"TAG": tag}]
|
||||
attrs, morph_attrs = _split_morph_attrs(attrs)
|
||||
morph = self.vocab.morphology.add(morph_attrs)
|
||||
attrs["MORPH"] = self.vocab.strings[morph]
|
||||
if "MORPH" not in attrs:
|
||||
morph = self.vocab.morphology.add(morph_attrs)
|
||||
attrs["MORPH"] = self.vocab.strings[morph]
|
||||
else:
|
||||
morph = self.vocab.morphology.add(attrs["MORPH"])
|
||||
attrs["MORPH"] = self.vocab.strings[morph]
|
||||
self.add([pattern], attrs)
|
||||
|
||||
def load_from_morph_rules(
|
||||
|
@ -146,8 +150,12 @@ class AttributeRuler(Pipe):
|
|||
pattern = [{"ORTH": word, "TAG": tag}]
|
||||
attrs = morph_rules[tag][word]
|
||||
attrs, morph_attrs = _split_morph_attrs(attrs)
|
||||
morph = self.vocab.morphology.add(morph_attrs)
|
||||
attrs["MORPH"] = self.vocab.strings[morph]
|
||||
if "MORPH" in attrs:
|
||||
morph = self.vocab.morphology.add(attrs["MORPH"])
|
||||
attrs["MORPH"] = self.vocab.strings[morph]
|
||||
elif morph_attrs:
|
||||
morph = self.vocab.morphology.add(morph_attrs)
|
||||
attrs["MORPH"] = self.vocab.strings[morph]
|
||||
self.add([pattern], attrs)
|
||||
|
||||
def add(
|
||||
|
|
|
@ -16,6 +16,7 @@ from ..training import Example, validate_examples
|
|||
from ..errors import Errors, Warnings
|
||||
from ..util import SimpleFrozenList
|
||||
from .. import util
|
||||
from ..scorer import Scorer
|
||||
|
||||
|
||||
default_model_config = """
|
||||
|
@ -47,6 +48,11 @@ DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]
|
|||
"incl_context": True,
|
||||
"get_candidates": {"@misc": "spacy.CandidateGenerator.v1"},
|
||||
},
|
||||
default_score_weights={
|
||||
"nel_micro_f": 1.0,
|
||||
"nel_micro_r": None,
|
||||
"nel_micro_p": None,
|
||||
},
|
||||
)
|
||||
def make_entity_linker(
|
||||
nlp: Language,
|
||||
|
@ -209,12 +215,11 @@ class EntityLinker(Pipe):
|
|||
# it does run the model twice :(
|
||||
predictions = self.model.predict(docs)
|
||||
for eg in examples:
|
||||
sentences = [s for s in eg.predicted.sents]
|
||||
sentences = [s for s in eg.reference.sents]
|
||||
kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
|
||||
for ent in eg.predicted.ents:
|
||||
kb_id = kb_ids[
|
||||
ent.start
|
||||
] # KB ID of the first token is the same as the whole span
|
||||
for ent in eg.reference.ents:
|
||||
# KB ID of the first token is the same as the whole span
|
||||
kb_id = kb_ids[ent.start]
|
||||
if kb_id:
|
||||
try:
|
||||
# find the sentence in the list of sentences.
|
||||
|
@ -253,7 +258,7 @@ class EntityLinker(Pipe):
|
|||
entity_encodings = []
|
||||
for eg in examples:
|
||||
kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
|
||||
for ent in eg.predicted.ents:
|
||||
for ent in eg.reference.ents:
|
||||
kb_id = kb_ids[ent.start]
|
||||
if kb_id:
|
||||
entity_encoding = self.kb.get_vector(kb_id)
|
||||
|
@ -415,6 +420,17 @@ class EntityLinker(Pipe):
|
|||
for token in ent:
|
||||
token.ent_kb_id_ = kb_id
|
||||
|
||||
def score(self, examples, **kwargs):
    """Score a batch of examples on entity linking.

    The examples are validated first and then handed to
    Scorer.score_links, with this component's NIL value treated as the
    negative ("no link") label.

    examples (Iterable[Example]): The examples to score.
    RETURNS (Dict[str, Any]): The scores.

    DOCS TODO: https://nightly.spacy.io/api/entity_linker#score
    """
    validate_examples(examples, "EntityLinker.score")
    nil_labels = [self.NIL]
    link_scores = Scorer.score_links(examples, negative_labels=nil_labels)
    return link_scores
|
||||
|
||||
def to_disk(
|
||||
self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> None:
|
||||
|
|
|
@ -6,7 +6,7 @@ from .transition_parser cimport Parser
|
|||
from ._parser_internals.ner cimport BiluoPushDown
|
||||
|
||||
from ..language import Language
|
||||
from ..scorer import Scorer
|
||||
from ..scorer import get_ner_prf, PRFScore
|
||||
from ..training import validate_examples
|
||||
|
||||
|
||||
|
@ -117,9 +117,18 @@ cdef class EntityRecognizer(Parser):
|
|||
"""Score a batch of examples.
|
||||
|
||||
examples (Iterable[Example]): The examples to score.
|
||||
RETURNS (Dict[str, Any]): The scores, produced by Scorer.score_spans.
|
||||
RETURNS (Dict[str, Any]): The NER precision, recall and f-scores.
|
||||
|
||||
DOCS: https://nightly.spacy.io/api/entityrecognizer#score
|
||||
"""
|
||||
validate_examples(examples, "EntityRecognizer.score")
|
||||
return Scorer.score_spans(examples, "ents", **kwargs)
|
||||
score_per_type = get_ner_prf(examples)
|
||||
totals = PRFScore()
|
||||
for prf in score_per_type.values():
|
||||
totals += prf
|
||||
return {
|
||||
"ents_p": totals.precision,
|
||||
"ents_r": totals.recall,
|
||||
"ents_f": totals.fscore,
|
||||
"ents_per_type": {k: v.to_dict() for k, v in score_per_type.items()},
|
||||
}
|
||||
|
|
|
@ -61,6 +61,8 @@ class TokenPatternString(BaseModel):
|
|||
REGEX: Optional[StrictStr] = Field(None, alias="regex")
|
||||
IN: Optional[List[StrictStr]] = Field(None, alias="in")
|
||||
NOT_IN: Optional[List[StrictStr]] = Field(None, alias="not_in")
|
||||
IS_SUBSET: Optional[List[StrictStr]] = Field(None, alias="is_subset")
|
||||
IS_SUPERSET: Optional[List[StrictStr]] = Field(None, alias="is_superset")
|
||||
|
||||
class Config:
|
||||
extra = "forbid"
|
||||
|
@ -77,6 +79,8 @@ class TokenPatternNumber(BaseModel):
|
|||
REGEX: Optional[StrictStr] = Field(None, alias="regex")
|
||||
IN: Optional[List[StrictInt]] = Field(None, alias="in")
|
||||
NOT_IN: Optional[List[StrictInt]] = Field(None, alias="not_in")
|
||||
ISSUBSET: Optional[List[StrictInt]] = Field(None, alias="issubset")
|
||||
ISSUPERSET: Optional[List[StrictInt]] = Field(None, alias="issuperset")
|
||||
EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
|
||||
NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
|
||||
GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
|
||||
|
@ -115,6 +119,7 @@ class TokenPattern(BaseModel):
|
|||
lower: Optional[StringValue] = None
|
||||
pos: Optional[StringValue] = None
|
||||
tag: Optional[StringValue] = None
|
||||
morph: Optional[StringValue] = None
|
||||
dep: Optional[StringValue] = None
|
||||
lemma: Optional[StringValue] = None
|
||||
shape: Optional[StringValue] = None
|
||||
|
|
121
spacy/scorer.py
121
spacy/scorer.py
|
@ -1,5 +1,6 @@
|
|||
from typing import Optional, Iterable, Dict, Any, Callable, TYPE_CHECKING
|
||||
import numpy as np
|
||||
from collections import defaultdict
|
||||
|
||||
from .training import Example
|
||||
from .tokens import Token, Doc, Span
|
||||
|
@ -23,6 +24,19 @@ class PRFScore:
|
|||
self.fp = 0
|
||||
self.fn = 0
|
||||
|
||||
def __iadd__(self, other):
    """Add the counts of `other` onto this score in place (`a += b`).

    other: An object exposing `tp`, `fp` and `fn` counts.
    RETURNS: This same object, with its three counts increased.
    """
    for count_name in ("tp", "fp", "fn"):
        combined = getattr(self, count_name) + getattr(other, count_name)
        setattr(self, count_name, combined)
    return self
|
||||
|
||||
def __add__(self, other):
    """Return a new score whose counts are the sums of both operands.

    Neither operand is modified.

    other: An object exposing `tp`, `fp` and `fn` counts.
    RETURNS: A fresh instance of this object's class holding the summed
        true-positive, false-positive and false-negative counts.
    """
    # Build an empty score and fill it in rather than passing the counts as
    # keyword arguments: everywhere else the class is constructed without
    # arguments, so the zero-argument form is the one known to work.
    total = type(self)()
    total.tp = self.tp + other.tp
    total.fp = self.fp + other.fp
    total.fn = self.fn + other.fn
    return total
|
||||
|
||||
def score_set(self, cand: set, gold: set) -> None:
|
||||
self.tp += len(cand.intersection(gold))
|
||||
self.fp += len(cand - gold)
|
||||
|
@ -295,12 +309,6 @@ class Scorer:
|
|||
# Find all predidate labels, for all and per type
|
||||
gold_spans = set()
|
||||
pred_spans = set()
|
||||
# Special case for ents:
|
||||
# If we have missing values in the gold, we can't easily tell
|
||||
# whether our NER predictions are true.
|
||||
# It seems bad but it's what we've always done.
|
||||
if attr == "ents" and not all(token.ent_iob != 0 for token in gold_doc):
|
||||
continue
|
||||
for span in getter(gold_doc, attr):
|
||||
gold_span = (span.label_, span.start, span.end - 1)
|
||||
gold_spans.add(gold_span)
|
||||
|
@ -451,6 +459,74 @@ class Scorer:
|
|||
results[f"{attr}_score_desc"] = "macro AUC"
|
||||
return results
|
||||
|
||||
@staticmethod
def score_links(
    examples: Iterable[Example], *, negative_labels: Iterable[str]
) -> Dict[str, Any]:
    """Returns PRF for predicted links on the entity level.
    To disentangle the performance of the NEL from the NER,
    this method only evaluates NEL links for entities that overlap
    between the gold reference and the predictions.

    Predicted entities that have no gold entity with exactly the same
    character offsets are skipped and do not affect any count.

    examples (Iterable[Example]): Examples to score
    negative_labels (Iterable[str]): The string values that refer to no annotation (e.g. "NIL")
    RETURNS (Dict[str, Any]): A dictionary containing the scores: the micro
        and macro averaged precision/recall/F ("nel_micro_*",
        "nel_macro_*"), the per-label breakdown ("nel_f_per_type") and
        "nel_score" / "nel_score_desc" for the headline micro F.

    DOCS (TODO): https://nightly.spacy.io/api/scorer#score_links
    """
    # Materialize once so that a one-shot iterable still works for the
    # repeated membership tests below.
    negative_labels = tuple(negative_labels)
    f_per_type = {}
    for example in examples:
        gold_ent_by_offset = {
            (gold_ent.start_char, gold_ent.end_char): gold_ent
            for gold_ent in example.reference.ents
        }

        for pred_ent in example.predicted.ents:
            gold_span = gold_ent_by_offset.get(
                (pred_ent.start_char, pred_ent.end_char)
            )
            if gold_span is None:
                # The NER produced a span that is not in the gold standard:
                # that is an NER error, not a linking error, so there is
                # nothing to evaluate here.
                continue
            label = gold_span.label_
            if label not in f_per_type:
                f_per_type[label] = PRFScore()
            gold = gold_span.kb_id_
            # only evaluating entities that overlap between gold and pred,
            # to disentangle the performance of the NEL from the NER
            if gold is not None:
                pred = pred_ent.kb_id_
                if gold in negative_labels and pred in negative_labels:
                    # ignore true negatives
                    pass
                elif gold == pred:
                    f_per_type[label].tp += 1
                elif gold in negative_labels:
                    f_per_type[label].fp += 1
                elif pred in negative_labels:
                    f_per_type[label].fn += 1
                else:
                    # a wrong prediction (e.g. Q42 != Q3) counts as both a FP as well as a FN
                    f_per_type[label].fp += 1
                    f_per_type[label].fn += 1
    micro_prf = PRFScore()
    for label_prf in f_per_type.values():
        micro_prf.tp += label_prf.tp
        micro_prf.fn += label_prf.fn
        micro_prf.fp += label_prf.fp
    # The tiny epsilon keeps the macro averages defined (as 0.0) when no
    # label was scored at all.
    n_labels = len(f_per_type) + 1e-100
    macro_p = sum(prf.precision for prf in f_per_type.values()) / n_labels
    macro_r = sum(prf.recall for prf in f_per_type.values()) / n_labels
    macro_f = sum(prf.fscore for prf in f_per_type.values()) / n_labels
    results = {
        "nel_score": micro_prf.fscore,
        "nel_score_desc": "micro F",
        "nel_micro_p": micro_prf.precision,
        "nel_micro_r": micro_prf.recall,
        "nel_micro_f": micro_prf.fscore,
        "nel_macro_p": macro_p,
        "nel_macro_r": macro_r,
        "nel_macro_f": macro_f,
        "nel_f_per_type": {k: v.to_dict() for k, v in f_per_type.items()},
    }
    return results
|
||||
|
||||
@staticmethod
|
||||
def score_deps(
|
||||
examples: Iterable[Example],
|
||||
|
@ -545,6 +621,39 @@ class Scorer:
|
|||
}
|
||||
|
||||
|
||||
def get_ner_prf(examples: Iterable[Example]) -> Dict[str, PRFScore]:
    """Build one PRFScore per entity label from a sequence of examples.

    Examples whose reference doc (eg.y) has no ENT_IOB annotation are
    skipped. Each predicted entity is projected onto the reference tokens
    through the x2y alignment; it is ignored if it aligns to nothing or if
    any aligned reference token has a missing entity annotation
    (ent_iob == 0). Otherwise it counts as a true positive when a gold
    entity with the same label and token span exists, and as a false
    positive when not. Gold entities left unmatched count as false
    negatives. Summing the returned PRFScore objects gives the
    micro-averaged total.

    examples (Iterable[Example]): The examples to score.
    RETURNS (Dict[str, PRFScore]): The scores, keyed by entity label.
    """
    prf_by_label = defaultdict(PRFScore)
    for example in examples:
        if not example.y.has_annotation("ENT_IOB"):
            continue
        unmatched_gold = {
            (gold_ent.label_, gold_ent.start, gold_ent.end)
            for gold_ent in example.y.ents
        }
        x2y = example.alignment.x2y
        for predicted in example.x.ents:
            # Make sure every predicted label gets an entry, even if all of
            # its predictions end up being ignored below.
            if predicted.label_ not in prf_by_label:
                prf_by_label[predicted.label_] = PRFScore()
            aligned = x2y[predicted.start : predicted.end].dataXd.ravel()
            if not len(aligned):
                continue
            reference_span = example.y[aligned[0] : aligned[-1] + 1]
            # A prediction over tokens with missing gold annotation is
            # neither right nor wrong, so it is left out of the counts.
            if any(token.ent_iob == 0 for token in reference_span):
                continue
            candidate = (predicted.label_, aligned[0], aligned[-1] + 1)
            if candidate in unmatched_gold:
                prf_by_label[predicted.label_].tp += 1
                unmatched_gold.remove(candidate)
            else:
                prf_by_label[predicted.label_].fp += 1
        for gold_label, _start, _end in unmatched_gold:
            prf_by_label[gold_label].fn += 1
    return prf_by_label
|
||||
|
||||
|
||||
#############################################################################
|
||||
#
|
||||
# The following implementation of roc_auc_score() is adapted from
|
||||
|
|
|
@ -230,6 +230,106 @@ def test_matcher_set_value_operator(en_vocab):
|
|||
assert len(matches) == 1
|
||||
|
||||
|
||||
def test_matcher_subset_value_operator(en_vocab):
    """Check how many tokens an IS_SUBSET pattern matches for MORPH and TAG."""
    morph_matcher = Matcher(en_vocab)
    morph_matcher.add("M", [[{"MORPH": {"IS_SUBSET": ["Feat=Val", "Feat2=Val2"]}}]])
    doc = Doc(en_vocab, words=["a", "b", "c"])
    # Before any morph is set, all three tokens match.
    assert len(morph_matcher(doc)) == 3
    # The first token keeps matching until it has a feature outside the
    # pattern's set.
    morph_steps = (
        ("Feat=Val", 3),
        ("Feat=Val|Feat2=Val2", 3),
        ("Feat=Val|Feat2=Val2|Feat3=Val3", 2),
        ("Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4", 2),
    )
    for morph, n_expected in morph_steps:
        doc[0].morph_ = morph
        assert len(morph_matcher(doc)) == n_expected

    tag_cases = (
        # IS_SUBSET acts like "IN" for attrs other than MORPH
        (["A", "B"], 1),
        # IS_SUBSET with an empty list matches nothing
        ([], 0),
    )
    for allowed_tags, n_expected in tag_cases:
        tag_matcher = Matcher(en_vocab)
        tag_matcher.add("M", [[{"TAG": {"IS_SUBSET": allowed_tags}}]])
        tagged = Doc(en_vocab, words=["a", "b", "c"])
        tagged[0].tag_ = "A"
        assert len(tag_matcher(tagged)) == n_expected
|
||||
|
||||
|
||||
def test_matcher_superset_value_operator(en_vocab):
    """Check how many tokens an IS_SUPERSET pattern matches for MORPH and TAG."""
    morph_matcher = Matcher(en_vocab)
    required = ["Feat=Val", "Feat2=Val2", "Feat3=Val3"]
    morph_matcher.add("M", [[{"MORPH": {"IS_SUPERSET": required}}]])
    doc = Doc(en_vocab, words=["a", "b", "c"])
    # No token has any morph features yet, so nothing matches.
    assert len(morph_matcher(doc)) == 0
    # The first token only matches once it carries all required features;
    # additional features do not hurt.
    morph_steps = (
        ("Feat=Val|Feat2=Val2", 0),
        ("Feat=Val|Feat2=Val2|Feat3=Val3", 1),
        ("Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4", 1),
    )
    for morph, n_expected in morph_steps:
        doc[0].morph_ = morph
        assert len(morph_matcher(doc)) == n_expected

    tag_cases = (
        # IS_SUPERSET with more than one value only matches for MORPH
        (["A", "B"], 0),
        # IS_SUPERSET with one value is the same as ==
        (["A"], 1),
        # IS_SUPERSET with an empty value matches everything
        ([], 3),
    )
    for required_tags, n_expected in tag_cases:
        tag_matcher = Matcher(en_vocab)
        tag_matcher.add("M", [[{"TAG": {"IS_SUPERSET": required_tags}}]])
        tagged = Doc(en_vocab, words=["a", "b", "c"])
        tagged[0].tag_ = "A"
        assert len(tag_matcher(tagged)) == n_expected
|
||||
|
||||
|
||||
def test_matcher_morph_handling(en_vocab):
    """Check feature-order and multi-value handling of MORPH patterns."""
    scenarios = (
        # order of features in pattern doesn't matter
        (
            {"IN": ["Feat1=Val1|Feat2=Val2"]},
            {"IN": ["Feat2=Val2|Feat1=Val1"]},
            (
                ("Feat2=Val2|Feat1=Val1", 2),
                ("Feat1=Val1|Feat2=Val2", 2),
            ),
        ),
        # multiple values are split
        (
            {"IS_SUPERSET": ["Feat1=Val1", "Feat2=Val2"]},
            {"IS_SUPERSET": ["Feat1=Val1", "Feat1=Val3", "Feat2=Val2"]},
            (
                ("Feat2=Val2,Val3|Feat1=Val1", 1),
                ("Feat1=Val1,Val3|Feat2=Val2", 2),
            ),
        ),
    )
    for morph_spec_m, morph_spec_n, morph_steps in scenarios:
        matcher = Matcher(en_vocab)
        matcher.add("M", [[{"MORPH": morph_spec_m}]])
        matcher.add("N", [[{"MORPH": morph_spec_n}]])
        doc = Doc(en_vocab, words=["a", "b", "c"])
        # Nothing matches while no token has morph features.
        assert len(matcher(doc)) == 0

        for morph, n_expected in morph_steps:
            doc[0].morph_ = morph
            assert len(matcher(doc)) == n_expected
|
||||
|
||||
|
||||
def test_matcher_regex(en_vocab):
|
||||
matcher = Matcher(en_vocab)
|
||||
pattern = [{"ORTH": {"REGEX": r"(?:a|an)"}}]
|
||||
|
@ -316,6 +416,9 @@ def test_attr_pipeline_checks(en_vocab):
|
|||
matcher(doc2)
|
||||
with pytest.raises(ValueError):
|
||||
matcher(doc3)
|
||||
# errors can be suppressed if desired
|
||||
matcher(doc2, allow_missing=True)
|
||||
matcher(doc3, allow_missing=True)
|
||||
# TAG, POS, LEMMA require those values
|
||||
for attr in ("TAG", "POS", "LEMMA"):
|
||||
matcher = Matcher(en_vocab)
|
||||
|
|
|
@ -2,8 +2,10 @@ from typing import Callable, Iterable
|
|||
import pytest
|
||||
|
||||
from spacy.kb import KnowledgeBase, get_candidates, Candidate
|
||||
from spacy.vocab import Vocab
|
||||
|
||||
from spacy import util, registry
|
||||
from spacy.scorer import Scorer
|
||||
from spacy.training import Example
|
||||
from spacy.lang.en import English
|
||||
from spacy.tests.util import make_tempdir
|
||||
|
@ -151,22 +153,15 @@ def test_kb_serialize(nlp):
|
|||
# normal read-write behaviour
|
||||
mykb.to_disk(d / "kb")
|
||||
mykb.from_disk(d / "kb")
|
||||
mykb.to_disk(d / "kb.file")
|
||||
mykb.from_disk(d / "kb.file")
|
||||
mykb.to_disk(d / "new" / "kb")
|
||||
mykb.from_disk(d / "new" / "kb")
|
||||
# allow overwriting an existing file
|
||||
mykb.to_disk(d / "kb.file")
|
||||
with pytest.raises(ValueError):
|
||||
# can not write to a directory
|
||||
mykb.to_disk(d)
|
||||
with pytest.raises(ValueError):
|
||||
# can not read from a directory
|
||||
mykb.from_disk(d)
|
||||
mykb.to_disk(d / "kb")
|
||||
with pytest.raises(ValueError):
|
||||
# can not read from an unknown file
|
||||
mykb.from_disk(d / "unknown" / "kb")
|
||||
|
||||
|
||||
def test_candidate_generation(nlp):
|
||||
"""Test correct candidate generation"""
|
||||
mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
|
||||
|
@ -254,6 +249,41 @@ def test_el_pipe_configuration(nlp):
|
|||
assert doc[2].ent_kb_id_ == "Q2"
|
||||
|
||||
|
||||
def test_vocab_serialization(nlp):
    """Check that entity and alias strings survive a KB round trip to disk.

    A KnowledgeBase is populated on top of the pipeline's vocab, written to a
    temporary directory and read back into a KnowledgeBase that was created
    with a fresh ``Vocab()``. The candidates returned for the alias "adam"
    must carry the same hashes and the same strings before and after.
    """
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # Entities: only the hash returned for Q2 is compared later on.
    mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
    q2_hash = mykb.add_entity(entity="Q2", freq=12, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=5, entity_vector=[3])

    # Aliases: only the hash returned for "adam" is compared later on.
    mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1])
    adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

    def check_adam_candidates(kb):
        # "adam" must resolve to exactly one candidate pointing at Q2.
        candidates = kb.get_alias_candidates("adam")
        assert len(candidates) == 1
        assert candidates[0].entity == q2_hash
        assert candidates[0].entity_ == "Q2"
        assert candidates[0].alias == adam_hash
        assert candidates[0].alias_ == "adam"

    check_adam_candidates(mykb)

    with make_tempdir() as d:
        kb_path = d / "kb"
        mykb.to_disk(kb_path)
        # The new KB starts from an empty vocab, so any string it can
        # return has to come from what was stored on disk.
        kb_new_vocab = KnowledgeBase(Vocab(), entity_vector_length=1)
        kb_new_vocab.from_disk(kb_path)

        check_adam_candidates(kb_new_vocab)
|
||||
|
||||
|
||||
def test_append_alias(nlp):
|
||||
"""Test that we can append additional alias-entity pairs"""
|
||||
mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
|
||||
|
@ -377,16 +407,20 @@ def test_preserving_links_ents_2(nlp):
|
|||
TRAIN_DATA = [
|
||||
("Russ Cochran captured his first major title with his son as caddie.",
|
||||
{"links": {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}},
|
||||
"entities": [(0, 12, "PERSON")]}),
|
||||
"entities": [(0, 12, "PERSON")],
|
||||
"sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}),
|
||||
("Russ Cochran his reprints include EC Comics.",
|
||||
{"links": {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}},
|
||||
"entities": [(0, 12, "PERSON")]}),
|
||||
"entities": [(0, 12, "PERSON")],
|
||||
"sent_starts": [1, -1, 0, 0, 0, 0, 0, 0]}),
|
||||
("Russ Cochran has been publishing comic art.",
|
||||
{"links": {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}},
|
||||
"entities": [(0, 12, "PERSON")]}),
|
||||
"entities": [(0, 12, "PERSON")],
|
||||
"sent_starts": [1, -1, 0, 0, 0, 0, 0, 0]}),
|
||||
("Russ Cochran was a member of University of Kentucky's golf team.",
|
||||
{"links": {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}},
|
||||
"entities": [(0, 12, "PERSON"), (43, 51, "LOC")]}),
|
||||
"entities": [(0, 12, "PERSON"), (43, 51, "LOC")],
|
||||
"sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]})
|
||||
]
|
||||
GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
|
||||
# fmt: on
|
||||
|
@ -395,16 +429,8 @@ GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
|
|||
def test_overfitting_IO():
|
||||
# Simple test to try and quickly overfit the NEL component - ensuring the ML models work correctly
|
||||
nlp = English()
|
||||
nlp.add_pipe("sentencizer")
|
||||
vector_length = 3
|
||||
|
||||
# Add a custom component to recognize "Russ Cochran" as an entity for the example training data
|
||||
patterns = [
|
||||
{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
|
||||
]
|
||||
ruler = nlp.add_pipe("entity_ruler")
|
||||
ruler.add_patterns(patterns)
|
||||
|
||||
# Convert the texts to docs to make sure we have doc.ents set for the training examples
|
||||
train_examples = []
|
||||
for text, annotation in TRAIN_DATA:
|
||||
|
@ -446,6 +472,16 @@ def test_overfitting_IO():
|
|||
nlp.update(train_examples, sgd=optimizer, losses=losses)
|
||||
assert losses["entity_linker"] < 0.001
|
||||
|
||||
# adding additional components that are required for the entity_linker
|
||||
nlp.add_pipe("sentencizer", first=True)
|
||||
|
||||
# Add a custom component to recognize "Russ Cochran" as an entity for the example training data
|
||||
patterns = [
|
||||
{"label": "PERSON", "pattern": [{"LOWER": "russ"}, {"LOWER": "cochran"}]}
|
||||
]
|
||||
ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
|
||||
ruler.add_patterns(patterns)
|
||||
|
||||
# test the trained model
|
||||
predictions = []
|
||||
for text, annotation in TRAIN_DATA:
|
||||
|
@ -465,3 +501,46 @@ def test_overfitting_IO():
|
|||
for ent in doc2.ents:
|
||||
predictions.append(ent.kb_id_)
|
||||
assert predictions == GOLD_entities
|
||||
|
||||
|
||||
def test_scorer_links():
    """Check the precision/recall values reported by ``Scorer.score_links``.

    Three texts are processed twice each with a blank English pipeline: once
    for the gold reference and once for the prediction. Entity spans with
    KB IDs are set by hand on both docs, "NIL" is passed as the negative
    label, and the per-type and micro-averaged P/R are compared to fixed
    fractions.
    """
    nlp = English()

    # (text, gold spans, predicted spans); a span is (start, end, label, kb_id).
    cases = [
        (
            "Julia lives in London happily.",
            [(0, 1, "PERSON", "Q2"), (3, 4, "LOC", "Q3")],
            [(0, 1, "PERSON", "Q70"), (3, 4, "LOC", "Q3")],
        ),
        (
            "She loves London.",
            [(0, 1, "PERSON", "Q2"), (2, 3, "LOC", "Q13")],
            [(0, 1, "PERSON", "Q2"), (2, 3, "LOC", "NIL")],
        ),
        (
            "London is great.",
            [(0, 1, "LOC", "NIL")],
            [(0, 1, "LOC", "NIL")],
        ),
    ]

    def annotate(text, spans):
        # Tokenize the text and attach the given entity spans to the doc.
        doc = nlp(text)
        doc.ents = [
            Span(doc, start, end, label=label, kb_id=kb_id)
            for start, end, label, kb_id in spans
        ]
        return doc

    train_examples = []
    for text, gold_spans, pred_spans in cases:
        # Reference first, prediction second; Example takes (predicted, reference).
        ref = annotate(text, gold_spans)
        pred = annotate(text, pred_spans)
        train_examples.append(Example(pred, ref))

    scores = Scorer().score_links(train_examples, negative_labels=["NIL"])
    per_type = scores["nel_f_per_type"]
    assert per_type["PERSON"]["p"] == 1 / 2
    assert per_type["PERSON"]["r"] == 1 / 2
    assert per_type["LOC"]["p"] == 1 / 1
    assert per_type["LOC"]["r"] == 1 / 2

    assert scores["nel_micro_p"] == 2 / 3
    assert scores["nel_micro_r"] == 2 / 4
|
||||
|
|
|
@ -345,12 +345,13 @@ def test_language_factories_invalid():
|
|||
[{"a": 100, "b": 400}, {"c": 0.5, "d": 0.5}],
|
||||
{"a": 0.1, "b": 0.4, "c": 0.25, "d": 0.25},
|
||||
),
|
||||
([{"a": 0.5, "b": 0.5}, {"b": 1.0}], {"a": 0.25, "b": 0.75},),
|
||||
([{"a": 0.5, "b": 0.5}, {"b": 1.0}], {"a": 0.25, "b": 0.75}),
|
||||
([{"a": 0.0, "b": 0.0}, {"c": 0.0}], {"a": 0.0, "b": 0.0, "c": 0.0}),
|
||||
],
|
||||
)
|
||||
def test_language_factories_combine_score_weights(weights, expected):
|
||||
result = combine_score_weights(weights)
|
||||
assert sum(result.values()) in (0.99, 1.0)
|
||||
assert sum(result.values()) in (0.99, 1.0, 0.0)
|
||||
assert result == expected
|
||||
|
||||
|
||||
|
|
|
@ -244,3 +244,22 @@ def test_Example_from_dict_with_links_invalid(annots):
|
|||
predicted = Doc(vocab, words=annots["words"])
|
||||
with pytest.raises(ValueError):
|
||||
Example.from_dict(predicted, annots)
|
||||
|
||||
|
||||
def test_Example_from_dict_sentences():
    """Check that ``sent_starts`` annotations yield the expected sentences.

    Each case builds a predicted Doc from a list of words, creates an Example
    from a ``sent_starts`` annotation and counts the sentences found on the
    reference doc.
    """
    vocab = Vocab()

    # (words, sent_starts, expected number of reference sentences)
    cases = [
        (["One", "sentence", ".", "one", "more"], [1, 0, 0, 1, 0], 2),
        (["One", "sentence", "not", "one", "more"], [1, -1, 0, 0, 0], 1),
    ]
    # NOTE: the second word list combined with sent_starts [1, 0, 0, 0, 0]
    # (expected: 1 sentence) currently throws an error - bug or feature?
    # That case is therefore left out of the table above.

    for words, sent_starts, n_sents in cases:
        predicted = Doc(vocab, words=words)
        annots = {"sent_starts": sent_starts}
        ex = Example.from_dict(predicted, annots)
        assert len(list(ex.reference.sents)) == n_sents
|
|
@ -1,4 +1,5 @@
|
|||
from ..tokens.doc cimport Doc
|
||||
from libc.stdint cimport uint64_t
|
||||
|
||||
|
||||
cdef class Example:
|
||||
|
@ -7,3 +8,5 @@ cdef class Example:
|
|||
cdef readonly object _cached_alignment
|
||||
cdef readonly object _cached_words_x
|
||||
cdef readonly object _cached_words_y
|
||||
cdef readonly uint64_t _x_sig
|
||||
cdef readonly uint64_t _y_sig
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from collections import Iterable as IterableInstance
|
||||
import warnings
|
||||
import numpy
|
||||
from murmurhash.mrmr cimport hash64
|
||||
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..tokens.span cimport Span
|
||||
|
@ -97,15 +98,36 @@ cdef class Example:
|
|||
|
||||
@property
|
||||
def alignment(self):
|
||||
words_x = [token.text for token in self.x]
|
||||
words_y = [token.text for token in self.y]
|
||||
if self._cached_alignment is None or \
|
||||
words_x != self._cached_words_x or \
|
||||
words_y != self._cached_words_y:
|
||||
self._cached_alignment = Alignment.from_strings(words_x, words_y)
|
||||
x_sig = hash64(self.x.c, sizeof(self.x.c[0]) * self.x.length, 0)
|
||||
y_sig = hash64(self.y.c, sizeof(self.y.c[0]) * self.y.length, 0)
|
||||
if self._cached_alignment is None:
|
||||
words_x = [token.text for token in self.x]
|
||||
words_y = [token.text for token in self.y]
|
||||
self._x_sig = x_sig
|
||||
self._y_sig = y_sig
|
||||
self._cached_words_x = words_x
|
||||
self._cached_words_y = words_y
|
||||
return self._cached_alignment
|
||||
self._cached_alignment = Alignment.from_strings(words_x, words_y)
|
||||
return self._cached_alignment
|
||||
elif self._x_sig == x_sig and self._y_sig == y_sig:
|
||||
# If we have a cached alignment, check whether the cache is invalid
|
||||
# due to retokenization. To make this check fast in loops, we first
|
||||
# check a hash of the TokenC arrays.
|
||||
return self._cached_alignment
|
||||
else:
|
||||
words_x = [token.text for token in self.x]
|
||||
words_y = [token.text for token in self.y]
|
||||
if words_x == self._cached_words_x and words_y == self._cached_words_y:
|
||||
self._x_sig = x_sig
|
||||
self._y_sig = y_sig
|
||||
return self._cached_alignment
|
||||
else:
|
||||
self._cached_alignment = Alignment.from_strings(words_x, words_y)
|
||||
self._cached_words_x = words_x
|
||||
self._cached_words_y = words_y
|
||||
self._x_sig = x_sig
|
||||
self._y_sig = y_sig
|
||||
return self._cached_alignment
|
||||
|
||||
def get_aligned(self, field, as_string=False):
|
||||
"""Return an aligned array for a token attribute."""
|
||||
|
@ -288,7 +310,6 @@ def _annot2array(vocab, tok_annot, doc_annot):
|
|||
|
||||
|
||||
def _add_entities_to_doc(doc, ner_data):
|
||||
print(ner_data)
|
||||
if ner_data is None:
|
||||
return
|
||||
elif ner_data == []:
|
||||
|
|
|
@ -1233,8 +1233,13 @@ def combine_score_weights(
|
|||
# components.
|
||||
total = sum(w_dict.values())
|
||||
for key, value in w_dict.items():
|
||||
weight = round(value / total / len(all_weights), 2)
|
||||
result[key] = result.get(key, 0.0) + weight
|
||||
if total == 0:
|
||||
weight = 0.0
|
||||
else:
|
||||
weight = round(value / total / len(all_weights), 2)
|
||||
prev_weight = result.get(key, 0.0)
|
||||
prev_weight = 0.0 if prev_weight is None else prev_weight
|
||||
result[key] = prev_weight + weight
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
@ -225,6 +225,21 @@ pipe's entity linking model and context encoder. Delegates to
|
|||
| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ |
|
||||
| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ |
|
||||
|
||||
## EntityLinker.score {#score tag="method" new="3"}
|
||||
|
||||
Score a batch of examples.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = entity_linker.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | ---------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_links`](/api/scorer#score_links). ~~Dict[str, float]~~ |
|
||||
|
||||
## EntityLinker.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
Create an optimizer for the pipeline component.
|
||||
|
|
|
@ -242,10 +242,10 @@ Score a batch of examples.
|
|||
> scores = ner.score(examples)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | ---------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores, produced by [`Scorer.score_spans`](/api/scorer#score_spans). ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------- |
|
||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||
| **RETURNS** | The scores. ~~Dict[str, Union[float, Dict[str, float]]]~~ |
|
||||
|
||||
## EntityRecognizer.create_optimizer {#create_optimizer tag="method"}
|
||||
|
||||
|
|
|
@ -30,20 +30,20 @@ pattern keys correspond to a number of
|
|||
[`Token` attributes](/api/token#attributes). The supported attributes for
|
||||
rule-based matching are:
|
||||
|
||||
| Attribute | Description |
|
||||
| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `ORTH` | The exact verbatim text of a token. ~~str~~ |
|
||||
| `TEXT` <Tag variant="new">2.1</Tag> | The exact verbatim text of a token. ~~str~~ |
|
||||
| `LOWER` | The lowercase form of the token text. ~~str~~ |
|
||||
| `LENGTH` | The length of the token text. ~~int~~ |
|
||||
| `IS_ALPHA`, `IS_ASCII`, `IS_DIGIT` | Token text consists of alphabetic characters, ASCII characters, digits. ~~bool~~ |
|
||||
| `IS_LOWER`, `IS_UPPER`, `IS_TITLE` | Token text is in lowercase, uppercase, titlecase. ~~bool~~ |
|
||||
| `IS_PUNCT`, `IS_SPACE`, `IS_STOP` | Token is punctuation, whitespace, stop word. ~~bool~~ |
|
||||
| `LIKE_NUM`, `LIKE_URL`, `LIKE_EMAIL` | Token text resembles a number, URL, email. ~~bool~~ |
|
||||
| `POS`, `TAG`, `DEP`, `LEMMA`, `SHAPE` | The token's simple and extended part-of-speech tag, dependency label, lemma, shape. ~~str~~ |
|
||||
| `ENT_TYPE` | The token's entity label. ~~str~~ |
|
||||
| `_` <Tag variant="new">2.1</Tag> | Properties in [custom extension attributes](/usage/processing-pipelines#custom-components-attributes). ~~Dict[str, Any]~~ |
|
||||
| `OP` | Operator or quantifier to determine how often to match a token pattern. ~~str~~ |
|
||||
| Attribute | Description |
|
||||
| ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `ORTH` | The exact verbatim text of a token. ~~str~~ |
|
||||
| `TEXT` <Tag variant="new">2.1</Tag> | The exact verbatim text of a token. ~~str~~ |
|
||||
| `LOWER` | The lowercase form of the token text. ~~str~~ |
|
||||
| `LENGTH` | The length of the token text. ~~int~~ |
|
||||
| `IS_ALPHA`, `IS_ASCII`, `IS_DIGIT` | Token text consists of alphabetic characters, ASCII characters, digits. ~~bool~~ |
|
||||
| `IS_LOWER`, `IS_UPPER`, `IS_TITLE` | Token text is in lowercase, uppercase, titlecase. ~~bool~~ |
|
||||
| `IS_PUNCT`, `IS_SPACE`, `IS_STOP` | Token is punctuation, whitespace, stop word. ~~bool~~ |
|
||||
| `LIKE_NUM`, `LIKE_URL`, `LIKE_EMAIL` | Token text resembles a number, URL, email. ~~bool~~ |
|
||||
| `POS`, `TAG`, `MORPH`, `DEP`, `LEMMA`, `SHAPE` | The token's simple and extended part-of-speech tag, morphological analysis, dependency label, lemma, shape. ~~str~~ |
|
||||
| `ENT_TYPE` | The token's entity label. ~~str~~ |
|
||||
| `_` <Tag variant="new">2.1</Tag> | Properties in [custom extension attributes](/usage/processing-pipelines#custom-components-attributes). ~~Dict[str, Any]~~ |
|
||||
| `OP` | Operator or quantifier to determine how often to match a token pattern. ~~str~~ |
|
||||
|
||||
Operators and quantifiers define **how often** a token pattern should be
|
||||
matched:
|
||||
|
@ -79,6 +79,8 @@ it compares to another value.
|
|||
| -------------------------- | ------------------------------------------------------------------------------------------------------- |
|
||||
| `IN` | Attribute value is member of a list. ~~Any~~ |
|
||||
| `NOT_IN` | Attribute value is _not_ member of a list. ~~Any~~ |
|
||||
| `IS_SUBSET` | Attribute values (for `MORPH`) are a subset of a list. ~~Any~~ |
|
||||
| `IS_SUPERSET` | Attribute values (for `MORPH`) are a superset of a list. ~~Any~~ |
|
||||
| `==`, `>=`, `<=`, `>`, `<` | Attribute value is equal, greater or equal, smaller or equal, greater or smaller. ~~Union[int, float]~~ |
|
||||
|
||||
## Matcher.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -206,3 +206,26 @@ depends on the scorer settings:
|
|||
| `multi_label` | Whether the attribute allows multiple labels. Defaults to `True`. ~~bool~~ |
|
||||
| `positive_label` | The positive label for a binary task with exclusive classes. Defaults to `None`. ~~Optional[str]~~ |
|
||||
| **RETURNS** | A dictionary containing the scores, with inapplicable scores as `None`. ~~Dict[str, Optional[float]]~~ |
|
||||
|
||||
## Scorer.score_links {#score_links tag="staticmethod" new="3"}
|
||||
|
||||
Returns PRF for predicted links on the entity level. To disentangle the
|
||||
performance of the NEL from the NER, this method only evaluates NEL links for
|
||||
entities that overlap between the gold reference and the predictions.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scores = Scorer.score_links(
|
||||
> examples,
|
||||
> negative_labels=["NIL", ""]
|
||||
> )
|
||||
> print(scores["nel_micro_f"])
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------------- | ------------------------------------------------------------------------------------------------------------------- |
|
||||
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
||||
| _keyword-only_ | |
|
||||
| `negative_labels` | The string values that refer to no annotation (e.g. "NIL"). ~~Iterable[str]~~ |
|
||||
| **RETURNS** | A dictionary containing the scores. ~~Dict[str, Optional[float]]~~ |
|
||||
|
|
|
@ -158,20 +158,20 @@ The available token pattern keys correspond to a number of
|
|||
[`Token` attributes](/api/token#attributes). The supported attributes for
|
||||
rule-based matching are:
|
||||
|
||||
| Attribute | Description |
|
||||
| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `ORTH` | The exact verbatim text of a token. ~~str~~ |
|
||||
| `TEXT` <Tag variant="new">2.1</Tag> | The exact verbatim text of a token. ~~str~~ |
|
||||
| `LOWER` | The lowercase form of the token text. ~~str~~ |
|
||||
| `LENGTH` | The length of the token text. ~~int~~ |
|
||||
| `IS_ALPHA`, `IS_ASCII`, `IS_DIGIT` | Token text consists of alphabetic characters, ASCII characters, digits. ~~bool~~ |
|
||||
| `IS_LOWER`, `IS_UPPER`, `IS_TITLE` | Token text is in lowercase, uppercase, titlecase. ~~bool~~ |
|
||||
| `IS_PUNCT`, `IS_SPACE`, `IS_STOP` | Token is punctuation, whitespace, stop word. ~~bool~~ |
|
||||
| `LIKE_NUM`, `LIKE_URL`, `LIKE_EMAIL` | Token text resembles a number, URL, email. ~~bool~~ |
|
||||
| `POS`, `TAG`, `DEP`, `LEMMA`, `SHAPE` | The token's simple and extended part-of-speech tag, dependency label, lemma, shape. ~~str~~ |
|
||||
| `ENT_TYPE` | The token's entity label. ~~str~~ |
|
||||
| `_` <Tag variant="new">2.1</Tag> | Properties in [custom extension attributes](/usage/processing-pipelines#custom-components-attributes). ~~Dict[str, Any]~~ |
|
||||
| `OP` | [Operator or quantifier](#quantifiers) to determine how often to match a token pattern. ~~str~~ |
|
||||
| Attribute | Description |
|
||||
| ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `ORTH` | The exact verbatim text of a token. ~~str~~ |
|
||||
| `TEXT` <Tag variant="new">2.1</Tag> | The exact verbatim text of a token. ~~str~~ |
|
||||
| `LOWER` | The lowercase form of the token text. ~~str~~ |
|
||||
| `LENGTH` | The length of the token text. ~~int~~ |
|
||||
| `IS_ALPHA`, `IS_ASCII`, `IS_DIGIT` | Token text consists of alphabetic characters, ASCII characters, digits. ~~bool~~ |
|
||||
| `IS_LOWER`, `IS_UPPER`, `IS_TITLE` | Token text is in lowercase, uppercase, titlecase. ~~bool~~ |
|
||||
| `IS_PUNCT`, `IS_SPACE`, `IS_STOP` | Token is punctuation, whitespace, stop word. ~~bool~~ |
|
||||
| `LIKE_NUM`, `LIKE_URL`, `LIKE_EMAIL` | Token text resembles a number, URL, email. ~~bool~~ |
|
||||
| `POS`, `TAG`, `MORPH`, `DEP`, `LEMMA`, `SHAPE` | The token's simple and extended part-of-speech tag, morphological analysis, dependency label, lemma, shape. ~~str~~ |
|
||||
| `ENT_TYPE` | The token's entity label. ~~str~~ |
|
||||
| `_` <Tag variant="new">2.1</Tag> | Properties in [custom extension attributes](/usage/processing-pipelines#custom-components-attributes). ~~Dict[str, Any]~~ |
|
||||
| `OP` | [Operator or quantifier](#quantifiers) to determine how often to match a token pattern. ~~str~~ |
|
||||
|
||||
<Accordion title="Does it matter if the attribute names are uppercase or lowercase?">
|
||||
|
||||
|
@ -236,6 +236,8 @@ following rich comparison attributes are available:
|
|||
| -------------------------- | ------------------------------------------------------------------------------------------------------- |
|
||||
| `IN` | Attribute value is member of a list. ~~Any~~ |
|
||||
| `NOT_IN` | Attribute value is _not_ member of a list. ~~Any~~ |
|
||||
| `IS_SUBSET` | Attribute values (for `MORPH`) are a subset of a list. ~~Any~~ |
|
||||
| `IS_SUPERSET` | Attribute values (for `MORPH`) are a superset of a list. ~~Any~~ |
|
||||
| `==`, `>=`, `<=`, `>`, `<` | Attribute value is equal, greater or equal, smaller or equal, greater or smaller. ~~Union[int, float]~~ |
|
||||
|
||||
#### Regular expressions {#regex new="2.1"}
|
||||
|
|
|
@ -11,12 +11,24 @@ import { Table, Tr, Td, Th } from '../components/table'
|
|||
import Tag from '../components/tag'
|
||||
import { H2, Label } from '../components/typography'
|
||||
import Icon from '../components/icon'
|
||||
import Link from '../components/link'
|
||||
import Link, { OptionalLink } from '../components/link'
|
||||
import Infobox from '../components/infobox'
|
||||
import Accordion from '../components/accordion'
|
||||
import { join, arrayToObj, abbrNum, markdownToReact } from '../components/util'
|
||||
import { isString, isEmptyObj } from '../components/util'
|
||||
|
||||
const COMPONENT_LINKS = {
|
||||
tok2vec: '/api/tok2vec',
|
||||
transformer: '/api/transformer',
|
||||
tagger: '/api/tagger',
|
||||
parser: '/api/dependencyparser',
|
||||
ner: '/api/entityrecognizer',
|
||||
lemmatizer: '/api/lemmatizer',
|
||||
attribute_ruler: '/api/attributeruler',
|
||||
senter: '/api/sentencerecognizer',
|
||||
morphologizer: '/api/morphologizer',
|
||||
}
|
||||
|
||||
const MODEL_META = {
|
||||
core: 'Vocabulary, syntax, entities, vectors',
|
||||
core_sm: 'Vocabulary, syntax, entities',
|
||||
|
@ -78,10 +90,15 @@ function isStableVersion(v) {
|
|||
return !v.includes('a') && !v.includes('b') && !v.includes('dev') && !v.includes('rc')
|
||||
}
|
||||
|
||||
function getLatestVersion(modelId, compatibility) {
|
||||
function getLatestVersion(modelId, compatibility, prereleases) {
|
||||
for (let [version, models] of Object.entries(compatibility)) {
|
||||
if (isStableVersion(version) && models[modelId]) {
|
||||
return models[modelId][0]
|
||||
const modelVersions = models[modelId]
|
||||
for (let modelVersion of modelVersions) {
|
||||
if (isStableVersion(modelVersion) || prereleases) {
|
||||
return modelVersion
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -141,18 +158,44 @@ function formatSources(data = []) {
|
|||
))
|
||||
}
|
||||
|
||||
function linkComponents(components = []) {
|
||||
return join(
|
||||
components.map(c => (
|
||||
<Fragment key={c}>
|
||||
<OptionalLink to={COMPONENT_LINKS[c]} hideIcon>
|
||||
<InlineCode>{c}</InlineCode>
|
||||
</OptionalLink>
|
||||
</Fragment>
|
||||
))
|
||||
)
|
||||
}
|
||||
|
||||
const Help = ({ children }) => (
|
||||
<span data-tooltip={children}>
|
||||
<Icon name="help2" width={16} variant="subtle" inline />
|
||||
</span>
|
||||
)
|
||||
|
||||
const Model = ({ name, langId, langName, baseUrl, repo, compatibility, hasExamples, licenses }) => {
|
||||
const Model = ({
|
||||
name,
|
||||
langId,
|
||||
langName,
|
||||
baseUrl,
|
||||
repo,
|
||||
compatibility,
|
||||
hasExamples,
|
||||
licenses,
|
||||
prereleases,
|
||||
}) => {
|
||||
const [initialized, setInitialized] = useState(false)
|
||||
const [isError, setIsError] = useState(true)
|
||||
const [meta, setMeta] = useState({})
|
||||
const { type, genre, size } = getModelComponents(name)
|
||||
const version = useMemo(() => getLatestVersion(name, compatibility), [name, compatibility])
|
||||
const version = useMemo(() => getLatestVersion(name, compatibility, prereleases), [
|
||||
name,
|
||||
compatibility,
|
||||
prereleases,
|
||||
])
|
||||
|
||||
useEffect(() => {
|
||||
window.dispatchEvent(new Event('resize')) // scroll position for progress
|
||||
|
@ -173,10 +216,8 @@ const Model = ({ name, langId, langName, baseUrl, repo, compatibility, hasExampl
|
|||
|
||||
const releaseTag = meta.fullName ? `/tag/${meta.fullName}` : ''
|
||||
const releaseUrl = `https://github.com/${repo}/releases/${releaseTag}`
|
||||
const pipeline =
|
||||
meta.pipeline && join(meta.pipeline.map(p => <InlineCode key={p}>{p}</InlineCode>))
|
||||
const components =
|
||||
meta.components && join(meta.components.map(p => <InlineCode key={p}>{p}</InlineCode>))
|
||||
const pipeline = linkComponents(meta.pipeline)
|
||||
const components = linkComponents(meta.components)
|
||||
const sources = formatSources(meta.sources)
|
||||
const author = !meta.url ? meta.author : <Link to={meta.url}>{meta.author}</Link>
|
||||
const licenseUrl = licenses[meta.license] ? licenses[meta.license].url : null
|
||||
|
@ -332,7 +373,7 @@ const Model = ({ name, langId, langName, baseUrl, repo, compatibility, hasExampl
|
|||
const Models = ({ pageContext, repo, children }) => {
|
||||
const [initialized, setInitialized] = useState(false)
|
||||
const [compatibility, setCompatibility] = useState({})
|
||||
const { id, title, meta, hasExamples } = pageContext
|
||||
const { id, title, meta } = pageContext
|
||||
const { models, isStarters } = meta
|
||||
const baseUrl = `https://raw.githubusercontent.com/${repo}/master`
|
||||
|
||||
|
@ -381,6 +422,7 @@ const Models = ({ pageContext, repo, children }) => {
|
|||
repo={repo}
|
||||
licenses={arrayToObj(site.siteMetadata.licenses, 'id')}
|
||||
hasExamples={meta.hasExamples}
|
||||
prereleases={site.siteMetadata.nightly}
|
||||
/>
|
||||
))
|
||||
}
|
||||
|
@ -397,6 +439,7 @@ const query = graphql`
|
|||
query ModelsQuery {
|
||||
site {
|
||||
siteMetadata {
|
||||
nightly
|
||||
licenses {
|
||||
id
|
||||
url
|
||||
|
|
Loading…
Reference in New Issue
Block a user