Mirror of https://github.com/explosion/spaCy.git, synced 2024-12-29 11:26:28 +03:00
c0f4a1e43b
* verbose and tag_map options
* adding init_tok2vec option and only changing the tok2vec that is specified
* adding omit_extra_lookups and verifying textcat config
* wip
* pretrain bugfix
* add replace and resume options
* train_textcat fix
* raw text functionality
* improve UX when KeyError or when input data can't be parsed
* avoid unnecessary access to goldparse in TextCat pipe
* save performance information in nlp.meta
* add noise_level to config
* move nn_parser's defaults to config file
* multitask in config - doesn't work yet
* scorer offering both F and AUC options, need to be specified in config
* add textcat verification code from old train script
* small fixes to config files
* clean up
* set default config for ner/parser to allow create_pipe to work as before
* two more test fixes
* small fixes
* cleanup
* fix NER pickling + additional unit test
* create_pipe as before
39 lines · 1.3 KiB · Python
from collections import defaultdict

from spacy.pipeline.defaults import default_ner
from spacy.pipeline import EntityRecognizer

from spacy.lang.en import English
from spacy.tokens import Span


def test_issue4313():
    """ This should not crash or exit with some strange error code """
    beam_width = 16
    beam_density = 0.0001
    nlp = English()
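    # constructor options passed through to the EntityRecognizer component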
    config = {
        "learn_tokens": False,
        "min_action_freq": 30,
        "beam_width": 1,
        "beam_update_prob": 1.0,
    }
    ner = EntityRecognizer(nlp.vocab, default_ner(), **config)
    ner.add_label("SOME_LABEL")
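    # initialize the model weights; no gold examples are needed for this test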
    ner.begin_training([])
    nlp.add_pipe(ner)

    # add a new label to the doc
    doc = nlp("What do you think about Apple ?")
    assert len(ner.labels) == 1
    assert "SOME_LABEL" in ner.labels
    apple_ent = Span(doc, 5, 6, label="MY_ORG")
    doc.ents = list(doc.ents) + [apple_ent]

    # ensure the beam_parse still works with the new label
    docs = [doc]
    beams = nlp.entity.beam_parse(
        docs, beam_width=beam_width, beam_density=beam_density
    )

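    # sum the score of each candidate (start, end, label) entity over all parses in the beam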
    for doc, beam in zip(docs, beams):
        entity_scores = defaultdict(float)
        for score, ents in nlp.entity.moves.get_beam_parses(beam):
            for start, end, label in ents:
                entity_scores[(start, end, label)] += score