Mirror of https://github.com/explosion/spaCy.git
Commit d7a94edba6: Merge branch 'master' into spacy.io
requirements.txt
@@ -4,7 +4,7 @@ preshed>=3.0.2,<3.1.0
 thinc>=7.3.0,<7.4.0
 blis>=0.4.0,<0.5.0
 murmurhash>=0.28.0,<1.1.0
-wasabi>=0.3.0,<1.1.0
+wasabi>=0.4.0,<1.1.0
 srsly>=0.1.0,<1.1.0
 # Third party dependencies
 numpy>=1.15.0
setup.cfg
@@ -40,17 +40,19 @@ setup_requires =
     murmurhash>=0.28.0,<1.1.0
     thinc>=7.3.0,<7.4.0
 install_requires =
-    setuptools
-    numpy>=1.15.0
+    # Our libraries
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     thinc>=7.3.0,<7.4.0
     blis>=0.4.0,<0.5.0
+    wasabi>=0.4.0,<1.1.0
+    srsly>=0.1.0,<1.1.0
+    # Third-party dependencies
+    setuptools
+    numpy>=1.15.0
     plac>=0.9.6,<1.2.0
     requests>=2.13.0,<3.0.0
-    wasabi>=0.3.0,<1.1.0
-    srsly>=0.1.0,<1.1.0
     pathlib==1.0.1; python_version < "3.4"
     importlib_metadata>=0.20; python_version < "3.8"
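Both dependency hunks tell one story: the wasabi pin rises from >=0.3.0 to >=0.4.0, presumably because the shared msg printer that the CLI modules below switch to first shipped in wasabi 0.4.0, and setup.cfg regroups install_requires so Explosion's own libraries sit under "# Our libraries" and everything else under "# Third-party dependencies".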
spacy/__main__.py
@@ -7,12 +7,10 @@ from __future__ import print_function
 if __name__ == "__main__":
     import plac
     import sys
-    from wasabi import Printer
+    from wasabi import msg
     from spacy.cli import download, link, info, package, train, pretrain, convert
     from spacy.cli import init_model, profile, evaluate, validate, debug_data
 
-    msg = Printer()
-
     commands = {
         "download": download,
         "link": link,
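The same two-line change repeats in every CLI file below: wasabi exports a ready-made Printer instance named msg, so the modules import it instead of each constructing their own. A minimal sketch of the shared-printer pattern (the message strings here are illustrative, not from spaCy):

    # Before: every module created its own printer
    from wasabi import Printer
    msg = Printer()

    # After: reuse the instance wasabi exports
    from wasabi import msg

    msg.good("Linking successful")          # green success line
    msg.warn("Skipping unknown component")  # yellow warning
    msg.fail("Download failed", exits=1)    # red error; exits the process
    with msg.loading("Loading model..."):   # spinner while the block runs
        pass

Either form prints the same output; sharing one instance simply avoids re-instantiating a printer in every command module.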
spacy/cli/debug_data.py
@@ -121,6 +121,8 @@ def debug_data(
     msg.text("{} training docs".format(len(train_docs)))
     msg.text("{} evaluation docs".format(len(dev_docs)))
 
+    if not len(dev_docs):
+        msg.fail("No evaluation docs")
     overlap = len(train_texts.intersection(dev_texts))
     if overlap:
         msg.warn("{} training examples also in evaluation data".format(overlap))
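Since msg.fail is called here without exits=1, wasabi prints the error and the remaining debug checks still run; an empty evaluation set is flagged rather than aborting the command.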
spacy/cli/download.py
@@ -6,16 +6,13 @@ import requests
 import os
 import subprocess
 import sys
-from wasabi import Printer
+from wasabi import msg
 
 from .link import link
 from ..util import get_package_path
 from .. import about
 
 
-msg = Printer()
-
-
 @plac.annotations(
     model=("Model to download (shortcut or name)", "positional", None, str),
     direct=("Force direct download of name + version", "flag", "d", bool),
spacy/cli/evaluate.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals, division, print_function
 
 import plac
 from timeit import default_timer as timer
-from wasabi import Printer
+from wasabi import msg
 
 from ..gold import GoldCorpus
 from .. import util
@@ -32,7 +32,6 @@ def evaluate(
     Evaluate a model. To render a sample of parses in a HTML file, set an
     output directory as the displacy_path argument.
     """
-    msg = Printer()
     util.fix_random_seed()
     if gpu_id >= 0:
         util.use_gpu(gpu_id)
spacy/cli/info.py
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import plac
 import platform
 from pathlib import Path
-from wasabi import Printer
+from wasabi import msg
 import srsly
 
 from ..compat import path2str, basestring_, unicode_
@@ -23,7 +23,6 @@ def info(model=None, markdown=False, silent=False):
     specified as an argument, print model information. Flag --markdown
     prints details in Markdown for easy copy-pasting to GitHub issues.
     """
-    msg = Printer()
     if model:
         if util.is_package(model):
             model_path = util.get_package_path(model)
spacy/cli/init_model.py
@@ -11,7 +11,7 @@ import tarfile
 import gzip
 import zipfile
 import srsly
-from wasabi import Printer
+from wasabi import msg
 
 from ..vectors import Vectors
 from ..errors import Errors, Warnings, user_warning
@@ -24,7 +24,6 @@ except ImportError:
 
 
 DEFAULT_OOV_PROB = -20
-msg = Printer()
 
 
 @plac.annotations(
spacy/cli/link.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import plac
 from pathlib import Path
-from wasabi import Printer
+from wasabi import msg
 
 from ..compat import symlink_to, path2str
 from .. import util
@@ -20,7 +20,6 @@ def link(origin, link_name, force=False, model_path=None):
     either the name of a pip package, or the local path to the model data
     directory. Linking models allows loading them via spacy.load(link_name).
     """
-    msg = Printer()
     if util.is_package(origin):
         model_path = util.get_package_path(origin)
     else:
spacy/cli/package.py
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import plac
 import shutil
 from pathlib import Path
-from wasabi import Printer, get_raw_input
+from wasabi import msg, get_raw_input
 import srsly
 
 from ..compat import path2str
@@ -27,7 +27,6 @@ def package(input_dir, output_dir, meta_path=None, create_meta=False, force=False):
     set and a meta.json already exists in the output directory, the existing
     values will be used as the defaults in the command-line prompt.
     """
-    msg = Printer()
     input_path = util.ensure_path(input_dir)
     output_path = util.ensure_path(output_dir)
     meta_path = util.ensure_path(meta_path)
spacy/cli/pretrain.py
@@ -11,7 +11,7 @@ from pathlib import Path
 from thinc.v2v import Affine, Maxout
 from thinc.misc import LayerNorm as LN
 from thinc.neural.util import prefer_gpu
-from wasabi import Printer
+from wasabi import msg
 import srsly
 
 from ..errors import Errors
@@ -122,7 +122,6 @@ def pretrain(
     for key in config:
         if isinstance(config[key], Path):
            config[key] = str(config[key])
-    msg = Printer()
     util.fix_random_seed(seed)
 
     has_gpu = prefer_gpu()
spacy/cli/profile.py
@@ -9,7 +9,7 @@ import pstats
 import sys
 import itertools
 import thinc.extra.datasets
-from wasabi import Printer
+from wasabi import msg
 
 from ..util import load_model
 
@@ -26,7 +26,6 @@ def profile(model, inputs=None, n_texts=10000):
     It can either be provided as a JSONL file, or be read from sys.stdin.
     If no input file is specified, the IMDB dataset is loaded via Thinc.
     """
-    msg = Printer()
     if inputs is not None:
         inputs = _read_inputs(inputs, msg)
     if inputs is None:
spacy/cli/train.py
@@ -8,7 +8,7 @@ from thinc.neural._classes.model import Model
 from timeit import default_timer as timer
 import shutil
 import srsly
-from wasabi import Printer
+from wasabi import msg
 import contextlib
 import random
 
@@ -89,7 +89,6 @@ def train(
     # temp fix to avoid import issues cf https://github.com/explosion/spaCy/issues/4200
     import tqdm
 
-    msg = Printer()
     util.fix_random_seed()
     util.set_env_log(verbose)
 
spacy/cli/validate.py
@@ -5,7 +5,7 @@ from pathlib import Path
 import sys
 import requests
 import srsly
-from wasabi import Printer
+from wasabi import msg
 
 from ..compat import path2str
 from ..util import get_data_path
@@ -17,7 +17,6 @@ def validate():
     Validate that the currently installed version of spaCy is compatible
     with the installed models. Should be run after `pip install -U spacy`.
     """
-    msg = Printer()
     with msg.loading("Loading compatibility table..."):
         r = requests.get(about.__compatibility__)
     if r.status_code != 200:
spacy/scorer.py
@@ -82,6 +82,7 @@ class Scorer(object):
         self.sbd = PRFScore()
         self.unlabelled = PRFScore()
         self.labelled = PRFScore()
+        self.labelled_per_dep = dict()
         self.tags = PRFScore()
         self.ner = PRFScore()
         self.ner_per_ents = dict()
@@ -124,9 +125,18 @@ class Scorer(object):
 
     @property
     def las(self):
-        """RETURNS (float): Labelled depdendency score."""
+        """RETURNS (float): Labelled dependency score."""
         return self.labelled.fscore * 100
 
+    @property
+    def las_per_type(self):
+        """RETURNS (dict): Scores per dependency label.
+        """
+        return {
+            k: {"p": v.precision * 100, "r": v.recall * 100, "f": v.fscore * 100}
+            for k, v in self.labelled_per_dep.items()
+        }
+
     @property
     def ents_p(self):
         """RETURNS (float): Named entity accuracy (precision)."""
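The new property turns the per-label bookkeeping into plain percentages. A hypothetical inspection after scoring (the labels and numbers are illustrative):

    scorer = Scorer()
    # ... call scorer.score(doc, gold) for each evaluated document ...
    print(scorer.las)           # aggregate labelled score, e.g. 91.2
    print(scorer.las_per_type)  # e.g. {"nsubj": {"p": 100.0, "r": 88.9, "f": 94.1}, ...}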
@@ -196,6 +206,7 @@ class Scorer(object):
         return {
             "uas": self.uas,
             "las": self.las,
+            "las_per_type": self.las_per_type,
             "ents_p": self.ents_p,
             "ents_r": self.ents_r,
             "ents_f": self.ents_f,
@@ -223,13 +234,20 @@ class Scorer(object):
             doc, tuple(zip(*gold.orig_annot)) + (gold.cats,)
         )
         gold_deps = set()
+        gold_deps_per_dep = {}
         gold_tags = set()
         gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
         for id_, word, tag, head, dep, ner in gold.orig_annot:
             gold_tags.add((id_, tag))
             if dep not in (None, "") and dep.lower() not in punct_labels:
                 gold_deps.add((id_, head, dep.lower()))
+                if dep.lower() not in self.labelled_per_dep:
+                    self.labelled_per_dep[dep.lower()] = PRFScore()
+                if dep.lower() not in gold_deps_per_dep:
+                    gold_deps_per_dep[dep.lower()] = set()
+                gold_deps_per_dep[dep.lower()].add((id_, head, dep.lower()))
         cand_deps = set()
+        cand_deps_per_dep = {}
         cand_tags = set()
         for token in doc:
             if token.orth_.isspace():
@@ -249,6 +267,11 @@ class Scorer(object):
                     self.labelled.fp += 1
                 else:
                     cand_deps.add((gold_i, gold_head, token.dep_.lower()))
+                    if token.dep_.lower() not in self.labelled_per_dep:
+                        self.labelled_per_dep[token.dep_.lower()] = PRFScore()
+                    if token.dep_.lower() not in cand_deps_per_dep:
+                        cand_deps_per_dep[token.dep_.lower()] = set()
+                    cand_deps_per_dep[token.dep_.lower()].add((gold_i, gold_head, token.dep_.lower()))
         if "-" not in [token[-1] for token in gold.orig_annot]:
             # Find all NER labels in gold and doc
             ent_labels = set([x[0] for x in gold_ents] + [k.label_ for k in doc.ents])
@@ -280,6 +303,8 @@ class Scorer(object):
         self.ner.score_set(cand_ents, gold_ents)
         self.tags.score_set(cand_tags, gold_tags)
         self.labelled.score_set(cand_deps, gold_deps)
+        for dep in self.labelled_per_dep:
+            self.labelled_per_dep[dep].score_set(cand_deps_per_dep.get(dep, set()), gold_deps_per_dep.get(dep, set()))
         self.unlabelled.score_set(
             set(item[:2] for item in cand_deps), set(item[:2] for item in gold_deps)
         )
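Each label's PRFScore is updated once per document with that label's candidate and gold arc sets, so labels absent from a document simply contribute empty sets. For reference, a minimal sketch of this kind of set-based precision/recall/F accounting, modelled on spaCy's PRFScore (the tiny epsilon guards against division by zero):

    class PRFScore(object):
        # Accumulates true/false positives and false negatives across documents
        def __init__(self):
            self.tp = 0
            self.fp = 0
            self.fn = 0

        def score_set(self, cand, gold):
            # Candidates also in gold are hits; the set differences are misses
            self.tp += len(cand.intersection(gold))
            self.fp += len(cand - gold)
            self.fn += len(gold - cand)

        @property
        def precision(self):
            return self.tp / (self.tp + self.fp + 1e-100)

        @property
        def recall(self):
            return self.tp / (self.tp + self.fn + 1e-100)

        @property
        def fscore(self):
            p = self.precision
            r = self.recall
            return 2 * ((p * r) / (p + r + 1e-100))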
spacy/tests/test_scorer.py
@@ -9,6 +9,14 @@ from spacy.scorer import Scorer, ROCAUCScore
 from spacy.scorer import _roc_auc_score, _roc_curve
 from .util import get_doc
 
+test_las_apple = [
+    [
+        "Apple is looking at buying U.K. startup for $ 1 billion",
+        {"heads": [2, 2, 2, 2, 3, 6, 4, 4, 10, 10, 7],
+         "deps": ['nsubj', 'aux', 'ROOT', 'prep', 'pcomp', 'compound', 'dobj', 'prep', 'quantmod', 'compound', 'pobj']},
+    ]
+]
+
 test_ner_cardinal = [
     ["100 - 200", {"entities": [[0, 3, "CARDINAL"], [6, 9, "CARDINAL"]]}]
 ]
@@ -21,6 +29,53 @@ test_ner_apple = [
 ]
 
 
+def test_las_per_type(en_vocab):
+    # Gold and Doc are identical
+    scorer = Scorer()
+    for input_, annot in test_las_apple:
+        doc = get_doc(
+            en_vocab,
+            words=input_.split(" "),
+            heads=([h - i for i, h in enumerate(annot["heads"])]),
+            deps=annot["deps"],
+        )
+        gold = GoldParse(doc, heads=annot["heads"], deps=annot["deps"])
+        scorer.score(doc, gold)
+    results = scorer.scores
+
+    assert results["uas"] == 100
+    assert results["las"] == 100
+    assert results["las_per_type"]["nsubj"]["p"] == 100
+    assert results["las_per_type"]["nsubj"]["r"] == 100
+    assert results["las_per_type"]["nsubj"]["f"] == 100
+    assert results["las_per_type"]["compound"]["p"] == 100
+    assert results["las_per_type"]["compound"]["r"] == 100
+    assert results["las_per_type"]["compound"]["f"] == 100
+
+    # One dep is incorrect in Doc
+    scorer = Scorer()
+    for input_, annot in test_las_apple:
+        doc = get_doc(
+            en_vocab,
+            words=input_.split(" "),
+            heads=([h - i for i, h in enumerate(annot["heads"])]),
+            deps=annot["deps"]
+        )
+        gold = GoldParse(doc, heads=annot["heads"], deps=annot["deps"])
+        doc[0].dep_ = "compound"
+        scorer.score(doc, gold)
+    results = scorer.scores
+
+    assert results["uas"] == 100
+    assert_almost_equal(results["las"], 90.9090909)
+    assert results["las_per_type"]["nsubj"]["p"] == 0
+    assert results["las_per_type"]["nsubj"]["r"] == 0
+    assert results["las_per_type"]["nsubj"]["f"] == 0
+    assert_almost_equal(results["las_per_type"]["compound"]["p"], 66.6666666)
+    assert results["las_per_type"]["compound"]["r"] == 100
+    assert results["las_per_type"]["compound"]["f"] == 80
+
+
 def test_ner_per_type(en_vocab):
     # Gold and Doc are identical
     scorer = Scorer()
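The expected values in the second half follow directly from the set arithmetic above: relabelling doc[0] from nsubj to compound leaves 10 of the 11 arcs correct, so LAS drops to 10/11, about 90.91, while UAS stays at 100 because the head is untouched. The nsubj label now has no matching candidate arc, so its precision, recall, and F-score are all 0. The compound label has three candidate arcs against two gold arcs, giving p = 2/3 (about 66.67), r = 2/2 = 100, and f = 2pr/(p + r) = (4/3)/(5/3) = 0.8, i.e. 80.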
website/meta/universe.json
@@ -1861,6 +1861,30 @@
         "author_links": {
             "github": "microsoft"
         }
+    },
+    {
+        "id": "dframcy",
+        "title": "Dframcy",
+        "slogan": "Dataframe Integration with spaCy NLP",
+        "github": "yash1994/dframcy",
+        "description": "DframCy is a light-weight utility module to integrate Pandas Dataframe to spaCy's linguistic annotation and training tasks.",
+        "pip": "dframcy",
+        "category": ["pipeline", "training"],
+        "tags": ["pandas"],
+        "code_example": [
+            "import spacy",
+            "from dframcy import DframCy",
+            "",
+            "nlp = spacy.load('en_core_web_sm')",
+            "dframcy = DframCy(nlp)",
+            "doc = dframcy.nlp(u'Apple is looking at buying U.K. startup for $1 billion')",
+            "annotation_dataframe = dframcy.to_dataframe(doc)"
+        ],
+        "author": "Yash Patadia",
+        "author_links": {
+            "twitter": "PatadiaYash",
+            "github": "yash1994"
+        }
     }
 ],