mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Tidy up and auto-format
This commit is contained in:
parent
05dcab10aa
commit
3eaeb73342
|
@ -1,4 +1,4 @@
|
||||||
from typing import Union, Iterator, Iterable, Sequence, TypeVar, List, Callable
|
from typing import Union, Iterable, Sequence, TypeVar, List, Callable
|
||||||
from typing import Optional, Any
|
from typing import Optional, Any
|
||||||
from functools import partial
|
from functools import partial
|
||||||
import itertools
|
import itertools
|
||||||
|
@ -123,7 +123,11 @@ def minibatch_by_padded_size(
|
||||||
|
|
||||||
|
|
||||||
def minibatch_by_words(
|
def minibatch_by_words(
|
||||||
seqs: Iterable[ItemT], size: Sizing, tolerance=0.2, discard_oversize=False, get_length=len
|
seqs: Iterable[ItemT],
|
||||||
|
size: Sizing,
|
||||||
|
tolerance=0.2,
|
||||||
|
discard_oversize=False,
|
||||||
|
get_length=len,
|
||||||
) -> Iterable[List[ItemT]]:
|
) -> Iterable[List[ItemT]]:
|
||||||
"""Create minibatches of roughly a given number of words. If any examples
|
"""Create minibatches of roughly a given number of words. If any examples
|
||||||
are longer than the specified batch length, they will appear in a batch by
|
are longer than the specified batch length, they will appear in a batch by
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from ...pipeline import Lemmatizer
|
from ...pipeline import Lemmatizer
|
||||||
from ...tokens import Token
|
from ...tokens import Token
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,6 @@ from .lang.tokenizer_exceptions import URL_MATCH, BASE_EXCEPTIONS
|
||||||
from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
|
from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
|
||||||
from .lang.punctuation import TOKENIZER_INFIXES
|
from .lang.punctuation import TOKENIZER_INFIXES
|
||||||
from .tokens import Doc
|
from .tokens import Doc
|
||||||
from .lookups import load_lookups
|
|
||||||
from .tokenizer import Tokenizer
|
from .tokenizer import Tokenizer
|
||||||
from .errors import Errors, Warnings
|
from .errors import Errors, Warnings
|
||||||
from .schemas import ConfigSchema
|
from .schemas import ConfigSchema
|
||||||
|
@ -1439,10 +1438,7 @@ class Language:
|
||||||
or lang_cls is not cls
|
or lang_cls is not cls
|
||||||
):
|
):
|
||||||
raise ValueError(Errors.E943.format(value=type(lang_cls)))
|
raise ValueError(Errors.E943.format(value=type(lang_cls)))
|
||||||
nlp = lang_cls(
|
nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer)
|
||||||
vocab=vocab,
|
|
||||||
create_tokenizer=create_tokenizer,
|
|
||||||
)
|
|
||||||
if after_creation is not None:
|
if after_creation is not None:
|
||||||
nlp = after_creation(nlp)
|
nlp = after_creation(nlp)
|
||||||
if not isinstance(nlp, cls):
|
if not isinstance(nlp, cls):
|
||||||
|
|
|
@ -46,6 +46,7 @@ class Tok2Vec(Pipe):
|
||||||
avoid relying on object identity within the models to achieve the parameter
|
avoid relying on object identity within the models to achieve the parameter
|
||||||
sharing.
|
sharing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, vocab: Vocab, model: Model, name: str = "tok2vec") -> None:
|
def __init__(self, vocab: Vocab, model: Model, name: str = "tok2vec") -> None:
|
||||||
"""Initialize a tok2vec component.
|
"""Initialize a tok2vec component.
|
||||||
|
|
||||||
|
@ -239,6 +240,7 @@ class Tok2VecListener(Model):
|
||||||
from the Tok2Vec component into downstream components, and communicating
|
from the Tok2Vec component into downstream components, and communicating
|
||||||
gradients back upstream.
|
gradients back upstream.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
name = "tok2vec-listener"
|
name = "tok2vec-listener"
|
||||||
|
|
||||||
def __init__(self, upstream_name: str, width: int) -> None:
|
def __init__(self, upstream_name: str, width: int) -> None:
|
||||||
|
|
|
@ -426,7 +426,7 @@ class Scorer:
|
||||||
f"{attr}_auc_per_type": {k: v.score for k, v in auc_per_type.items()},
|
f"{attr}_auc_per_type": {k: v.score for k, v in auc_per_type.items()},
|
||||||
}
|
}
|
||||||
if len(labels) == 2 and not multi_label and positive_label:
|
if len(labels) == 2 and not multi_label and positive_label:
|
||||||
positive_label_f = results[f"{attr}_f_per_type"][positive_label]['f']
|
positive_label_f = results[f"{attr}_f_per_type"][positive_label]["f"]
|
||||||
results[f"{attr}_score"] = positive_label_f
|
results[f"{attr}_score"] = positive_label_f
|
||||||
results[f"{attr}_score_desc"] = f"F ({positive_label})"
|
results[f"{attr}_score_desc"] = f"F ({positive_label})"
|
||||||
elif not multi_label:
|
elif not multi_label:
|
||||||
|
|
|
@ -15,5 +15,7 @@ def morphology():
|
||||||
def test_morphology_pickle_roundtrip(morphology):
|
def test_morphology_pickle_roundtrip(morphology):
|
||||||
b = pickle.dumps(morphology)
|
b = pickle.dumps(morphology)
|
||||||
reloaded_morphology = pickle.loads(b)
|
reloaded_morphology = pickle.loads(b)
|
||||||
assert reloaded_morphology.get(morphology.strings["Feat1=Val1|Feat2=Val2"]) == "Feat1=Val1|Feat2=Val2"
|
feat = reloaded_morphology.get(morphology.strings["Feat1=Val1|Feat2=Val2"])
|
||||||
assert reloaded_morphology.get(morphology.strings["Feat3=Val3|Feat4=Val4"]) == "Feat3=Val3|Feat4=Val4"
|
assert feat == "Feat1=Val1|Feat2=Val2"
|
||||||
|
feat = reloaded_morphology.get(morphology.strings["Feat3=Val3|Feat4=Val4"])
|
||||||
|
assert feat == "Feat3=Val3|Feat4=Val4"
|
||||||
|
|
|
@ -144,8 +144,7 @@ def test_accept_blocked_token():
|
||||||
# 1. test normal behaviour
|
# 1. test normal behaviour
|
||||||
nlp1 = English()
|
nlp1 = English()
|
||||||
doc1 = nlp1("I live in New York")
|
doc1 = nlp1("I live in New York")
|
||||||
config = {
|
config = {}
|
||||||
}
|
|
||||||
ner1 = nlp1.create_pipe("ner", config=config)
|
ner1 = nlp1.create_pipe("ner", config=config)
|
||||||
assert [token.ent_iob_ for token in doc1] == ["", "", "", "", ""]
|
assert [token.ent_iob_ for token in doc1] == ["", "", "", "", ""]
|
||||||
assert [token.ent_type_ for token in doc1] == ["", "", "", "", ""]
|
assert [token.ent_type_ for token in doc1] == ["", "", "", "", ""]
|
||||||
|
@ -164,8 +163,7 @@ def test_accept_blocked_token():
|
||||||
# 2. test blocking behaviour
|
# 2. test blocking behaviour
|
||||||
nlp2 = English()
|
nlp2 = English()
|
||||||
doc2 = nlp2("I live in New York")
|
doc2 = nlp2("I live in New York")
|
||||||
config = {
|
config = {}
|
||||||
}
|
|
||||||
ner2 = nlp2.create_pipe("ner", config=config)
|
ner2 = nlp2.create_pipe("ner", config=config)
|
||||||
|
|
||||||
# set "New York" to a blocked entity
|
# set "New York" to a blocked entity
|
||||||
|
@ -220,8 +218,7 @@ def test_overwrite_token():
|
||||||
assert [token.ent_iob_ for token in doc] == ["O", "O", "O", "O", "O"]
|
assert [token.ent_iob_ for token in doc] == ["O", "O", "O", "O", "O"]
|
||||||
assert [token.ent_type_ for token in doc] == ["", "", "", "", ""]
|
assert [token.ent_type_ for token in doc] == ["", "", "", "", ""]
|
||||||
# Check that a new ner can overwrite O
|
# Check that a new ner can overwrite O
|
||||||
config = {
|
config = {}
|
||||||
}
|
|
||||||
ner2 = nlp.create_pipe("ner", config=config)
|
ner2 = nlp.create_pipe("ner", config=config)
|
||||||
ner2.moves.add_action(5, "")
|
ner2.moves.add_action(5, "")
|
||||||
ner2.add_label("GPE")
|
ner2.add_label("GPE")
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from spacy import util, registry
|
from spacy import util, registry
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.lookups import Lookups, load_lookups
|
from spacy.lookups import Lookups
|
||||||
|
|
||||||
from ..util import make_tempdir
|
from ..util import make_tempdir
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,8 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from spacy import util
|
from spacy import util
|
||||||
from spacy.gold import Example
|
from spacy.gold import Example
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.language import Language
|
from spacy.language import Language
|
||||||
from spacy.symbols import POS, NOUN
|
|
||||||
|
|
||||||
from ..util import make_tempdir
|
from ..util import make_tempdir
|
||||||
|
|
||||||
|
|
|
@ -117,9 +117,7 @@ def test_overfitting_IO():
|
||||||
assert cats2["POSITIVE"] + cats2["NEGATIVE"] == pytest.approx(1.0, 0.1)
|
assert cats2["POSITIVE"] + cats2["NEGATIVE"] == pytest.approx(1.0, 0.1)
|
||||||
|
|
||||||
# Test scoring
|
# Test scoring
|
||||||
scores = nlp.evaluate(
|
scores = nlp.evaluate(train_examples, scorer_cfg={"positive_label": "POSITIVE"})
|
||||||
train_examples, scorer_cfg={"positive_label": "POSITIVE"}
|
|
||||||
)
|
|
||||||
assert scores["cats_micro_f"] == 1.0
|
assert scores["cats_micro_f"] == 1.0
|
||||||
assert scores["cats_score"] == 1.0
|
assert scores["cats_score"] == 1.0
|
||||||
assert "cats_score_desc" in scores
|
assert "cats_score_desc" in scores
|
||||||
|
|
|
@ -1,11 +1,9 @@
|
||||||
import pytest
|
import pytest
|
||||||
import random
|
import random
|
||||||
|
|
||||||
from spacy import util
|
from spacy import util
|
||||||
from spacy.gold import Example
|
from spacy.gold import Example
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
from spacy.attrs import IS_PUNCT, ORTH, LOWER
|
from spacy.attrs import IS_PUNCT, ORTH, LOWER
|
||||||
from spacy.symbols import POS, VERB
|
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.lookups import Lookups
|
from spacy.lookups import Lookups
|
||||||
|
|
|
@ -6,8 +6,7 @@ from spacy.lang.en import English
|
||||||
from spacy.lang.lex_attrs import LEX_ATTRS
|
from spacy.lang.lex_attrs import LEX_ATTRS
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
from spacy.tokenizer import Tokenizer
|
from spacy.tokenizer import Tokenizer
|
||||||
from spacy.lookups import Lookups
|
from spacy.symbols import ORTH, LEMMA, POS
|
||||||
from spacy.symbols import ORTH, LEMMA, POS, VERB
|
|
||||||
|
|
||||||
|
|
||||||
def test_issue1061():
|
def test_issue1061():
|
||||||
|
|
|
@ -271,8 +271,7 @@ def test_issue1963(en_tokenizer):
|
||||||
@pytest.mark.parametrize("label", ["U-JOB-NAME"])
|
@pytest.mark.parametrize("label", ["U-JOB-NAME"])
|
||||||
def test_issue1967(label):
|
def test_issue1967(label):
|
||||||
nlp = Language()
|
nlp = Language()
|
||||||
config = {
|
config = {}
|
||||||
}
|
|
||||||
ner = nlp.create_pipe("ner", config=config)
|
ner = nlp.create_pipe("ner", config=config)
|
||||||
example = Example.from_dict(
|
example = Example.from_dict(
|
||||||
Doc(ner.vocab, words=["word"]),
|
Doc(ner.vocab, words=["word"]),
|
||||||
|
|
|
@ -157,7 +157,11 @@ def test_issue3540(en_vocab):
|
||||||
|
|
||||||
with doc.retokenize() as retokenizer:
|
with doc.retokenize() as retokenizer:
|
||||||
heads = [(doc[3], 1), doc[2]]
|
heads = [(doc[3], 1), doc[2]]
|
||||||
attrs = {"POS": ["PROPN", "PROPN"], "LEMMA": ["New", "York"], "DEP": ["pobj", "compound"]}
|
attrs = {
|
||||||
|
"POS": ["PROPN", "PROPN"],
|
||||||
|
"LEMMA": ["New", "York"],
|
||||||
|
"DEP": ["pobj", "compound"],
|
||||||
|
}
|
||||||
retokenizer.split(doc[3], ["New", "York"], heads=heads, attrs=attrs)
|
retokenizer.split(doc[3], ["New", "York"], heads=heads, attrs=attrs)
|
||||||
|
|
||||||
gold_text = ["I", "live", "in", "New", "York", "right", "now"]
|
gold_text = ["I", "live", "in", "New", "York", "right", "now"]
|
||||||
|
|
|
@ -138,8 +138,7 @@ def test_issue4042_bug2():
|
||||||
if not output_dir.exists():
|
if not output_dir.exists():
|
||||||
output_dir.mkdir()
|
output_dir.mkdir()
|
||||||
ner1.to_disk(output_dir)
|
ner1.to_disk(output_dir)
|
||||||
config = {
|
config = {}
|
||||||
}
|
|
||||||
ner2 = nlp1.create_pipe("ner", config=config)
|
ner2 = nlp1.create_pipe("ner", config=config)
|
||||||
ner2.from_disk(output_dir)
|
ner2.from_disk(output_dir)
|
||||||
assert len(ner2.labels) == 2
|
assert len(ner2.labels) == 2
|
||||||
|
@ -301,8 +300,7 @@ def test_issue4313():
|
||||||
beam_width = 16
|
beam_width = 16
|
||||||
beam_density = 0.0001
|
beam_density = 0.0001
|
||||||
nlp = English()
|
nlp = English()
|
||||||
config = {
|
config = {}
|
||||||
}
|
|
||||||
ner = nlp.create_pipe("ner", config=config)
|
ner = nlp.create_pipe("ner", config=config)
|
||||||
ner.add_label("SOME_LABEL")
|
ner.add_label("SOME_LABEL")
|
||||||
ner.begin_training([])
|
ner.begin_training([])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user