Remove object subclassing

Ines Montani 2020-07-12 14:03:23 +02:00
parent 11bbc82c24
commit 5f6f4ff594
25 changed files with 66 additions and 44 deletions
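
For context: this change is purely cosmetic. In Python 3 every class is a new-style class, so explicitly inheriting from `object` is redundant. A minimal sketch of the equivalence (class names here are illustrative, not from the diff):

```python
# In Python 3, these two definitions behave identically:
class WithExplicitBase(object):
    pass


class WithImplicitBase:
    pass


# Both are new-style classes with object at the root of the MRO.
assert WithExplicitBase.__mro__[-1] is object
assert WithImplicitBase.__mro__[-1] is object
```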

View File

@@ -312,7 +312,7 @@ def create_pretraining_model(nlp, tok2vec, pretrain_config):
     return model
 
 
-class ProgressTracker(object):
+class ProgressTracker:
     def __init__(self, frequency=1000000):
         self.loss = 0.0
         self.prev_loss = 0.0

View File

@@ -16,7 +16,7 @@ DEFAULT_LANG = "en"
 DEFAULT_DIR = "ltr"
 
 
-class DependencyRenderer(object):
+class DependencyRenderer:
     """Render dependency parses as SVGs."""
 
     style = "dep"
@@ -224,7 +224,7 @@ class DependencyRenderer(object):
         return sorted(list(levels))
 
 
-class EntityRenderer(object):
+class EntityRenderer:
     """Render named entities as HTML."""
 
     style = "ent"

View File

@@ -15,7 +15,7 @@ def add_codes(err_cls):
 
 # fmt: off
 @add_codes
-class Warnings(object):
+class Warnings:
     W004 = ("No text fixing enabled. Run `pip install ftfy` to enable fixing "
             "using ftfy.fix_text if necessary.")
     W005 = ("Doc object not parsed. This means displaCy won't be able to "
@@ -118,7 +118,7 @@ class Warnings(object):
 
 
 @add_codes
-class Errors(object):
+class Errors:
     E001 = ("No component '{name}' found in pipeline. Available names: {opts}")
     E002 = ("Can't find factory for '{name}'. This usually happens when spaCy "
             "calls `nlp.create_pipe` with a component name that's not built "
@@ -538,7 +538,7 @@ class Errors(object):
 
 
 @add_codes
-class TempErrors(object):
+class TempErrors:
     T003 = ("Resizing pretrained Tagger models is not currently supported.")
     T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
             "issue tracker: http://github.com/explosion/spaCy/issues")

View File

@@ -39,7 +39,7 @@ from . import about
 ENABLE_PIPELINE_ANALYSIS = False
 
 
-class BaseDefaults(object):
+class BaseDefaults:
     @classmethod
     def create_lemmatizer(cls, nlp=None, lookups=None):
         if lookups is None:
@@ -126,7 +126,7 @@ class BaseDefaults(object):
     paired_orth_variants = []
 
 
-class Language(object):
+class Language:
     """A text-processing pipeline. Usually you'll load this once per process,
     and pass the instance around your application.
 
@@ -531,10 +531,16 @@ class Language(object):
         if len(examples) == 0:
             return losses
         if not isinstance(examples, Iterable):
-            raise TypeError(Errors.E978.format(name="language", method="update", types=type(examples)))
+            raise TypeError(
+                Errors.E978.format(
+                    name="language", method="update", types=type(examples)
+                )
+            )
         wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)])
         if wrong_types:
-            raise TypeError(Errors.E978.format(name="language", method="update", types=wrong_types))
+            raise TypeError(
+                Errors.E978.format(name="language", method="update", types=wrong_types)
+            )
 
         if sgd is None:
             if self._optimizer is None:
@@ -580,10 +586,18 @@ class Language(object):
         if len(examples) == 0:
             return
         if not isinstance(examples, Iterable):
-            raise TypeError(Errors.E978.format(name="language", method="rehearse", types=type(examples)))
+            raise TypeError(
+                Errors.E978.format(
+                    name="language", method="rehearse", types=type(examples)
+                )
+            )
         wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)])
         if wrong_types:
-            raise TypeError(Errors.E978.format(name="language", method="rehearse", types=wrong_types))
+            raise TypeError(
+                Errors.E978.format(
+                    name="language", method="rehearse", types=wrong_types
+                )
+            )
         if sgd is None:
             if self._optimizer is None:
                 self._optimizer = create_default_optimizer()
@@ -692,10 +706,18 @@ class Language(object):
         DOCS: https://spacy.io/api/language#evaluate
         """
         if not isinstance(examples, Iterable):
-            raise TypeError(Errors.E978.format(name="language", method="evaluate", types=type(examples)))
+            raise TypeError(
+                Errors.E978.format(
+                    name="language", method="evaluate", types=type(examples)
+                )
+            )
         wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)])
         if wrong_types:
-            raise TypeError(Errors.E978.format(name="language", method="evaluate", types=wrong_types))
+            raise TypeError(
+                Errors.E978.format(
+                    name="language", method="evaluate", types=wrong_types
+                )
+            )
         if scorer is None:
             scorer = Scorer(pipeline=self.pipeline)
         if component_cfg is None:
@@ -1043,7 +1065,7 @@ class Language(object):
         return self
 
 
-class component(object):
+class component:
     """Decorator for pipeline components. Can decorate both function components
     and class components and will automatically register components in the
     Language.factories. If the component is a class and needs access to the

View File

@@ -2,7 +2,7 @@ from .errors import Errors
 from .parts_of_speech import NAMES as UPOS_NAMES
 
 
-class Lemmatizer(object):
+class Lemmatizer:
     """
     The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
     lookup tables.

View File

@@ -10,7 +10,7 @@ from .strings import get_string_id
 UNSET = object()
 
 
-class Lookups(object):
+class Lookups:
     """Container for large lookup tables and dictionaries, e.g. lemmatization
     data or tokenizer exception lists. Lookups are available via vocab.lookups,
     so they can be accessed before the pipeline components are applied (e.g.

View File

@@ -690,7 +690,7 @@ def _get_attr_values(spec, string_store):
 
 # These predicate helper classes are used to match the REGEX, IN, >= etc
 # extensions to the matcher introduced in #3173.
-class _RegexPredicate(object):
+class _RegexPredicate:
     operators = ("REGEX",)
 
     def __init__(self, i, attr, value, predicate, is_extension=False):
@@ -711,7 +711,7 @@ class _RegexPredicate(object):
         return bool(self.value.search(value))
 
 
-class _SetMemberPredicate(object):
+class _SetMemberPredicate:
     operators = ("IN", "NOT_IN")
 
     def __init__(self, i, attr, value, predicate, is_extension=False):
@@ -738,7 +738,7 @@ class _SetMemberPredicate(object):
         return repr(("SetMemberPredicate", self.i, self.attr, self.value, self.predicate))
 
 
-class _ComparisonPredicate(object):
+class _ComparisonPredicate:
     operators = ("==", "!=", ">=", "<=", ">", "<")
 
     def __init__(self, i, attr, value, predicate, is_extension=False):

View File

@@ -94,7 +94,7 @@ def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15):
     return mlm_model
 
 
-class _RandomWords(object):
+class _RandomWords:
     def __init__(self, vocab):
         self.words = [lex.text for lex in vocab if lex.prob != 0.0]
         self.probs = [lex.prob for lex in vocab if lex.prob != 0.0]

View File

@@ -11,7 +11,7 @@ DEFAULT_ENT_ID_SEP = "||"
 
 
 @component("entity_ruler", assigns=["doc.ents", "token.ent_type", "token.ent_iob"])
-class EntityRuler(object):
+class EntityRuler:
     """The EntityRuler lets you add spans to the `Doc.ents` using token-based
     rules or exact phrase matches. It can be combined with the statistical
     `EntityRecognizer` to boost accuracy, or used on its own to implement a

View File

@@ -6,7 +6,7 @@ from ..util import link_vectors_to_models
 
 
 @component("sentencizer_hook", assigns=["doc.user_hooks"])
-class SentenceSegmenter(object):
+class SentenceSegmenter:
     """A simple spaCy hook, to allow custom sentence boundary detection logic
     (that doesn't require the dependency parse). To change the sentence
     boundary detection strategy, pass a generator function `strategy` on

View File

@@ -35,7 +35,7 @@ def _load_cfg(path):
         return {}
 
 
-class Pipe(object):
+class Pipe:
     """This class is not instantiated directly. Components inherit from it, and
     it defines the interface that components should follow to function as
     components in a spaCy analysis pipeline.

View File

@@ -3,7 +3,7 @@ import numpy as np
 from .errors import Errors
 
 
-class PRFScore(object):
+class PRFScore:
     """
     A precision / recall / F score
     """
@@ -33,7 +33,7 @@ class PRFScore(object):
         return 2 * ((p * r) / (p + r + 1e-100))
 
 
-class ROCAUCScore(object):
+class ROCAUCScore:
     """
     An AUC ROC score.
     """
@@ -62,7 +62,7 @@ class ROCAUCScore(object):
         return self.saved_score
 
 
-class Scorer(object):
+class Scorer:
     """Compute evaluation scores."""
 
     def __init__(self, eval_punct=False, pipeline=None):

View File

@@ -378,7 +378,7 @@ def test_ner_warns_no_lookups():
         assert not record.list
 
 
-class BlockerComponent1(object):
+class BlockerComponent1:
     name = "my_blocker"
 
     def __init__(self, start, end):

View File

@@ -20,7 +20,7 @@ def test_component_decorator_function():
 
 def test_component_decorator_class():
     @component(name="test")
-    class TestComponent(object):
+    class TestComponent:
         """docstring1"""
 
         foo = "bar"
@@ -97,7 +97,7 @@ def test_component_factories_from_nlp():
     """Test that class components can implement a from_nlp classmethod that
     gives them access to the nlp object and config via the factory."""
 
-    class TestComponent5(object):
+    class TestComponent5:
         def __call__(self, doc):
             return doc
 

View File

@@ -5,7 +5,7 @@ from spacy.tests.util import make_tempdir
 
 
 def test_issue5137():
-    class MyComponent(object):
+    class MyComponent:
         name = "my_component"
 
         def __init__(self, nlp, **cfg):

View File

@@ -24,7 +24,7 @@ def vectors():
 
 def custom_pipe():
     # create dummy pipe partially implementing interface -- only want to test to_disk
-    class SerializableDummy(object):
+    class SerializableDummy:
         def __init__(self, **cfg):
            if cfg:
                self.cfg = cfg

View File

@@ -6,7 +6,7 @@ from spacy.errors import add_codes
 
 
 @add_codes
-class Errors(object):
+class Errors:
     E001 = "error description"
 
 

View File

@@ -13,7 +13,7 @@ ALL_ATTRS = ("ORTH", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", "
 # fmt: on
 
 
-class DocBin(object):
+class DocBin:
     """Pack Doc objects for binary serialization.
 
     The DocBin class lets you efficiently serialize the information from a

View File

@@ -4,7 +4,7 @@ import copy
 from ..errors import Errors
 
 
-class Underscore(object):
+class Underscore:
     mutable_types = (dict, list, set)
     doc_extensions = {}
     span_extensions = {}

View File

@@ -997,7 +997,7 @@ class SimpleFrozenDict(dict):
         raise NotImplementedError(Errors.E095)
 
 
-class DummyTokenizer(object):
+class DummyTokenizer:
     # add dummy methods for to_bytes, from_bytes, to_disk and from_disk to
     # allow serialization (see #1557)
     def to_bytes(self, **kwargs):

View File

@@ -18,7 +18,7 @@ def unpickle_vectors(bytes_data):
     return Vectors().from_bytes(bytes_data)
 
 
-class GlobalRegistry(object):
+class GlobalRegistry:
     """Global store of vectors, to avoid repeatedly loading the data."""
 
     data = {}

View File

@@ -1004,7 +1004,7 @@ object. Let's say we have the following class as our tokenizer:
 import spacy
 from spacy.tokens import Doc
 
-class WhitespaceTokenizer(object):
+class WhitespaceTokenizer:
     def __init__(self, vocab):
         self.vocab = vocab
 

View File

@@ -401,7 +401,7 @@ import spacy
 from spacy.matcher import PhraseMatcher
 from spacy.tokens import Span
 
-class EntityMatcher(object):
+class EntityMatcher:
     name = "entity_matcher"
 
     def __init__(self, nlp, terms, label):
@@ -683,7 +683,7 @@ to `Doc.user_span_hooks` and `Doc.user_token_hooks`.
 
 ```python
 ### Add custom similarity hooks
-class SimilarityModel(object):
+class SimilarityModel:
     def __init__(self, model):
         self._model = model
 

View File

@@ -511,7 +511,7 @@ from spacy.tokens import Token
 
 # We're using a class because the component needs to be initialized with
 # the shared vocab via the nlp object
-class BadHTMLMerger(object):
+class BadHTMLMerger:
     def __init__(self, nlp):
         patterns = [
             [{"ORTH": "<"}, {"LOWER": "br"}, {"ORTH": ">"}],

View File

@@ -193,7 +193,7 @@ add to that data and saves and loads the data to and from a JSON file.
 
 ```python
 ### {highlight="15-19,21-26"}
-class CustomComponent(object):
+class CustomComponent:
     name = "my_component"
 
     def __init__(self):
@@ -345,7 +345,7 @@ snek = """
     `'--....--'`
 """
 
-class SnekFactory(object):
+class SnekFactory:
     def __init__(self, nlp, **cfg):
         self.nlp = nlp
 
@@ -433,7 +433,7 @@ nlp = spacy.load("en_core_snek_sm", snek_style="cute")
 ```python
 SNEKS = {"basic": snek, "cute": cute_snek}  # collection of sneks
 
-class SnekFactory(object):
+class SnekFactory:
     def __init__(self, nlp, **cfg):
         self.nlp = nlp
         self.snek_style = cfg.get("snek_style", "basic")
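
A repetitive cleanup like this can also be scripted rather than edited by hand. Below is a rough sketch, not part of the commit, of how the redundant `(object)` bases could be stripped across a checkout; the `spacy` directory name and the file globs are assumptions for illustration:

```python
import pathlib
import re

# Matches "class Name(object):" at any indentation level.
PATTERN = re.compile(r"^(\s*class\s+\w+)\(object\):", re.MULTILINE)

# Assumed layout: run from the repo root over Python and Cython sources.
for suffix in ("*.py", "*.pyx"):
    for path in pathlib.Path("spacy").rglob(suffix):
        text = path.read_text(encoding="utf8")
        stripped = PATTERN.sub(r"\1:", text)
        if stripped != text:
            path.write_text(stripped, encoding="utf8")
```

Markdown examples like those under `website/docs` would need their own globs, so a manual review of the result is still warranted.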