From 5f6f4ff59436d4ca87e4b4f065665c83c5b9e164 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sun, 12 Jul 2020 14:03:23 +0200 Subject: [PATCH] Remove object subclassing --- spacy/cli/pretrain.py | 2 +- spacy/displacy/render.py | 4 +-- spacy/errors.py | 6 ++-- spacy/language.py | 40 +++++++++++++++++----- spacy/lemmatizer.py | 2 +- spacy/lookups.py | 2 +- spacy/matcher/matcher.pyx | 6 ++-- spacy/ml/models/multi_task.py | 2 +- spacy/pipeline/entityruler.py | 2 +- spacy/pipeline/hooks.py | 2 +- spacy/pipeline/pipes.pyx | 2 +- spacy/scorer.py | 6 ++-- spacy/tests/parser/test_ner.py | 2 +- spacy/tests/pipeline/test_analysis.py | 4 +-- spacy/tests/regression/test_issue5137.py | 2 +- spacy/tests/regression/test_issue5230.py | 2 +- spacy/tests/test_errors.py | 2 +- spacy/tokens/_serialize.py | 2 +- spacy/tokens/underscore.py | 2 +- spacy/util.py | 2 +- spacy/vectors.pyx | 2 +- website/docs/usage/linguistic-features.md | 2 +- website/docs/usage/processing-pipelines.md | 4 +-- website/docs/usage/rule-based-matching.md | 2 +- website/docs/usage/saving-loading.md | 6 ++-- 25 files changed, 66 insertions(+), 44 deletions(-) diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py index 58e82028b..c05e9cb35 100644 --- a/spacy/cli/pretrain.py +++ b/spacy/cli/pretrain.py @@ -312,7 +312,7 @@ def create_pretraining_model(nlp, tok2vec, pretrain_config): return model -class ProgressTracker(object): +class ProgressTracker: def __init__(self, frequency=1000000): self.loss = 0.0 self.prev_loss = 0.0 diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py index ef8632cbc..fcf4ccaa6 100644 --- a/spacy/displacy/render.py +++ b/spacy/displacy/render.py @@ -16,7 +16,7 @@ DEFAULT_LANG = "en" DEFAULT_DIR = "ltr" -class DependencyRenderer(object): +class DependencyRenderer: """Render dependency parses as SVGs.""" style = "dep" @@ -224,7 +224,7 @@ class DependencyRenderer(object): return sorted(list(levels)) -class EntityRenderer(object): +class EntityRenderer: """Render named entities as HTML.""" style = "ent" diff --git a/spacy/errors.py b/spacy/errors.py index fa432382d..45de5ed45 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -15,7 +15,7 @@ def add_codes(err_cls): # fmt: off @add_codes -class Warnings(object): +class Warnings: W004 = ("No text fixing enabled. Run `pip install ftfy` to enable fixing " "using ftfy.fix_text if necessary.") W005 = ("Doc object not parsed. This means displaCy won't be able to " @@ -118,7 +118,7 @@ class Warnings(object): @add_codes -class Errors(object): +class Errors: E001 = ("No component '{name}' found in pipeline. Available names: {opts}") E002 = ("Can't find factory for '{name}'. This usually happens when spaCy " "calls `nlp.create_pipe` with a component name that's not built " @@ -538,7 +538,7 @@ class Errors(object): @add_codes -class TempErrors(object): +class TempErrors: T003 = ("Resizing pretrained Tagger models is not currently supported.") T007 = ("Can't yet set {attr} from Span. Vote for this feature on the " "issue tracker: http://github.com/explosion/spaCy/issues") diff --git a/spacy/language.py b/spacy/language.py index 32c8512fc..5f35357a4 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -39,7 +39,7 @@ from . import about ENABLE_PIPELINE_ANALYSIS = False -class BaseDefaults(object): +class BaseDefaults: @classmethod def create_lemmatizer(cls, nlp=None, lookups=None): if lookups is None: @@ -126,7 +126,7 @@ class BaseDefaults(object): paired_orth_variants = [] -class Language(object): +class Language: """A text-processing pipeline. Usually you'll load this once per process, and pass the instance around your application. @@ -531,10 +531,16 @@ class Language(object): if len(examples) == 0: return losses if not isinstance(examples, Iterable): - raise TypeError(Errors.E978.format(name="language", method="update", types=type(examples))) + raise TypeError( + Errors.E978.format( + name="language", method="update", types=type(examples) + ) + ) wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)]) if wrong_types: - raise TypeError(Errors.E978.format(name="language", method="update", types=wrong_types)) + raise TypeError( + Errors.E978.format(name="language", method="update", types=wrong_types) + ) if sgd is None: if self._optimizer is None: @@ -580,10 +586,18 @@ class Language(object): if len(examples) == 0: return if not isinstance(examples, Iterable): - raise TypeError(Errors.E978.format(name="language", method="rehearse", types=type(examples))) + raise TypeError( + Errors.E978.format( + name="language", method="rehearse", types=type(examples) + ) + ) wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)]) if wrong_types: - raise TypeError(Errors.E978.format(name="language", method="rehearse", types=wrong_types)) + raise TypeError( + Errors.E978.format( + name="language", method="rehearse", types=wrong_types + ) + ) if sgd is None: if self._optimizer is None: self._optimizer = create_default_optimizer() @@ -692,10 +706,18 @@ class Language(object): DOCS: https://spacy.io/api/language#evaluate """ if not isinstance(examples, Iterable): - raise TypeError(Errors.E978.format(name="language", method="evaluate", types=type(examples))) + raise TypeError( + Errors.E978.format( + name="language", method="evaluate", types=type(examples) + ) + ) wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)]) if wrong_types: - raise TypeError(Errors.E978.format(name="language", method="evaluate", types=wrong_types)) + raise TypeError( + Errors.E978.format( + name="language", method="evaluate", types=wrong_types + ) + ) if scorer is None: scorer = Scorer(pipeline=self.pipeline) if component_cfg is None: @@ -1043,7 +1065,7 @@ class Language(object): return self -class component(object): +class component: """Decorator for pipeline components. Can decorate both function components and class components and will automatically register components in the Language.factories. If the component is a class and needs access to the diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index c108c975a..9546b1e1c 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -2,7 +2,7 @@ from .errors import Errors from .parts_of_speech import NAMES as UPOS_NAMES -class Lemmatizer(object): +class Lemmatizer: """ The Lemmatizer supports simple part-of-speech-sensitive suffix rules and lookup tables. diff --git a/spacy/lookups.py b/spacy/lookups.py index d6aa5f9a0..2c8f430aa 100644 --- a/spacy/lookups.py +++ b/spacy/lookups.py @@ -10,7 +10,7 @@ from .strings import get_string_id UNSET = object() -class Lookups(object): +class Lookups: """Container for large lookup tables and dictionaries, e.g. lemmatization data or tokenizer exception lists. Lookups are available via vocab.lookups, so they can be accessed before the pipeline components are applied (e.g. diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 673cb3298..6c8ee4204 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -690,7 +690,7 @@ def _get_attr_values(spec, string_store): # These predicate helper classes are used to match the REGEX, IN, >= etc # extensions to the matcher introduced in #3173. -class _RegexPredicate(object): +class _RegexPredicate: operators = ("REGEX",) def __init__(self, i, attr, value, predicate, is_extension=False): @@ -711,7 +711,7 @@ class _RegexPredicate(object): return bool(self.value.search(value)) -class _SetMemberPredicate(object): +class _SetMemberPredicate: operators = ("IN", "NOT_IN") def __init__(self, i, attr, value, predicate, is_extension=False): @@ -738,7 +738,7 @@ class _SetMemberPredicate(object): return repr(("SetMemberPredicate", self.i, self.attr, self.value, self.predicate)) -class _ComparisonPredicate(object): +class _ComparisonPredicate: operators = ("==", "!=", ">=", "<=", ">", "<") def __init__(self, i, attr, value, predicate, is_extension=False): diff --git a/spacy/ml/models/multi_task.py b/spacy/ml/models/multi_task.py index 8a00f5c95..ed85b1a91 100644 --- a/spacy/ml/models/multi_task.py +++ b/spacy/ml/models/multi_task.py @@ -94,7 +94,7 @@ def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15): return mlm_model -class _RandomWords(object): +class _RandomWords: def __init__(self, vocab): self.words = [lex.text for lex in vocab if lex.prob != 0.0] self.probs = [lex.prob for lex in vocab if lex.prob != 0.0] diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index bdc009192..d9c950ad0 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -11,7 +11,7 @@ DEFAULT_ENT_ID_SEP = "||" @component("entity_ruler", assigns=["doc.ents", "token.ent_type", "token.ent_iob"]) -class EntityRuler(object): +class EntityRuler: """The EntityRuler lets you add spans to the `Doc.ents` using token-based rules or exact phrase matches. It can be combined with the statistical `EntityRecognizer` to boost accuracy, or used on its own to implement a diff --git a/spacy/pipeline/hooks.py b/spacy/pipeline/hooks.py index a97e7be68..368e120ab 100644 --- a/spacy/pipeline/hooks.py +++ b/spacy/pipeline/hooks.py @@ -6,7 +6,7 @@ from ..util import link_vectors_to_models @component("sentencizer_hook", assigns=["doc.user_hooks"]) -class SentenceSegmenter(object): +class SentenceSegmenter: """A simple spaCy hook, to allow custom sentence boundary detection logic (that doesn't require the dependency parse). To change the sentence boundary detection strategy, pass a generator function `strategy` on diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index c35cb4b68..1234733d0 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -35,7 +35,7 @@ def _load_cfg(path): return {} -class Pipe(object): +class Pipe: """This class is not instantiated directly. Components inherit from it, and it defines the interface that components should follow to function as components in a spaCy analysis pipeline. diff --git a/spacy/scorer.py b/spacy/scorer.py index 6fc86e412..512f27e07 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -3,7 +3,7 @@ import numpy as np from .errors import Errors -class PRFScore(object): +class PRFScore: """ A precision / recall / F score """ @@ -33,7 +33,7 @@ class PRFScore(object): return 2 * ((p * r) / (p + r + 1e-100)) -class ROCAUCScore(object): +class ROCAUCScore: """ An AUC ROC score. """ @@ -62,7 +62,7 @@ class ROCAUCScore(object): return self.saved_score -class Scorer(object): +class Scorer: """Compute evaluation scores.""" def __init__(self, eval_punct=False, pipeline=None): diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index 2f828e7fa..ad7688344 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -378,7 +378,7 @@ def test_ner_warns_no_lookups(): assert not record.list -class BlockerComponent1(object): +class BlockerComponent1: name = "my_blocker" def __init__(self, start, end): diff --git a/spacy/tests/pipeline/test_analysis.py b/spacy/tests/pipeline/test_analysis.py index b826438f5..85f88c22c 100644 --- a/spacy/tests/pipeline/test_analysis.py +++ b/spacy/tests/pipeline/test_analysis.py @@ -20,7 +20,7 @@ def test_component_decorator_function(): def test_component_decorator_class(): @component(name="test") - class TestComponent(object): + class TestComponent: """docstring1""" foo = "bar" @@ -97,7 +97,7 @@ def test_component_factories_from_nlp(): """Test that class components can implement a from_nlp classmethod that gives them access to the nlp object and config via the factory.""" - class TestComponent5(object): + class TestComponent5: def __call__(self, doc): return doc diff --git a/spacy/tests/regression/test_issue5137.py b/spacy/tests/regression/test_issue5137.py index e9fd268c8..b621b5faa 100644 --- a/spacy/tests/regression/test_issue5137.py +++ b/spacy/tests/regression/test_issue5137.py @@ -5,7 +5,7 @@ from spacy.tests.util import make_tempdir def test_issue5137(): - class MyComponent(object): + class MyComponent: name = "my_component" def __init__(self, nlp, **cfg): diff --git a/spacy/tests/regression/test_issue5230.py b/spacy/tests/regression/test_issue5230.py index 86020bf17..d634ee35c 100644 --- a/spacy/tests/regression/test_issue5230.py +++ b/spacy/tests/regression/test_issue5230.py @@ -24,7 +24,7 @@ def vectors(): def custom_pipe(): # create dummy pipe partially implementing interface -- only want to test to_disk - class SerializableDummy(object): + class SerializableDummy: def __init__(self, **cfg): if cfg: self.cfg = cfg diff --git a/spacy/tests/test_errors.py b/spacy/tests/test_errors.py index 1bd4eec7f..e79abc6ab 100644 --- a/spacy/tests/test_errors.py +++ b/spacy/tests/test_errors.py @@ -6,7 +6,7 @@ from spacy.errors import add_codes @add_codes -class Errors(object): +class Errors: E001 = "error description" diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index f2374bdc6..96245a0e1 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -13,7 +13,7 @@ ALL_ATTRS = ("ORTH", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", " # fmt: on -class DocBin(object): +class DocBin: """Pack Doc objects for binary serialization. The DocBin class lets you efficiently serialize the information from a diff --git a/spacy/tokens/underscore.py b/spacy/tokens/underscore.py index fab10b94d..b7966fd6e 100644 --- a/spacy/tokens/underscore.py +++ b/spacy/tokens/underscore.py @@ -4,7 +4,7 @@ import copy from ..errors import Errors -class Underscore(object): +class Underscore: mutable_types = (dict, list, set) doc_extensions = {} span_extensions = {} diff --git a/spacy/util.py b/spacy/util.py index 4ed002f37..58b83b63b 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -997,7 +997,7 @@ class SimpleFrozenDict(dict): raise NotImplementedError(Errors.E095) -class DummyTokenizer(object): +class DummyTokenizer: # add dummy methods for to_bytes, from_bytes, to_disk and from_disk to # allow serialization (see #1557) def to_bytes(self, **kwargs): diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 81f3affca..0cc7409a7 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -18,7 +18,7 @@ def unpickle_vectors(bytes_data): return Vectors().from_bytes(bytes_data) -class GlobalRegistry(object): +class GlobalRegistry: """Global store of vectors, to avoid repeatedly loading the data.""" data = {} diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md index 9c028ce61..27512f61b 100644 --- a/website/docs/usage/linguistic-features.md +++ b/website/docs/usage/linguistic-features.md @@ -1004,7 +1004,7 @@ object. Let's say we have the following class as our tokenizer: import spacy from spacy.tokens import Doc -class WhitespaceTokenizer(object): +class WhitespaceTokenizer: def __init__(self, vocab): self.vocab = vocab diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md index fc335ac5d..5679c318e 100644 --- a/website/docs/usage/processing-pipelines.md +++ b/website/docs/usage/processing-pipelines.md @@ -401,7 +401,7 @@ import spacy from spacy.matcher import PhraseMatcher from spacy.tokens import Span -class EntityMatcher(object): +class EntityMatcher: name = "entity_matcher" def __init__(self, nlp, terms, label): @@ -683,7 +683,7 @@ to `Doc.user_span_hooks` and `Doc.user_token_hooks`. ```python ### Add custom similarity hooks -class SimilarityModel(object): +class SimilarityModel: def __init__(self, model): self._model = model diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index 392bcf0c0..14d394651 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -511,7 +511,7 @@ from spacy.tokens import Token # We're using a class because the component needs to be initialized with # the shared vocab via the nlp object -class BadHTMLMerger(object): +class BadHTMLMerger: def __init__(self, nlp): patterns = [ [{"ORTH": "<"}, {"LOWER": "br"}, {"ORTH": ">"}], diff --git a/website/docs/usage/saving-loading.md b/website/docs/usage/saving-loading.md index ac6b275d8..6d17ee7e3 100644 --- a/website/docs/usage/saving-loading.md +++ b/website/docs/usage/saving-loading.md @@ -193,7 +193,7 @@ add to that data and saves and loads the data to and from a JSON file. ```python ### {highlight="15-19,21-26"} -class CustomComponent(object): +class CustomComponent: name = "my_component" def __init__(self): @@ -345,7 +345,7 @@ snek = """ `'--....--'` """ -class SnekFactory(object): +class SnekFactory: def __init__(self, nlp, **cfg): self.nlp = nlp @@ -433,7 +433,7 @@ nlp = spacy.load("en_core_snek_sm", snek_style="cute") ```python SNEKS = {"basic": snek, "cute": cute_snek} # collection of sneks -class SnekFactory(object): +class SnekFactory: def __init__(self, nlp, **cfg): self.nlp = nlp self.snek_style = cfg.get("snek_style", "basic")