mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Remove object subclassing
This commit is contained in:
parent
11bbc82c24
commit
5f6f4ff594
|
@ -312,7 +312,7 @@ def create_pretraining_model(nlp, tok2vec, pretrain_config):
|
|||
return model
|
||||
|
||||
|
||||
class ProgressTracker(object):
|
||||
class ProgressTracker:
|
||||
def __init__(self, frequency=1000000):
|
||||
self.loss = 0.0
|
||||
self.prev_loss = 0.0
|
||||
|
|
|
@ -16,7 +16,7 @@ DEFAULT_LANG = "en"
|
|||
DEFAULT_DIR = "ltr"
|
||||
|
||||
|
||||
class DependencyRenderer(object):
|
||||
class DependencyRenderer:
|
||||
"""Render dependency parses as SVGs."""
|
||||
|
||||
style = "dep"
|
||||
|
@ -224,7 +224,7 @@ class DependencyRenderer(object):
|
|||
return sorted(list(levels))
|
||||
|
||||
|
||||
class EntityRenderer(object):
|
||||
class EntityRenderer:
|
||||
"""Render named entities as HTML."""
|
||||
|
||||
style = "ent"
|
||||
|
|
|
@ -15,7 +15,7 @@ def add_codes(err_cls):
|
|||
# fmt: off
|
||||
|
||||
@add_codes
|
||||
class Warnings(object):
|
||||
class Warnings:
|
||||
W004 = ("No text fixing enabled. Run `pip install ftfy` to enable fixing "
|
||||
"using ftfy.fix_text if necessary.")
|
||||
W005 = ("Doc object not parsed. This means displaCy won't be able to "
|
||||
|
@ -118,7 +118,7 @@ class Warnings(object):
|
|||
|
||||
|
||||
@add_codes
|
||||
class Errors(object):
|
||||
class Errors:
|
||||
E001 = ("No component '{name}' found in pipeline. Available names: {opts}")
|
||||
E002 = ("Can't find factory for '{name}'. This usually happens when spaCy "
|
||||
"calls `nlp.create_pipe` with a component name that's not built "
|
||||
|
@ -538,7 +538,7 @@ class Errors(object):
|
|||
|
||||
|
||||
@add_codes
|
||||
class TempErrors(object):
|
||||
class TempErrors:
|
||||
T003 = ("Resizing pretrained Tagger models is not currently supported.")
|
||||
T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
|
||||
"issue tracker: http://github.com/explosion/spaCy/issues")
|
||||
|
|
|
@ -39,7 +39,7 @@ from . import about
|
|||
ENABLE_PIPELINE_ANALYSIS = False
|
||||
|
||||
|
||||
class BaseDefaults(object):
|
||||
class BaseDefaults:
|
||||
@classmethod
|
||||
def create_lemmatizer(cls, nlp=None, lookups=None):
|
||||
if lookups is None:
|
||||
|
@ -126,7 +126,7 @@ class BaseDefaults(object):
|
|||
paired_orth_variants = []
|
||||
|
||||
|
||||
class Language(object):
|
||||
class Language:
|
||||
"""A text-processing pipeline. Usually you'll load this once per process,
|
||||
and pass the instance around your application.
|
||||
|
||||
|
@ -531,10 +531,16 @@ class Language(object):
|
|||
if len(examples) == 0:
|
||||
return losses
|
||||
if not isinstance(examples, Iterable):
|
||||
raise TypeError(Errors.E978.format(name="language", method="update", types=type(examples)))
|
||||
raise TypeError(
|
||||
Errors.E978.format(
|
||||
name="language", method="update", types=type(examples)
|
||||
)
|
||||
)
|
||||
wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)])
|
||||
if wrong_types:
|
||||
raise TypeError(Errors.E978.format(name="language", method="update", types=wrong_types))
|
||||
raise TypeError(
|
||||
Errors.E978.format(name="language", method="update", types=wrong_types)
|
||||
)
|
||||
|
||||
if sgd is None:
|
||||
if self._optimizer is None:
|
||||
|
@ -580,10 +586,18 @@ class Language(object):
|
|||
if len(examples) == 0:
|
||||
return
|
||||
if not isinstance(examples, Iterable):
|
||||
raise TypeError(Errors.E978.format(name="language", method="rehearse", types=type(examples)))
|
||||
raise TypeError(
|
||||
Errors.E978.format(
|
||||
name="language", method="rehearse", types=type(examples)
|
||||
)
|
||||
)
|
||||
wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)])
|
||||
if wrong_types:
|
||||
raise TypeError(Errors.E978.format(name="language", method="rehearse", types=wrong_types))
|
||||
raise TypeError(
|
||||
Errors.E978.format(
|
||||
name="language", method="rehearse", types=wrong_types
|
||||
)
|
||||
)
|
||||
if sgd is None:
|
||||
if self._optimizer is None:
|
||||
self._optimizer = create_default_optimizer()
|
||||
|
@ -692,10 +706,18 @@ class Language(object):
|
|||
DOCS: https://spacy.io/api/language#evaluate
|
||||
"""
|
||||
if not isinstance(examples, Iterable):
|
||||
raise TypeError(Errors.E978.format(name="language", method="evaluate", types=type(examples)))
|
||||
raise TypeError(
|
||||
Errors.E978.format(
|
||||
name="language", method="evaluate", types=type(examples)
|
||||
)
|
||||
)
|
||||
wrong_types = set([type(eg) for eg in examples if not isinstance(eg, Example)])
|
||||
if wrong_types:
|
||||
raise TypeError(Errors.E978.format(name="language", method="evaluate", types=wrong_types))
|
||||
raise TypeError(
|
||||
Errors.E978.format(
|
||||
name="language", method="evaluate", types=wrong_types
|
||||
)
|
||||
)
|
||||
if scorer is None:
|
||||
scorer = Scorer(pipeline=self.pipeline)
|
||||
if component_cfg is None:
|
||||
|
@ -1043,7 +1065,7 @@ class Language(object):
|
|||
return self
|
||||
|
||||
|
||||
class component(object):
|
||||
class component:
|
||||
"""Decorator for pipeline components. Can decorate both function components
|
||||
and class components and will automatically register components in the
|
||||
Language.factories. If the component is a class and needs access to the
|
||||
|
|
|
@ -2,7 +2,7 @@ from .errors import Errors
|
|||
from .parts_of_speech import NAMES as UPOS_NAMES
|
||||
|
||||
|
||||
class Lemmatizer(object):
|
||||
class Lemmatizer:
|
||||
"""
|
||||
The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
|
||||
lookup tables.
|
||||
|
|
|
@ -10,7 +10,7 @@ from .strings import get_string_id
|
|||
UNSET = object()
|
||||
|
||||
|
||||
class Lookups(object):
|
||||
class Lookups:
|
||||
"""Container for large lookup tables and dictionaries, e.g. lemmatization
|
||||
data or tokenizer exception lists. Lookups are available via vocab.lookups,
|
||||
so they can be accessed before the pipeline components are applied (e.g.
|
||||
|
|
|
@ -690,7 +690,7 @@ def _get_attr_values(spec, string_store):
|
|||
# These predicate helper classes are used to match the REGEX, IN, >= etc
|
||||
# extensions to the matcher introduced in #3173.
|
||||
|
||||
class _RegexPredicate(object):
|
||||
class _RegexPredicate:
|
||||
operators = ("REGEX",)
|
||||
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False):
|
||||
|
@ -711,7 +711,7 @@ class _RegexPredicate(object):
|
|||
return bool(self.value.search(value))
|
||||
|
||||
|
||||
class _SetMemberPredicate(object):
|
||||
class _SetMemberPredicate:
|
||||
operators = ("IN", "NOT_IN")
|
||||
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False):
|
||||
|
@ -738,7 +738,7 @@ class _SetMemberPredicate(object):
|
|||
return repr(("SetMemberPredicate", self.i, self.attr, self.value, self.predicate))
|
||||
|
||||
|
||||
class _ComparisonPredicate(object):
|
||||
class _ComparisonPredicate:
|
||||
operators = ("==", "!=", ">=", "<=", ">", "<")
|
||||
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False):
|
||||
|
|
|
@ -94,7 +94,7 @@ def build_masked_language_model(vocab, wrapped_model, mask_prob=0.15):
|
|||
return mlm_model
|
||||
|
||||
|
||||
class _RandomWords(object):
|
||||
class _RandomWords:
|
||||
def __init__(self, vocab):
|
||||
self.words = [lex.text for lex in vocab if lex.prob != 0.0]
|
||||
self.probs = [lex.prob for lex in vocab if lex.prob != 0.0]
|
||||
|
|
|
@ -11,7 +11,7 @@ DEFAULT_ENT_ID_SEP = "||"
|
|||
|
||||
|
||||
@component("entity_ruler", assigns=["doc.ents", "token.ent_type", "token.ent_iob"])
|
||||
class EntityRuler(object):
|
||||
class EntityRuler:
|
||||
"""The EntityRuler lets you add spans to the `Doc.ents` using token-based
|
||||
rules or exact phrase matches. It can be combined with the statistical
|
||||
`EntityRecognizer` to boost accuracy, or used on its own to implement a
|
||||
|
|
|
@ -6,7 +6,7 @@ from ..util import link_vectors_to_models
|
|||
|
||||
|
||||
@component("sentencizer_hook", assigns=["doc.user_hooks"])
|
||||
class SentenceSegmenter(object):
|
||||
class SentenceSegmenter:
|
||||
"""A simple spaCy hook, to allow custom sentence boundary detection logic
|
||||
(that doesn't require the dependency parse). To change the sentence
|
||||
boundary detection strategy, pass a generator function `strategy` on
|
||||
|
|
|
@ -35,7 +35,7 @@ def _load_cfg(path):
|
|||
return {}
|
||||
|
||||
|
||||
class Pipe(object):
|
||||
class Pipe:
|
||||
"""This class is not instantiated directly. Components inherit from it, and
|
||||
it defines the interface that components should follow to function as
|
||||
components in a spaCy analysis pipeline.
|
||||
|
|
|
@ -3,7 +3,7 @@ import numpy as np
|
|||
from .errors import Errors
|
||||
|
||||
|
||||
class PRFScore(object):
|
||||
class PRFScore:
|
||||
"""
|
||||
A precision / recall / F score
|
||||
"""
|
||||
|
@ -33,7 +33,7 @@ class PRFScore(object):
|
|||
return 2 * ((p * r) / (p + r + 1e-100))
|
||||
|
||||
|
||||
class ROCAUCScore(object):
|
||||
class ROCAUCScore:
|
||||
"""
|
||||
An AUC ROC score.
|
||||
"""
|
||||
|
@ -62,7 +62,7 @@ class ROCAUCScore(object):
|
|||
return self.saved_score
|
||||
|
||||
|
||||
class Scorer(object):
|
||||
class Scorer:
|
||||
"""Compute evaluation scores."""
|
||||
|
||||
def __init__(self, eval_punct=False, pipeline=None):
|
||||
|
|
|
@ -378,7 +378,7 @@ def test_ner_warns_no_lookups():
|
|||
assert not record.list
|
||||
|
||||
|
||||
class BlockerComponent1(object):
|
||||
class BlockerComponent1:
|
||||
name = "my_blocker"
|
||||
|
||||
def __init__(self, start, end):
|
||||
|
|
|
@ -20,7 +20,7 @@ def test_component_decorator_function():
|
|||
|
||||
def test_component_decorator_class():
|
||||
@component(name="test")
|
||||
class TestComponent(object):
|
||||
class TestComponent:
|
||||
"""docstring1"""
|
||||
|
||||
foo = "bar"
|
||||
|
@ -97,7 +97,7 @@ def test_component_factories_from_nlp():
|
|||
"""Test that class components can implement a from_nlp classmethod that
|
||||
gives them access to the nlp object and config via the factory."""
|
||||
|
||||
class TestComponent5(object):
|
||||
class TestComponent5:
|
||||
def __call__(self, doc):
|
||||
return doc
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ from spacy.tests.util import make_tempdir
|
|||
|
||||
|
||||
def test_issue5137():
|
||||
class MyComponent(object):
|
||||
class MyComponent:
|
||||
name = "my_component"
|
||||
|
||||
def __init__(self, nlp, **cfg):
|
||||
|
|
|
@ -24,7 +24,7 @@ def vectors():
|
|||
|
||||
def custom_pipe():
|
||||
# create dummy pipe partially implementing interface -- only want to test to_disk
|
||||
class SerializableDummy(object):
|
||||
class SerializableDummy:
|
||||
def __init__(self, **cfg):
|
||||
if cfg:
|
||||
self.cfg = cfg
|
||||
|
|
|
@ -6,7 +6,7 @@ from spacy.errors import add_codes
|
|||
|
||||
|
||||
@add_codes
|
||||
class Errors(object):
|
||||
class Errors:
|
||||
E001 = "error description"
|
||||
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ ALL_ATTRS = ("ORTH", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", "
|
|||
# fmt: on
|
||||
|
||||
|
||||
class DocBin(object):
|
||||
class DocBin:
|
||||
"""Pack Doc objects for binary serialization.
|
||||
|
||||
The DocBin class lets you efficiently serialize the information from a
|
||||
|
|
|
@ -4,7 +4,7 @@ import copy
|
|||
from ..errors import Errors
|
||||
|
||||
|
||||
class Underscore(object):
|
||||
class Underscore:
|
||||
mutable_types = (dict, list, set)
|
||||
doc_extensions = {}
|
||||
span_extensions = {}
|
||||
|
|
|
@ -997,7 +997,7 @@ class SimpleFrozenDict(dict):
|
|||
raise NotImplementedError(Errors.E095)
|
||||
|
||||
|
||||
class DummyTokenizer(object):
|
||||
class DummyTokenizer:
|
||||
# add dummy methods for to_bytes, from_bytes, to_disk and from_disk to
|
||||
# allow serialization (see #1557)
|
||||
def to_bytes(self, **kwargs):
|
||||
|
|
|
@ -18,7 +18,7 @@ def unpickle_vectors(bytes_data):
|
|||
return Vectors().from_bytes(bytes_data)
|
||||
|
||||
|
||||
class GlobalRegistry(object):
|
||||
class GlobalRegistry:
|
||||
"""Global store of vectors, to avoid repeatedly loading the data."""
|
||||
data = {}
|
||||
|
||||
|
|
|
@ -1004,7 +1004,7 @@ object. Let's say we have the following class as our tokenizer:
|
|||
import spacy
|
||||
from spacy.tokens import Doc
|
||||
|
||||
class WhitespaceTokenizer(object):
|
||||
class WhitespaceTokenizer:
|
||||
def __init__(self, vocab):
|
||||
self.vocab = vocab
|
||||
|
||||
|
|
|
@ -401,7 +401,7 @@ import spacy
|
|||
from spacy.matcher import PhraseMatcher
|
||||
from spacy.tokens import Span
|
||||
|
||||
class EntityMatcher(object):
|
||||
class EntityMatcher:
|
||||
name = "entity_matcher"
|
||||
|
||||
def __init__(self, nlp, terms, label):
|
||||
|
@ -683,7 +683,7 @@ to `Doc.user_span_hooks` and `Doc.user_token_hooks`.
|
|||
|
||||
```python
|
||||
### Add custom similarity hooks
|
||||
class SimilarityModel(object):
|
||||
class SimilarityModel:
|
||||
def __init__(self, model):
|
||||
self._model = model
|
||||
|
||||
|
|
|
@ -511,7 +511,7 @@ from spacy.tokens import Token
|
|||
|
||||
# We're using a class because the component needs to be initialized with
|
||||
# the shared vocab via the nlp object
|
||||
class BadHTMLMerger(object):
|
||||
class BadHTMLMerger:
|
||||
def __init__(self, nlp):
|
||||
patterns = [
|
||||
[{"ORTH": "<"}, {"LOWER": "br"}, {"ORTH": ">"}],
|
||||
|
|
|
@ -193,7 +193,7 @@ add to that data and saves and loads the data to and from a JSON file.
|
|||
|
||||
```python
|
||||
### {highlight="15-19,21-26"}
|
||||
class CustomComponent(object):
|
||||
class CustomComponent:
|
||||
name = "my_component"
|
||||
|
||||
def __init__(self):
|
||||
|
@ -345,7 +345,7 @@ snek = """
|
|||
`'--....--'`
|
||||
"""
|
||||
|
||||
class SnekFactory(object):
|
||||
class SnekFactory:
|
||||
def __init__(self, nlp, **cfg):
|
||||
self.nlp = nlp
|
||||
|
||||
|
@ -433,7 +433,7 @@ nlp = spacy.load("en_core_snek_sm", snek_style="cute")
|
|||
```python
|
||||
SNEKS = {"basic": snek, "cute": cute_snek} # collection of sneks
|
||||
|
||||
class SnekFactory(object):
|
||||
class SnekFactory:
|
||||
def __init__(self, nlp, **cfg):
|
||||
self.nlp = nlp
|
||||
self.snek_style = cfg.get("snek_style", "basic")
|
||||
|
|
Loading…
Reference in New Issue
Block a user