2017-10-10 03:21:28 +03:00
|
|
|
import functools
|
2019-03-11 14:50:44 +03:00
|
|
|
import copy
|
2017-10-10 03:21:28 +03:00
|
|
|
|
2018-04-03 16:50:31 +03:00
|
|
|
from ..errors import Errors
|
|
|
|
|
2017-10-27 15:39:09 +03:00
|
|
|
|
2017-10-07 19:56:01 +03:00
|
|
|
class Underscore(object):
    """Proxy object that exposes registered extension attributes of an object.

    Each extension is described by a (default, method, getter, setter) tuple
    stored in the `extensions` dict passed to the constructor. Reads and
    writes of attributes on this proxy are dispatched according to that
    tuple: getters/setters are called with the wrapped object, methods are
    returned as partials bound to the wrapped object, and plain values are
    stored in the `user_data` dict of the wrapped object's `doc`.
    """

    # Default values of these types are copied per object on first read so
    # that the registered default itself is never mutated (see #2581).
    mutable_types = (dict, list, set)
    # Class-level registries, exchanged as a whole via get_state/load_state.
    doc_extensions = {}
    span_extensions = {}
    token_extensions = {}

    def __init__(self, extensions, obj, start=None, end=None):
        """Create the proxy.

        extensions (dict): Maps extension names to
            (default, method, getter, setter) tuples.
        obj (object): The wrapped object. Must expose a `doc` attribute.
        start (object): Start offset used in the storage key, or None.
        end (object): End offset used in the storage key, or None.
        """
        # __setattr__ is overridden below and only accepts registered
        # extension names, so internal state is written via object directly.
        object.__setattr__(self, "_extensions", extensions)
        object.__setattr__(self, "_obj", obj)
        # Assumption is that for doc values, _start and _end will both be None
        # Span will set non-None values for _start and _end
        # Token will have _start be non-None, _end be None
        # This lets us key everything into the doc.user_data dictionary,
        # (see _get_key), and lets us use a single Underscore class.
        object.__setattr__(self, "_doc", obj.doc)
        object.__setattr__(self, "_start", start)
        object.__setattr__(self, "_end", end)

    def __dir__(self):
        """List the helper methods plus all registered extension names."""
        # Hack to enable autocomplete on custom extensions
        extensions = list(self._extensions.keys())
        return ["set", "get", "has"] + extensions

    def __getattr__(self, name):
        """Resolve an extension attribute.

        name (str): Name of the extension.
        RETURNS (object): The getter's result, a partial of the method bound
            to the wrapped object, the stored value, or the default.
        RAISES (AttributeError): If `name` is not a registered extension.
        """
        # __getattr__ only runs after normal lookup failed. Reading the
        # registry through object.__getattribute__ means an instance whose
        # __init__ has not run (e.g. created via __new__ while copying or
        # unpickling) raises a plain AttributeError instead of recursing
        # back into this method until RecursionError.
        extensions = object.__getattribute__(self, "_extensions")
        if name not in extensions:
            raise AttributeError(Errors.E046.format(name=name))
        default, method, getter, setter = extensions[name]
        if getter is not None:
            return getter(self._obj)
        if method is not None:
            method_partial = functools.partial(method, self._obj)
            # Hack to port over docstrings of the original function
            # See https://stackoverflow.com/q/27362727/6400719
            method_docstring = method.__doc__ or ""
            method_docstring_prefix = (
                "This method is a partial function and its first argument "
                "(the object it's called on) will be filled automatically. "
            )
            method_partial.__doc__ = method_docstring_prefix + method_docstring
            return method_partial
        key = self._get_key(name)
        if key in self._doc.user_data:
            return self._doc.user_data[key]
        if isinstance(default, self.mutable_types):
            # Handle mutable default arguments (see #2581): store a shallow
            # copy for this object so later in-place changes do not leak
            # into the registered default.
            new_default = copy.copy(default)
            self.__setattr__(name, new_default)
            return new_default
        return default

    def __setattr__(self, name, value):
        """Assign an extension attribute.

        name (str): Name of the extension.
        value (object): The value to assign.
        RETURNS (object): The setter's return value if the extension has a
            setter, otherwise None.
        RAISES (AttributeError): If `name` is not a registered extension.
        """
        # See __getattr__ for why the registry is read this way.
        extensions = object.__getattribute__(self, "_extensions")
        if name not in extensions:
            raise AttributeError(Errors.E047.format(name=name))
        default, method, getter, setter = extensions[name]
        if setter is not None:
            return setter(self._obj, value)
        self._doc.user_data[self._get_key(name)] = value

    def set(self, name, value):
        """Assign an extension attribute by name (same as attribute syntax)."""
        return self.__setattr__(name, value)

    def get(self, name):
        """Read an extension attribute by name (same as attribute syntax)."""
        return self.__getattr__(name)

    def has(self, name):
        """Check whether `name` is a registered extension.

        RETURNS (bool): True if the name is in the extensions dict.
        """
        return name in self._extensions

    def _get_key(self, name):
        """Build the tuple used as key into the doc's user_data dict."""
        return ("._.", name, self._start, self._end)

    @classmethod
    def get_state(cls):
        """Get the class-level registries.

        RETURNS (tuple): (token_extensions, span_extensions, doc_extensions).
        """
        return cls.token_extensions, cls.span_extensions, cls.doc_extensions

    @classmethod
    def load_state(cls, state):
        """Replace the class-level registries.

        state (tuple): (token_extensions, span_extensions, doc_extensions),
            in the same order as returned by get_state.
        """
        cls.token_extensions, cls.span_extensions, cls.doc_extensions = state
|
|
|
|
|
2018-04-03 19:30:17 +03:00
|
|
|
|
|
|
|
def get_ext_args(**kwargs):
    """Validate and convert arguments. Reused in Doc, Token and Span.

    Recognised keyword arguments are `default`, `method`, `getter` and
    `setter`. Exactly one of `default` (present as a key, even if its value
    is None), `method` or `getter` has to be given; a `setter` is only
    accepted together with a `getter`.

    RETURNS (tuple): The (default, method, getter, setter) tuple, with None
        for every option that was not provided.
    RAISES (ValueError): If a setter is given without a getter, if not
        exactly one of default/method/getter is defined, or if a given
        setter, getter or method is not callable.
    """
    default = kwargs.get("default")
    getter = kwargs.get("getter")
    setter = kwargs.get("setter")
    method = kwargs.get("method")
    if getter is None and setter is not None:
        raise ValueError(Errors.E089)
    # "default" counts as defined whenever the key is present, so that an
    # explicit default=None is still a valid registration.
    valid_opts = ("default" in kwargs, method is not None, getter is not None)
    nr_defined = sum(valid_opts)
    if nr_defined != 1:
        raise ValueError(Errors.E083.format(nr_defined=nr_defined))
    # callable() inspects the type, so objects that merely carry a
    # "__call__" instance attribute (and can't actually be called) are
    # rejected as well.
    for arg_name, func in (("setter", setter), ("getter", getter), ("method", method)):
        if func is not None and not callable(func):
            raise ValueError(Errors.E091.format(name=arg_name, value=repr(func)))
    return (default, method, getter, setter)
|
2019-02-24 20:38:47 +03:00
|
|
|
|
|
|
|
|
|
|
|
def is_writable_attr(ext):
    """Check if an extension attribute is writable.

    ext (tuple): The (default, method, getter, setter) tuple available via
        {Doc,Span,Token}.get_extension.
    RETURNS (bool): Whether the attribute is writable.
    """
    default, method, getter, setter = ext
    # Extension is writable if it has a setter (getter + setter) or if it
    # has a default value.
    if setter is not None or default is not None:
        return True
    # Otherwise the default value is None: the attribute is only writable if
    # none of the other values are set, i.e. it's a plain attribute whose
    # default is None rather than a method or a getter-only extension.
    return method is None and getter is None
|