mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Merge branch 'master' into spacy.io
This commit is contained in:
		
						commit
						3cbbc4afcb
					
				|  | @ -383,6 +383,8 @@ class Errors(object): | ||||||
|     E133 = ("The sum of prior probabilities for alias '{alias}' should not exceed 1, " |     E133 = ("The sum of prior probabilities for alias '{alias}' should not exceed 1, " | ||||||
|             "but found {sum}.") |             "but found {sum}.") | ||||||
|     E134 = ("Alias '{alias}' defined for unknown entity '{entity}'.") |     E134 = ("Alias '{alias}' defined for unknown entity '{entity}'.") | ||||||
|  |     E135 = ("If you meant to replace a built-in component, use `create_pipe`: " | ||||||
|  |             "`nlp.replace_pipe('{name}', nlp.create_pipe('{name}'))`") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @add_codes | @add_codes | ||||||
|  |  | ||||||
|  | @ -333,6 +333,11 @@ class Language(object): | ||||||
|         """ |         """ | ||||||
|         if name not in self.pipe_names: |         if name not in self.pipe_names: | ||||||
|             raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names)) |             raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names)) | ||||||
|  |         if not hasattr(component, "__call__"): | ||||||
|  |             msg = Errors.E003.format(component=repr(component), name=name) | ||||||
|  |             if isinstance(component, basestring_) and component in self.factories: | ||||||
|  |                 msg += Errors.E135.format(name=name) | ||||||
|  |             raise ValueError(msg) | ||||||
|         self.pipeline[self.pipe_names.index(name)] = (name, component) |         self.pipeline[self.pipe_names.index(name)] = (name, component) | ||||||
| 
 | 
 | ||||||
|     def rename_pipe(self, old_name, new_name): |     def rename_pipe(self, old_name, new_name): | ||||||
|  |  | ||||||
|  | @ -140,3 +140,28 @@ def test_underscore_mutable_defaults_dict(en_vocab): | ||||||
|     assert len(token1._.mutable) == 2 |     assert len(token1._.mutable) == 2 | ||||||
|     assert token1._.mutable["x"] == ["y"] |     assert token1._.mutable["x"] == ["y"] | ||||||
|     assert len(token2._.mutable) == 0 |     assert len(token2._.mutable) == 0 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_underscore_dir(en_vocab): | ||||||
|  |     """Test that dir() correctly returns extension attributes. This enables | ||||||
|  |     things like tab-completion for the attributes in doc._.""" | ||||||
|  |     Doc.set_extension("test_dir", default=None) | ||||||
|  |     doc = Doc(en_vocab, words=["hello", "world"]) | ||||||
|  |     assert "_" in dir(doc) | ||||||
|  |     assert "test_dir" in dir(doc._) | ||||||
|  |     assert "test_dir" not in dir(doc[0]._) | ||||||
|  |     assert "test_dir" not in dir(doc[0:2]._) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_underscore_docstring(en_vocab): | ||||||
|  |     """Test that docstrings are available for extension methods, even though | ||||||
|  |     they're partials.""" | ||||||
|  | 
 | ||||||
|  |     def test_method(doc, arg1=1, arg2=2): | ||||||
|  |         """I am a docstring""" | ||||||
|  |         return (arg1, arg2) | ||||||
|  | 
 | ||||||
|  |     Doc.set_extension("test_docstrings", method=test_method) | ||||||
|  |     doc = Doc(en_vocab, words=["hello", "world"]) | ||||||
|  |     assert test_method.__doc__ == "I am a docstring" | ||||||
|  |     assert doc._.test_docstrings.__doc__.rsplit(". ")[-1] == "I am a docstring" | ||||||
|  |  | ||||||
|  | @ -52,11 +52,13 @@ def test_get_pipe(nlp, name): | ||||||
|     assert nlp.get_pipe(name) == new_pipe |     assert nlp.get_pipe(name) == new_pipe | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @pytest.mark.parametrize("name,replacement", [("my_component", lambda doc: doc)]) | @pytest.mark.parametrize("name,replacement,not_callable", [("my_component", lambda doc: doc, {})]) | ||||||
| def test_replace_pipe(nlp, name, replacement): | def test_replace_pipe(nlp, name, replacement, not_callable): | ||||||
|     with pytest.raises(ValueError): |     with pytest.raises(ValueError): | ||||||
|         nlp.replace_pipe(name, new_pipe) |         nlp.replace_pipe(name, new_pipe) | ||||||
|     nlp.add_pipe(new_pipe, name=name) |     nlp.add_pipe(new_pipe, name=name) | ||||||
|  |     with pytest.raises(ValueError): | ||||||
|  |         nlp.replace_pipe(name, not_callable) | ||||||
|     nlp.replace_pipe(name, replacement) |     nlp.replace_pipe(name, replacement) | ||||||
|     assert nlp.get_pipe(name) != new_pipe |     assert nlp.get_pipe(name) != new_pipe | ||||||
|     assert nlp.get_pipe(name) == replacement |     assert nlp.get_pipe(name) == replacement | ||||||
|  |  | ||||||
|  | @ -25,6 +25,11 @@ class Underscore(object): | ||||||
|         object.__setattr__(self, "_start", start) |         object.__setattr__(self, "_start", start) | ||||||
|         object.__setattr__(self, "_end", end) |         object.__setattr__(self, "_end", end) | ||||||
| 
 | 
 | ||||||
|  |     def __dir__(self): | ||||||
|  |         # Hack to enable autocomplete on custom extensions | ||||||
|  |         extensions = list(self._extensions.keys()) | ||||||
|  |         return ["set", "get", "has"] + extensions | ||||||
|  | 
 | ||||||
|     def __getattr__(self, name): |     def __getattr__(self, name): | ||||||
|         if name not in self._extensions: |         if name not in self._extensions: | ||||||
|             raise AttributeError(Errors.E046.format(name=name)) |             raise AttributeError(Errors.E046.format(name=name)) | ||||||
|  | @ -32,7 +37,16 @@ class Underscore(object): | ||||||
|         if getter is not None: |         if getter is not None: | ||||||
|             return getter(self._obj) |             return getter(self._obj) | ||||||
|         elif method is not None: |         elif method is not None: | ||||||
|             return functools.partial(method, self._obj) |             method_partial = functools.partial(method, self._obj) | ||||||
|  |             # Hack to port over docstrings of the original function | ||||||
|  |             # See https://stackoverflow.com/q/27362727/6400719 | ||||||
|  |             method_docstring = method.__doc__ or "" | ||||||
|  |             method_docstring_prefix = ( | ||||||
|  |                 "This method is a partial function and its first argument " | ||||||
|  |                 "(the object it's called on) will be filled automatically. " | ||||||
|  |             ) | ||||||
|  |             method_partial.__doc__ = method_docstring_prefix + method_docstring | ||||||
|  |             return method_partial | ||||||
|         else: |         else: | ||||||
|             key = self._get_key(name) |             key = self._get_key(name) | ||||||
|             if key in self._doc.user_data: |             if key in self._doc.user_data: | ||||||
|  |  | ||||||
|  | @ -128,7 +128,6 @@ The L2 norm of the lexeme's vector representation. | ||||||
| | `text`                                       | unicode | Verbatim text content.                                                                                       | | | `text`                                       | unicode | Verbatim text content.                                                                                       | | ||||||
| | `orth`                                       | int     | ID of the verbatim text content.                                                                             | | | `orth`                                       | int     | ID of the verbatim text content.                                                                             | | ||||||
| | `orth_`                                      | unicode | Verbatim text content (identical to `Lexeme.text`). Exists mostly for consistency with the other attributes. | | | `orth_`                                      | unicode | Verbatim text content (identical to `Lexeme.text`). Exists mostly for consistency with the other attributes. | | ||||||
| | `lex_id`                                     | int     | ID of the lexeme's lexical type.                                                                             | |  | ||||||
| `rank`                                       | int     | Sequential ID of the lexeme's lexical type, used to index into tables, e.g. for word vectors.                | | | `rank`                                       | int     | Sequential ID of the lexeme's lexical type, used to index into tables, e.g. for word vectors.                | | ||||||
| | `flags`                                      | int     | Container of the lexeme's binary flags.                                                                      | | | `flags`                                      | int     | Container of the lexeme's binary flags.                                                                      | | ||||||
| `norm`                                       | int     | The lexeme's norm, i.e. a normalized form of the lexeme text.                                                | | | `norm`                                       | int     | The lexeme's norm, i.e. a normalized form of the lexeme text.                                                | | ||||||
|  |  | ||||||
|  | @ -468,7 +468,7 @@ The L2 norm of the token's vector representation. | ||||||
| | `prob`                                       | float        | Smoothed log probability estimate of token's word type (context-independent entry in the vocabulary).                                                                                                                         | | | `prob`                                       | float        | Smoothed log probability estimate of token's word type (context-independent entry in the vocabulary).                                                                                                                         | | ||||||
| | `idx`                                        | int          | The character offset of the token within the parent document.                                                                                                                                                                 | | | `idx`                                        | int          | The character offset of the token within the parent document.                                                                                                                                                                 | | ||||||
| | `sentiment`                                  | float        | A scalar value indicating the positivity or negativity of the token.                                                                                                                                                          | | | `sentiment`                                  | float        | A scalar value indicating the positivity or negativity of the token.                                                                                                                                                          | | ||||||
| | `lex_id`                                     | int          | Sequential ID of the token's lexical type.                                                                                                                                                                                    | | | `lex_id`                                     | int          | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors.                                                                                                                                  | | ||||||
| | `rank`                                       | int          | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors.                                                                                                                                  | | | `rank`                                       | int          | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors.                                                                                                                                  | | ||||||
| | `cluster`                                    | int          | Brown cluster ID.                                                                                                                                                                                                             | | | `cluster`                                    | int          | Brown cluster ID.                                                                                                                                                                                                             | | ||||||
| | `_`                                          | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes).                                                                                                                | | | `_`                                          | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes).                                                                                                                | | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user