From 64d90039a1ae42a1ecb77abe71622398d3bc289b Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 29 Sep 2020 10:54:42 +0200 Subject: [PATCH 1/7] encoding UTF8 --- spacy/cli/project/document.py | 2 +- website/docs/usage/training.md | 2 +- website/setup/jinja_to_js.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spacy/cli/project/document.py b/spacy/cli/project/document.py index d0265029a..811b7c746 100644 --- a/spacy/cli/project/document.py +++ b/spacy/cli/project/document.py @@ -114,6 +114,6 @@ def project_document( content = f"{before}{content}{after}" else: msg.warn("Replacing existing file") - with output_file.open("w") as f: + with output_file.open("w", encoding="utf8") as f: f.write(content) msg.good("Saved project documentation", output_file) diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md index eb02b135a..97992287b 100644 --- a/website/docs/usage/training.md +++ b/website/docs/usage/training.md @@ -700,7 +700,7 @@ from pathlib import Path @spacy.registry.loggers("my_custom_logger.v1") def custom_logger(log_path): def setup_logger(nlp: "Language") -> Tuple[Callable, Callable]: - with Path(log_path).open("w") as file_: + with Path(log_path).open("w", encoding="utf8") as file_: file_.write("step\\t") file_.write("score\\t") for pipe in nlp.pipe_names: diff --git a/website/setup/jinja_to_js.py b/website/setup/jinja_to_js.py index 114d0e172..e2eca7ffb 100644 --- a/website/setup/jinja_to_js.py +++ b/website/setup/jinja_to_js.py @@ -1256,7 +1256,7 @@ def main(template_path, output=None, data_path=None): data_str = f"export const DATA = {data}" result = compiler.get_output() if output is not None: - with output.open("w") as f: + with output.open("w", encoding="utf8") as f: f.write(f"{header}\n{result}\n{data_str}") print(f"Updated {output.parts[-1]}") else: From 512197293020cc5252e3af67a5a5123df099617e Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 1 Oct 2020 09:20:09 +0200 Subject: [PATCH 2/7] add types of Tok2Vec embedding layers --- spacy/ml/models/tok2vec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py index fec478e21..63e79bf95 100644 --- a/spacy/ml/models/tok2vec.py +++ b/spacy/ml/models/tok2vec.py @@ -93,7 +93,7 @@ def build_Tok2Vec_model( @registry.architectures.register("spacy.MultiHashEmbed.v1") def MultiHashEmbed( width: int, rows: int, also_embed_subwords: bool, also_use_static_vectors: bool -): +) -> Model[List[Doc], List[Floats2d]]: """Construct an embedding layer that separately embeds a number of lexical attributes using hash embedding, concatenates the results, and passes it through a feed-forward subnetwork to build a mixed representations. @@ -166,7 +166,7 @@ def MultiHashEmbed( @registry.architectures.register("spacy.CharacterEmbed.v1") def CharacterEmbed( width: int, rows: int, nM: int, nC: int, also_use_static_vectors: bool -): +) -> Model[List[Doc], List[Floats2d]]: """Construct an embedded representation based on character embeddings, using a feed-forward network. A fixed number of UTF-8 byte characters are used for each word, taken from the beginning and end of the word equally. Padding is From 6787e56315880a6d1049852a02a819cb8e3665df Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 1 Oct 2020 09:21:00 +0200 Subject: [PATCH 3/7] print debugging warning before raising error if model not properly initialized --- spacy/language.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/language.py b/spacy/language.py index c1d2df026..f161b2877 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -970,7 +970,8 @@ class Language: raise ValueError(Errors.E003.format(component=type(proc), name=name)) try: doc = proc(doc, **component_cfg.get(name, {})) - except KeyError: + except KeyError as e: + warnings.warn(str(e)) raise ValueError(Errors.E109.format(name=name)) from None if doc is None: raise ValueError(Errors.E005.format(name=name)) From 1328c9fd1452fc16f42fb4ee6516e53ca055a4db Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 1 Oct 2020 16:59:22 +0200 Subject: [PATCH 4/7] consistently use --code instead of --code-path --- website/docs/api/cli.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md index a6cb41e5e..ade62e3db 100644 --- a/website/docs/api/cli.md +++ b/website/docs/api/cli.md @@ -256,7 +256,7 @@ fixed. To auto-fill a partial config and save the result, you can use the [`init fill-config`](/api/cli#init-fill-config) command. ```cli -$ python -m spacy debug config [config_path] [--code-path] [--show-functions] [--show-variables] [overrides] +$ python -m spacy debug config [config_path] [--code] [--show-functions] [--show-variables] [overrides] ``` > #### Example @@ -399,7 +399,7 @@ File /path/to/thinc/thinc/schedules.py (line 91) | Name | Description | | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. ~~Path (positional)~~ | -| `--code-path`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ | +| `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ | | `--show-functions`, `-F` | Show an overview of all registered function blocks used in the config and where those functions come from, including the module name, Python file and line number. ~~bool (flag)~~ | | `--show-variables`, `-V` | Show an overview of all variables referenced in the config, e.g. `${paths.train}` and their values that will be used. This also reflects any config overrides provided on the CLI, e.g. `--paths.train /path`. ~~bool (flag)~~ | | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ | @@ -1162,7 +1162,7 @@ examples, see the usage guide on [integration](/usage/projects#ray). ```cli -$ python -m spacy ray train [config_path] [--code-path] [--output] [--n-workers] [--address] [--gpu-id] [--verbose] [overrides] +$ python -m spacy ray train [config_path] [--code] [--output] [--n-workers] [--address] [--gpu-id] [--verbose] [overrides] ``` > #### Example From acc391c2a841936b44e91a243f39ae864d661400 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 2 Oct 2020 11:05:59 +0200 Subject: [PATCH 5/7] remove redundant str() call --- spacy/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/util.py b/spacy/util.py index 1cc7abf57..d919b161e 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -1070,7 +1070,7 @@ def import_file(name: str, loc: Union[str, Path]) -> ModuleType: RETURNS: The loaded module. """ loc = str(loc) - spec = importlib.util.spec_from_file_location(name, str(loc)) + spec = importlib.util.spec_from_file_location(name, loc) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) return module From f758804401e288ee93561073ecee81f729a2b7a9 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 3 Oct 2020 11:41:28 +0200 Subject: [PATCH 6/7] Save one line of code --- spacy/util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spacy/util.py b/spacy/util.py index c43943ef7..4d68e829c 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -1065,8 +1065,7 @@ def import_file(name: str, loc: Union[str, Path]) -> ModuleType: loc (str / Path): Path to the file. RETURNS: The loaded module. """ - loc = str(loc) - spec = importlib.util.spec_from_file_location(name, loc) + spec = importlib.util.spec_from_file_location(name, str(loc)) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) return module From ae15c9de7971679df9bb60034d007530957205ad Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 3 Oct 2020 11:43:56 +0200 Subject: [PATCH 7/7] Raise error from caught KeyError to preserve traceback --- spacy/language.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index 18c08258f..d76741da3 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -973,8 +973,8 @@ class Language: try: doc = proc(doc, **component_cfg.get(name, {})) except KeyError as e: - warnings.warn(str(e)) - raise ValueError(Errors.E109.format(name=name)) from None + # This typically happens if a component is not initialized + raise ValueError(Errors.E109.format(name=name)) from e if doc is None: raise ValueError(Errors.E005.format(name=name)) return doc