Merge pull request #6234 from svlandeg/fix/various

2025-08-07 13:44:55 +03:00 · 2020-10-09 17:45:17 +02:00 · 2020-10-09 17:45:17 +02:00 · 0068bb4433
commit 0068bb4433
parent b7cb9d95e4 08cb085f6c
5 changed files with 6 additions and 8 deletions
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -253,7 +253,7 @@ def _get_converter(msg, converter, input_path):
    if converter == "auto":
        converter = input_path.suffix[1:]
    if converter == "ner" or converter == "iob":
-        with input_path.open() as file_:
+        with input_path.open(encoding="utf8") as file_:
            input_data = file_.read()
        converter_autodetect = autodetect_ner_format(input_data)
        if converter_autodetect == "ner":
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -24,11 +24,11 @@ def build_simple_cnn_text_classifier(
    """
    with Model.define_operators({">>": chain}):
        if exclusive_classes:
-            output_layer = Softmax(nO=nO, nI=tok2vec.get_dim("nO"))
+            output_layer = Softmax(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
            model = tok2vec >> list2ragged() >> reduce_mean() >> output_layer
            model.set_ref("output_layer", output_layer)
        else:
-            linear_layer = Linear(nO=nO, nI=tok2vec.get_dim("nO"))
+            linear_layer = Linear(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
            model = (
                tok2vec >> list2ragged() >> reduce_mean() >> linear_layer >> Logistic()
            )
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -110,7 +110,7 @@ def MultiHashEmbed(

    The features used can be configured with the 'attrs' argument. The suggested
    attributes are NORM, PREFIX, SUFFIX and SHAPE. This lets the model take into
-    account some subword information, without construction a fully character-based
+    account some subword information, without constructing a fully character-based
    representation. If pretrained vectors are available, they can be included in
    the representation as well, with the vectors table kept static
    (i.e. it's not updated).
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -622,7 +622,7 @@ def load_meta(path: Union[str, Path]) -> Dict[str, Any]:
    if not path.parent.exists():
        raise IOError(Errors.E052.format(path=path.parent))
    if not path.exists() or not path.is_file():
-        raise IOError(Errors.E053.format(path=path, name="meta.json"))
+        raise IOError(Errors.E053.format(path=path.parent, name="meta.json"))
    meta = srsly.read_json(path)
    for setting in ["lang", "name", "version"]:
        if setting not in meta or not meta[setting]:
--- a/website/docs/usage/embeddings-transformers.md
+++ b/website/docs/usage/embeddings-transformers.md
@@ -516,9 +516,7 @@ Many neural network models are able to use word vector tables as additional
 features, which sometimes results in significant improvements in accuracy.
 spaCy's built-in embedding layer,
 [MultiHashEmbed](/api/architectures#MultiHashEmbed), can be configured to use
-word vector tables using the `include_static_vectors` flag. This setting is
-also available on the [MultiHashEmbedCNN](/api/architectures#MultiHashEmbedCNN)
-layer, which builds the default token-to-vector encoding architecture.
+word vector tables using the `include_static_vectors` flag. 

 ```ini
 [tagger.model.tok2vec.embed]