Merge pull request #6234 from svlandeg/fix/various

This commit is contained in:
Ines Montani 2020-10-09 17:45:17 +02:00 committed by GitHub
commit 0068bb4433
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 6 additions and 8 deletions

View File

@ -253,7 +253,7 @@ def _get_converter(msg, converter, input_path):
if converter == "auto":
converter = input_path.suffix[1:]
if converter == "ner" or converter == "iob":
with input_path.open() as file_:
with input_path.open(encoding="utf8") as file_:
input_data = file_.read()
converter_autodetect = autodetect_ner_format(input_data)
if converter_autodetect == "ner":

View File

@ -24,11 +24,11 @@ def build_simple_cnn_text_classifier(
"""
with Model.define_operators({">>": chain}):
if exclusive_classes:
output_layer = Softmax(nO=nO, nI=tok2vec.get_dim("nO"))
output_layer = Softmax(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
model = tok2vec >> list2ragged() >> reduce_mean() >> output_layer
model.set_ref("output_layer", output_layer)
else:
linear_layer = Linear(nO=nO, nI=tok2vec.get_dim("nO"))
linear_layer = Linear(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
model = (
tok2vec >> list2ragged() >> reduce_mean() >> linear_layer >> Logistic()
)

View File

@ -110,7 +110,7 @@ def MultiHashEmbed(
The features used can be configured with the 'attrs' argument. The suggested
attributes are NORM, PREFIX, SUFFIX and SHAPE. This lets the model take into
account some subword information, without construction a fully character-based
account some subword information, without constructing a fully character-based
representation. If pretrained vectors are available, they can be included in
the representation as well, with the vectors table will be kept static
(i.e. it's not updated).

View File

@ -622,7 +622,7 @@ def load_meta(path: Union[str, Path]) -> Dict[str, Any]:
if not path.parent.exists():
raise IOError(Errors.E052.format(path=path.parent))
if not path.exists() or not path.is_file():
raise IOError(Errors.E053.format(path=path, name="meta.json"))
raise IOError(Errors.E053.format(path=path.parent, name="meta.json"))
meta = srsly.read_json(path)
for setting in ["lang", "name", "version"]:
if setting not in meta or not meta[setting]:

View File

@ -516,9 +516,7 @@ Many neural network models are able to use word vector tables as additional
features, which sometimes results in significant improvements in accuracy.
spaCy's built-in embedding layer,
[MultiHashEmbed](/api/architectures#MultiHashEmbed), can be configured to use
word vector tables using the `include_static_vectors` flag. This setting is
also available on the [MultiHashEmbedCNN](/api/architectures#MultiHashEmbedCNN)
layer, which builds the default token-to-vector encoding architecture.
word vector tables using the `include_static_vectors` flag.
```ini
[tagger.model.tok2vec.embed]