mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
Merge pull request #6234 from svlandeg/fix/various
This commit is contained in:
commit
0068bb4433
|
@ -253,7 +253,7 @@ def _get_converter(msg, converter, input_path):
|
|||
if converter == "auto":
|
||||
converter = input_path.suffix[1:]
|
||||
if converter == "ner" or converter == "iob":
|
||||
with input_path.open() as file_:
|
||||
with input_path.open(encoding="utf8") as file_:
|
||||
input_data = file_.read()
|
||||
converter_autodetect = autodetect_ner_format(input_data)
|
||||
if converter_autodetect == "ner":
|
||||
|
|
|
@ -24,11 +24,11 @@ def build_simple_cnn_text_classifier(
|
|||
"""
|
||||
with Model.define_operators({">>": chain}):
|
||||
if exclusive_classes:
|
||||
output_layer = Softmax(nO=nO, nI=tok2vec.get_dim("nO"))
|
||||
output_layer = Softmax(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
|
||||
model = tok2vec >> list2ragged() >> reduce_mean() >> output_layer
|
||||
model.set_ref("output_layer", output_layer)
|
||||
else:
|
||||
linear_layer = Linear(nO=nO, nI=tok2vec.get_dim("nO"))
|
||||
linear_layer = Linear(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
|
||||
model = (
|
||||
tok2vec >> list2ragged() >> reduce_mean() >> linear_layer >> Logistic()
|
||||
)
|
||||
|
|
|
@ -110,7 +110,7 @@ def MultiHashEmbed(
|
|||
|
||||
The features used can be configured with the 'attrs' argument. The suggested
|
||||
attributes are NORM, PREFIX, SUFFIX and SHAPE. This lets the model take into
|
||||
account some subword information, without construction a fully character-based
|
||||
account some subword information, without constructing a fully character-based
|
||||
representation. If pretrained vectors are available, they can be included in
|
||||
the representation as well, with the vectors table kept static
|
||||
(i.e. it's not updated).
|
||||
|
|
|
@ -622,7 +622,7 @@ def load_meta(path: Union[str, Path]) -> Dict[str, Any]:
|
|||
if not path.parent.exists():
|
||||
raise IOError(Errors.E052.format(path=path.parent))
|
||||
if not path.exists() or not path.is_file():
|
||||
raise IOError(Errors.E053.format(path=path, name="meta.json"))
|
||||
raise IOError(Errors.E053.format(path=path.parent, name="meta.json"))
|
||||
meta = srsly.read_json(path)
|
||||
for setting in ["lang", "name", "version"]:
|
||||
if setting not in meta or not meta[setting]:
|
||||
|
|
|
@ -516,9 +516,7 @@ Many neural network models are able to use word vector tables as additional
|
|||
features, which sometimes results in significant improvements in accuracy.
|
||||
spaCy's built-in embedding layer,
|
||||
[MultiHashEmbed](/api/architectures#MultiHashEmbed), can be configured to use
|
||||
word vector tables using the `include_static_vectors` flag. This setting is
|
||||
also available on the [MultiHashEmbedCNN](/api/architectures#MultiHashEmbedCNN)
|
||||
layer, which builds the default token-to-vector encoding architecture.
|
||||
word vector tables using the `include_static_vectors` flag.
|
||||
|
||||
```ini
|
||||
[tagger.model.tok2vec.embed]
|
||||
|
|
Loading…
Reference in New Issue
Block a user