mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-03 15:23:41 +03:00
Merge pull request #6234 from svlandeg/fix/various
This commit is contained in:
commit
0068bb4433
|
@ -253,7 +253,7 @@ def _get_converter(msg, converter, input_path):
|
||||||
if converter == "auto":
|
if converter == "auto":
|
||||||
converter = input_path.suffix[1:]
|
converter = input_path.suffix[1:]
|
||||||
if converter == "ner" or converter == "iob":
|
if converter == "ner" or converter == "iob":
|
||||||
with input_path.open() as file_:
|
with input_path.open(encoding="utf8") as file_:
|
||||||
input_data = file_.read()
|
input_data = file_.read()
|
||||||
converter_autodetect = autodetect_ner_format(input_data)
|
converter_autodetect = autodetect_ner_format(input_data)
|
||||||
if converter_autodetect == "ner":
|
if converter_autodetect == "ner":
|
||||||
|
|
|
@ -24,11 +24,11 @@ def build_simple_cnn_text_classifier(
|
||||||
"""
|
"""
|
||||||
with Model.define_operators({">>": chain}):
|
with Model.define_operators({">>": chain}):
|
||||||
if exclusive_classes:
|
if exclusive_classes:
|
||||||
output_layer = Softmax(nO=nO, nI=tok2vec.get_dim("nO"))
|
output_layer = Softmax(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
|
||||||
model = tok2vec >> list2ragged() >> reduce_mean() >> output_layer
|
model = tok2vec >> list2ragged() >> reduce_mean() >> output_layer
|
||||||
model.set_ref("output_layer", output_layer)
|
model.set_ref("output_layer", output_layer)
|
||||||
else:
|
else:
|
||||||
linear_layer = Linear(nO=nO, nI=tok2vec.get_dim("nO"))
|
linear_layer = Linear(nO=nO, nI=tok2vec.maybe_get_dim("nO"))
|
||||||
model = (
|
model = (
|
||||||
tok2vec >> list2ragged() >> reduce_mean() >> linear_layer >> Logistic()
|
tok2vec >> list2ragged() >> reduce_mean() >> linear_layer >> Logistic()
|
||||||
)
|
)
|
||||||
|
|
|
@ -110,7 +110,7 @@ def MultiHashEmbed(
|
||||||
|
|
||||||
The features used can be configured with the 'attrs' argument. The suggested
|
The features used can be configured with the 'attrs' argument. The suggested
|
||||||
attributes are NORM, PREFIX, SUFFIX and SHAPE. This lets the model take into
|
attributes are NORM, PREFIX, SUFFIX and SHAPE. This lets the model take into
|
||||||
account some subword information, without construction a fully character-based
|
account some subword information, without constructing a fully character-based
|
||||||
representation. If pretrained vectors are available, they can be included in
|
representation. If pretrained vectors are available, they can be included in
|
||||||
the representation as well, with the vectors table kept static
|
the representation as well, with the vectors table kept static
|
||||||
(i.e. it's not updated).
|
(i.e. it's not updated).
|
||||||
|
|
|
@ -622,7 +622,7 @@ def load_meta(path: Union[str, Path]) -> Dict[str, Any]:
|
||||||
if not path.parent.exists():
|
if not path.parent.exists():
|
||||||
raise IOError(Errors.E052.format(path=path.parent))
|
raise IOError(Errors.E052.format(path=path.parent))
|
||||||
if not path.exists() or not path.is_file():
|
if not path.exists() or not path.is_file():
|
||||||
raise IOError(Errors.E053.format(path=path, name="meta.json"))
|
raise IOError(Errors.E053.format(path=path.parent, name="meta.json"))
|
||||||
meta = srsly.read_json(path)
|
meta = srsly.read_json(path)
|
||||||
for setting in ["lang", "name", "version"]:
|
for setting in ["lang", "name", "version"]:
|
||||||
if setting not in meta or not meta[setting]:
|
if setting not in meta or not meta[setting]:
|
||||||
|
|
|
@ -516,9 +516,7 @@ Many neural network models are able to use word vector tables as additional
|
||||||
features, which sometimes results in significant improvements in accuracy.
|
features, which sometimes results in significant improvements in accuracy.
|
||||||
spaCy's built-in embedding layer,
|
spaCy's built-in embedding layer,
|
||||||
[MultiHashEmbed](/api/architectures#MultiHashEmbed), can be configured to use
|
[MultiHashEmbed](/api/architectures#MultiHashEmbed), can be configured to use
|
||||||
word vector tables using the `include_static_vectors` flag. This setting is
|
word vector tables using the `include_static_vectors` flag.
|
||||||
also available on the [MultiHashEmbedCNN](/api/architectures#MultiHashEmbedCNN)
|
|
||||||
layer, which builds the default token-to-vector encoding architecture.
|
|
||||||
|
|
||||||
```ini
|
```ini
|
||||||
[tagger.model.tok2vec.embed]
|
[tagger.model.tok2vec.embed]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user