Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-25 01:16:28 +03:00)

Auto-format

parent 5821b020d5
commit c23e234d65
spacy/_ml.py | 41
@@ -86,7 +86,7 @@ def with_cpu(ops, model):
     as necessary."""
     model.to_cpu()
 
-    def with_cpu_forward(inputs, drop=0.):
+    def with_cpu_forward(inputs, drop=0.0):
         cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
         gpu_outputs = _to_device(ops, cpu_outputs)
@@ -106,7 +106,7 @@ def _to_cpu(X):
         return tuple([_to_cpu(x) for x in X])
     elif isinstance(X, list):
         return [_to_cpu(x) for x in X]
-    elif hasattr(X, 'get'):
+    elif hasattr(X, "get"):
        return X.get()
    else:
        return X
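For orientation: `with_cpu` wraps a thinc model so its forward and backward passes always run on host memory. `_to_cpu` recurses through tuples and lists, and the `hasattr(X, "get")` duck-typing check catches cupy arrays, whose `.get()` copies device memory into a numpy array. A hedged sketch of the full wrapper pattern, assuming thinc's `wrap` helper and the `_to_cpu`/`_to_device` functions from this file (a sketch, not the exact spaCy code):

    def with_cpu_sketch(ops, model):
        # Run `model` on CPU inside a possibly-GPU pipeline: move inputs to
        # host arrays, run the wrapped model, move outputs back to `ops`'
        # device. `wrap` here is assumed to build a layer from a forward fn.
        model.to_cpu()

        def forward(inputs, drop=0.0):
            cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
            gpu_outputs = _to_device(ops, cpu_outputs)

            def backward(d_outputs, sgd=None):
                # Gradients arrive on the caller's device; pull them to CPU
                # before handing them to the wrapped model's backprop.
                return backprop(_to_cpu(d_outputs), sgd=sgd)

            return gpu_outputs, backward

        return wrap(forward, model)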
@@ -142,7 +142,9 @@ class extract_ngrams(Model):
         # The dtype here matches what thinc is expecting -- which differs per
         # platform (by int definition). This should be fixed once the problem
         # is fixed on Thinc's side.
-        lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
+        lengths = self.ops.asarray(
+            [arr.shape[0] for arr in batch_keys], dtype=numpy.int_
+        )
         batch_keys = self.ops.xp.concatenate(batch_keys)
         batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
         return (batch_keys, batch_vals, lengths), None
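The `(batch_keys, batch_vals, lengths)` triple is a ragged-batch encoding: per-doc ngram arrays are concatenated into one flat array, and `lengths` records how many entries belong to each doc. A toy numpy illustration with made-up hash values:

    import numpy

    batch_keys = [numpy.asarray([11, 42, 42], dtype=numpy.int_),  # doc 1
                  numpy.asarray([7, 11], dtype=numpy.int_)]       # doc 2
    lengths = numpy.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
    flat = numpy.concatenate(batch_keys)
    assert lengths.tolist() == [3, 2]
    assert flat.tolist() == [11, 42, 42, 7, 11]
    # Downstream layers recover the per-doc boundaries from `lengths`.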
@@ -592,32 +594,27 @@ def build_text_classifier(nr_class, width=64, **cfg):
         )
 
         linear_model = build_bow_text_classifier(
-            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False)
-        if cfg.get('exclusive_classes'):
+            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
+        )
+        if cfg.get("exclusive_classes"):
             output_layer = Softmax(nr_class, nr_class * 2)
         else:
             output_layer = (
-                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
-                >> logistic
+                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
             )
-        model = (
-            (linear_model | cnn_model)
-            >> output_layer
-        )
+        model = (linear_model | cnn_model) >> output_layer
         model.tok2vec = chain(tok2vec, flatten)
         model.nO = nr_class
         model.lsuv = False
         return model
 
 
-def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
-                              no_output_layer=False, **cfg):
+def build_bow_text_classifier(
+    nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg
+):
     with Model.define_operators({">>": chain}):
-        model = (
-            with_cpu(Model.ops,
-                extract_ngrams(ngram_size, attr=ORTH)
-                >> LinearModel(nr_class)
-            )
-        )
+        model = with_cpu(
+            Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class)
+        )
         if not no_output_layer:
             model = model >> (cpu_softmax if exclusive_classes else logistic)
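After formatting, the ensemble structure is easier to read: `|` concatenates the outputs of the bag-of-ngrams linear model and the CNN, and `>>` pipes the concatenated features into a shared output layer. A hedged usage sketch of the bag-of-words variant, assuming the spaCy v2.x internals shown here (argument values are arbitrary examples):

    import spacy
    from spacy._ml import build_bow_text_classifier

    nlp = spacy.blank("en")
    docs = [nlp("this is a test"), nlp("another example")]
    # A 3-class textcat over 2-grams with mutually exclusive classes.
    bow = build_bow_text_classifier(nr_class=3, ngram_size=2, exclusive_classes=True)
    # Because the ngram/linear pipeline is wrapped in with_cpu, this part of
    # the model stays on CPU even when the rest of spaCy runs on GPU.
    scores, backprop = bow.begin_update(docs)  # scores: one row of 3 per doc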
@@ -626,11 +623,9 @@ def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
 
 
 @layerize
-def cpu_softmax(X, drop=0.):
+def cpu_softmax(X, drop=0.0):
     ops = NumpyOps()
-
     Y = ops.softmax(X)
-
     def cpu_softmax_backward(dY, sgd=None):
         return dY
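`@layerize` is thinc's decorator for promoting a bare forward function into a Model. The backward pass here returns `dY` unchanged, presumably because the loss used with this layer already computes its gradient with respect to the pre-softmax scores, so the layer can pass gradients straight through. A toy stand-in for the decorator, just to show the shape of the contract (not thinc's actual implementation):

    def layerize_sketch(forward):
        # A thinc-style layer is anything that exposes
        # begin_update(X) -> (Y, backprop).
        class FuncLayer(object):
            def begin_update(self, X, drop=0.0):
                return forward(X, drop=drop)
        return FuncLayer()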
@@ -648,7 +643,9 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False,
     if exclusive_classes:
         output_layer = Softmax(nr_class, tok2vec.nO)
     else:
-        output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+        output_layer = (
+            zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+        )
     model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
     model.tok2vec = chain(tok2vec, flatten)
     model.nO = nr_class
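`tok2vec >> flatten_add_lengths >> Pooling(mean_pool)` is the usual thinc recipe for reducing per-token vectors to one vector per Doc: flatten the batch into a single array, remember the lengths, then average each doc's slice. A small numpy illustration of the pooling step (made-up data):

    import numpy

    token_vecs = numpy.asarray([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype="f")
    lengths = numpy.asarray([2, 1])  # doc 1 has 2 tokens, doc 2 has 1
    splits = numpy.split(token_vecs, numpy.cumsum(lengths)[:-1])
    pooled = numpy.vstack([seg.mean(axis=0) for seg in splits])
    assert pooled.tolist() == [[2.0, 3.0], [5.0, 6.0]]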
@@ -125,7 +125,9 @@ def pretrain(
             max_length=max_length,
             min_length=min_length,
         )
-        loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout)
+        loss = make_update(
+            model, docs, optimizer, objective=loss_func, drop=dropout
+        )
         progress = tracker.update(epoch, loss, docs)
         if progress:
             msg.row(progress, **row_settings)
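This hunk appears to be from spaCy's `pretrain` command. `make_update` performs one step of the pretraining objective: predict a vector for each token, measure it against the targets, and backpropagate. Roughly, as a hedged sketch of that flow (`get_vectors_loss` stands for whatever helper computes the chosen objective; the name is an assumption):

    def make_update_sketch(model, docs, optimizer, objective="L2", drop=0.0):
        # One update over a batch of docs: forward pass, vector loss, backward.
        predictions, backprop = model.begin_update(docs, drop=drop)
        loss, gradients = get_vectors_loss(model.ops, docs, predictions, objective)
        backprop(gradients, sgd=optimizer)
        return float(loss)  # return a plain float, not a device array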
@@ -215,8 +217,8 @@ def get_cossim_loss(yh, y):
     norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
     mul_norms = norm_yh * norm_y
     cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
-    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
-    loss = xp.abs(cosine-1).sum()
+    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
+    loss = xp.abs(cosine - 1).sum()
     return loss, -d_yh
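For readers checking the math: with cos(yh, y) = yh.y / (||yh|| ||y||), the per-row gradient is d cos / d yh = y / (||yh|| ||y||) - cos * yh / ||yh||^2, which is exactly the `d_yh` expression. Since cosine similarity never exceeds 1, the loss |cos - 1| equals 1 - cos, so the gradient of the loss is -d_yh, which is what the function returns. A quick finite-difference check of the reformatted code (illustrative only):

    import numpy as np

    def cossim_loss(yh, y):
        norm_yh = np.linalg.norm(yh, axis=1, keepdims=True)
        norm_y = np.linalg.norm(y, axis=1, keepdims=True)
        mul_norms = norm_yh * norm_y
        cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
        d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
        loss = np.abs(cosine - 1).sum()
        return loss, -d_yh

    rng = np.random.RandomState(0)
    yh, y = rng.rand(4, 5), rng.rand(4, 5)
    loss, grad = cossim_loss(yh, y)
    eps = 1e-6
    numeric = np.zeros_like(yh)
    for i in range(yh.shape[0]):
        for j in range(yh.shape[1]):
            bumped = yh.copy()
            bumped[i, j] += eps
            numeric[i, j] = (cossim_loss(bumped, y)[0] - loss) / eps
    # The numeric gradient should match the analytic one.
    assert np.allclose(numeric, grad, atol=1e-4)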
@@ -1,7 +1,7 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-from ...symbols import LEMMA, PRON_LEMMA, AUX
+from ...symbols import LEMMA, PRON_LEMMA
 
 _subordinating_conjunctions = [
     "that",
@@ -457,7 +457,6 @@ MORPH_RULES = {
     "have": {"POS": "AUX"},
     "'m": {"POS": "AUX", LEMMA: "be"},
     "'ve": {"POS": "AUX"},
-    "'re": {"POS": "AUX", LEMMA: "be"},
     "'s": {"POS": "AUX"},
     "is": {"POS": "AUX"},
     "'d": {"POS": "AUX"},
@@ -1,7 +1,7 @@
 # encoding: utf8
 from __future__ import unicode_literals
 
-from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX,VERB
+from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX, VERB
 from ...symbols import ADP, CCONJ, PART, PUNCT, SPACE, SCONJ
 
 # Source: Korakot Chaovavanich
@@ -17,8 +17,8 @@ TAG_MAP = {
     "CFQC": {POS: NOUN},
     "CVBL": {POS: NOUN},
     # VERB
-    "VACT":{POS:VERB},
-    "VSTA":{POS:VERB},
+    "VACT": {POS: VERB},
+    "VSTA": {POS: VERB},
     # PRON
     "PRON": {POS: PRON},
     "NPRP": {POS: PRON},
@@ -1,6 +1,8 @@
 # coding: utf8
 from __future__ import unicode_literals
 
 import re
-from ... import compat
+import pytest
+
+from spacy import compat
 
 prefix_search = (
     b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])"
@@ -67,4 +69,4 @@ if compat.is_python2:
     # string above in the xpass message.
     def test_issue3356():
         pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8")))
-        assert not pattern.search(u"hello")
+        assert not pattern.search("hello")
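For context: Python 2's `re` module chokes on some escaped unicode codepoints, so the tokenizer prefix pattern is stored as escaped bytes and passed through `compat.unescape_unicode` before compiling; the regression test checks that the compiled pattern does not spuriously match plain words. A hedged positive/negative example using the same pattern and the spaCy v2.x `compat` API shown in the diff:

    import re
    from spacy import compat

    # Decoded, the bytes give anchored alternatives: section sign, %, =,
    # em/en dash, or a "+" not followed by a digit.
    text = b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])".decode("utf8")
    pattern = re.compile(compat.unescape_unicode(text))
    assert pattern.search("=hello")
    assert not pattern.search("hello")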
@@ -1,10 +1,14 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from spacy.util import decaying
 
 
-def test_decaying():
-    sizes = decaying(10., 1., .5)
+def test_issue3447():
+    sizes = decaying(10.0, 1.0, 0.5)
     size = next(sizes)
-    assert size == 10.
+    assert size == 10.0
     size = next(sizes)
-    assert size == 10. - 0.5
+    assert size == 10.0 - 0.5
     size = next(sizes)
-    assert size == 10. - 0.5 - 0.5
+    assert size == 10.0 - 0.5 - 0.5
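The renamed test pins down the schedule of `decaying`: an infinite generator that starts at 10.0 and steps down by 0.5, with 1.0 presumably acting as the floor. A hedged sketch of a generator with the behaviour the assertions require (not necessarily spaCy's exact implementation):

    def decaying_sketch(start, stop, decay):
        curr = start
        while True:
            yield max(curr, stop)  # never decay below the floor
            curr -= decay

    sizes = decaying_sketch(10.0, 1.0, 0.5)
    assert next(sizes) == 10.0
    assert next(sizes) == 9.5
    assert next(sizes) == 9.0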
@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-import pytest
 from spacy.lang.en import English
 from spacy.tokens import Doc
@@ -26,6 +26,7 @@ def symlink_setup_target(request, symlink_target, symlink):
     os.mkdir(path2str(symlink_target))
     # yield -- need to cleanup even if assertion fails
     # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
+
     def cleanup():
         symlink_remove(symlink)
         os.rmdir(path2str(symlink_target))
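The comment in this fixture points at a real pytest subtlety: teardown code written after a bare `yield` can be skipped when setup-adjacent assertions fail (see the linked issue), so cleanup is registered as a finalizer instead. A self-contained sketch of the same pattern with hypothetical paths (the `request.addfinalizer(cleanup)` call presumably follows the lines shown in the hunk):

    import os
    import pytest

    @pytest.fixture
    def tmp_symlink(request, tmp_path):
        target = tmp_path / "target"
        link = tmp_path / "link"
        os.mkdir(str(target))
        os.symlink(str(target), str(link))

        def cleanup():
            # Registered as a finalizer so it runs even if the test fails,
            # per pytest-dev/pytest#2508.
            os.remove(str(link))
            os.rmdir(str(target))

        request.addfinalizer(cleanup)
        return link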