Auto-format

parent 5821b020d5
commit c23e234d65

spacy/_ml.py | 41
@@ -86,7 +86,7 @@ def with_cpu(ops, model):
     as necessary."""
     model.to_cpu()

-    def with_cpu_forward(inputs, drop=0.):
+    def with_cpu_forward(inputs, drop=0.0):
         cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
         gpu_outputs = _to_device(ops, cpu_outputs)

@@ -106,7 +106,7 @@ def _to_cpu(X):
         return tuple([_to_cpu(x) for x in X])
     elif isinstance(X, list):
         return [_to_cpu(x) for x in X]
-    elif hasattr(X, 'get'):
+    elif hasattr(X, "get"):
         return X.get()
     else:
         return X
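
For context on the two hunks above: with_cpu wraps a layer so that its computation always runs on CPU, moving inputs off the GPU before calling the wrapped model and moving outputs back afterwards. A minimal sketch of that pattern, assuming Thinc v7's wrap helper and the _to_cpu/_to_device helpers from this file (simplified, not the exact spaCy implementation):

from thinc.api import wrap


def with_cpu(ops, model):
    """Wrap a model that should run on CPU, transferring inputs and outputs
    as necessary."""
    model.to_cpu()

    def with_cpu_forward(inputs, drop=0.0):
        # Run the wrapped model on CPU copies of the inputs ...
        cpu_outputs, backprop = model.begin_update(_to_cpu(inputs), drop=drop)
        # ... then move the outputs back onto the caller's device.
        gpu_outputs = _to_device(ops, cpu_outputs)

        def with_cpu_backprop(d_outputs, sgd=None):
            cpu_d_outputs = _to_cpu(d_outputs)
            return backprop(cpu_d_outputs, sgd=sgd)

        return gpu_outputs, with_cpu_backprop

    return wrap(with_cpu_forward, model)


def _to_device(ops, X):
    # Assumed helper: recursively copy arrays onto the device backing `ops`.
    if isinstance(X, (tuple, list)):
        return type(X)(_to_device(ops, x) for x in X)
    return ops.asarray(X)
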
@@ -142,7 +142,9 @@ class extract_ngrams(Model):
         # The dtype here matches what thinc is expecting -- which differs per
         # platform (by int definition). This should be fixed once the problem
         # is fixed on Thinc's side.
-        lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
+        lengths = self.ops.asarray(
+            [arr.shape[0] for arr in batch_keys], dtype=numpy.int_
+        )
         batch_keys = self.ops.xp.concatenate(batch_keys)
         batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
         return (batch_keys, batch_vals, lengths), None
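
The comment in this hunk refers to numpy.int_ mapping to the platform's C long, so the default integer width differs between operating systems: typically 64-bit on Linux and macOS, 32-bit on 64-bit Windows. Pinning dtype=numpy.int_ keeps the lengths array in whatever width Thinc expects on that platform. A quick way to see the difference (illustrative only):

import numpy

# numpy.int_ is the platform's C long: 8 bytes on most Linux/macOS builds,
# 4 bytes on 64-bit Windows, which is why the dtype is pinned explicitly.
print(numpy.dtype(numpy.int_).itemsize)
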
@@ -592,32 +594,27 @@ def build_text_classifier(nr_class, width=64, **cfg):
         )

         linear_model = build_bow_text_classifier(
-            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False)
-        if cfg.get('exclusive_classes'):
+            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
+        )
+        if cfg.get("exclusive_classes"):
             output_layer = Softmax(nr_class, nr_class * 2)
         else:
             output_layer = (
-                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
-                >> logistic
+                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
             )
-        model = (
-            (linear_model | cnn_model)
-            >> output_layer
-        )
+        model = (linear_model | cnn_model) >> output_layer
         model.tok2vec = chain(tok2vec, flatten)
         model.nO = nr_class
         model.lsuv = False
         return model


-def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,
-                              no_output_layer=False, **cfg):
+def build_bow_text_classifier(
+    nr_class, ngram_size=1, exclusive_classes=False, no_output_layer=False, **cfg
+):
     with Model.define_operators({">>": chain}):
-        model = (
-            with_cpu(Model.ops,
-                extract_ngrams(ngram_size, attr=ORTH)
-                >> LinearModel(nr_class)
-            )
-        )
+        model = with_cpu(
+            Model.ops, extract_ngrams(ngram_size, attr=ORTH) >> LinearModel(nr_class)
+        )
         if not no_output_layer:
             model = model >> (cpu_softmax if exclusive_classes else logistic)
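
Both classifier builders in this hunk rely on Thinc's operator overloading: inside a Model.define_operators block, >> is bound to chain (sequential composition), and | is presumably bound elsewhere in build_text_classifier to a combinator that runs layers in parallel and concatenates their outputs, which is how (linear_model | cnn_model) >> output_layer is assembled. A minimal sketch of the pattern with assumed Thinc v7 imports (illustrative, not spaCy's code):

from thinc.api import chain, concatenate
from thinc.v2v import Affine, Model, Softmax

with Model.define_operators({">>": chain, "|": concatenate}):
    # Two 64-wide layers run on the same 128-wide input; their outputs are
    # concatenated (128 wide) and piped into a 4-class softmax.
    wide = Affine(64, 128) | Affine(64, 128)
    model = wide >> Softmax(4, 128)
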
@@ -626,11 +623,9 @@ def build_bow_text_classifier(nr_class, ngram_size=1, exclusive_classes=False,


 @layerize
-def cpu_softmax(X, drop=0.):
+def cpu_softmax(X, drop=0.0):
     ops = NumpyOps()

-    Y = ops.softmax(X)
-
     def cpu_softmax_backward(dY, sgd=None):
         return dY

@@ -648,7 +643,9 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes=False,
         if exclusive_classes:
             output_layer = Softmax(nr_class, tok2vec.nO)
         else:
-            output_layer = zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+            output_layer = (
+                zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0)) >> logistic
+            )
         model = tok2vec >> flatten_add_lengths >> Pooling(mean_pool) >> output_layer
         model.tok2vec = chain(tok2vec, flatten)
         model.nO = nr_class
@@ -125,7 +125,9 @@ def pretrain(
                 max_length=max_length,
                 min_length=min_length,
             )
-            loss = make_update(model, docs, optimizer, objective=loss_func, drop=dropout)
+            loss = make_update(
+                model, docs, optimizer, objective=loss_func, drop=dropout
+            )
             progress = tracker.update(epoch, loss, docs)
             if progress:
                 msg.row(progress, **row_settings)
@@ -215,8 +217,8 @@ def get_cossim_loss(yh, y):
     norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
     mul_norms = norm_yh * norm_y
     cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
-    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
-    loss = xp.abs(cosine-1).sum()
+    d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
+    loss = xp.abs(cosine - 1).sum()
     return loss, -d_yh

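
The get_cossim_loss hunk only changes spacing, but the underlying math is worth spelling out: with row-wise cosine = (yh · y) / (‖yh‖ ‖y‖), the loss is sum(|cosine − 1|) and the gradient with respect to yh is d_yh = y / (‖yh‖ ‖y‖) − cosine · yh / ‖yh‖², which is exactly what the reformatted lines compute. A small self-contained NumPy check of the same formulas (illustrative, not the spaCy function):

import numpy as xp

yh = xp.array([[1.0, 2.0], [0.5, -1.0]])
y = xp.array([[2.0, 4.0], [1.0, 1.0]])

norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
mul_norms = norm_yh * norm_y
cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
loss = xp.abs(cosine - 1).sum()
# The first row of yh is parallel to y, so it contributes ~0 to the loss and
# ~0 to the gradient; only the second row produces an update.
print(loss, -d_yh)
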
@@ -1,7 +1,7 @@
 # coding: utf8
 from __future__ import unicode_literals

-from ...symbols import LEMMA, PRON_LEMMA, AUX
+from ...symbols import LEMMA, PRON_LEMMA

 _subordinating_conjunctions = [
     "that",
@@ -457,7 +457,6 @@ MORPH_RULES = {
         "have": {"POS": "AUX"},
         "'m": {"POS": "AUX", LEMMA: "be"},
         "'ve": {"POS": "AUX"},
-        "'re": {"POS": "AUX", LEMMA: "be"},
         "'s": {"POS": "AUX"},
         "is": {"POS": "AUX"},
         "'d": {"POS": "AUX"},
@@ -1,7 +1,7 @@
 # encoding: utf8
 from __future__ import unicode_literals

-from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX,VERB
+from ...symbols import POS, NOUN, PRON, ADJ, ADV, INTJ, PROPN, DET, NUM, AUX, VERB
 from ...symbols import ADP, CCONJ, PART, PUNCT, SPACE, SCONJ

 # Source: Korakot Chaovavanich
@@ -17,8 +17,8 @@ TAG_MAP = {
     "CFQC": {POS: NOUN},
     "CVBL": {POS: NOUN},
     # VERB
-    "VACT":{POS:VERB},
-    "VSTA":{POS:VERB},
+    "VACT": {POS: VERB},
+    "VSTA": {POS: VERB},
     # PRON
     "PRON": {POS: PRON},
     "NPRP": {POS: PRON},
@@ -1,6 +1,8 @@
-import pytest
+# coding: utf8
+from __future__ import unicode_literals
+
 import re
-from ... import compat
+from spacy import compat

 prefix_search = (
     b"^\xc2\xa7|^%|^=|^\xe2\x80\x94|^\xe2\x80\x93|^\\+(?![0-9])"
@@ -67,4 +69,4 @@ if compat.is_python2:
     # string above in the xpass message.
     def test_issue3356():
         pattern = re.compile(compat.unescape_unicode(prefix_search.decode("utf8")))
-        assert not pattern.search(u"hello")
+        assert not pattern.search("hello")
@@ -1,10 +1,14 @@
+# coding: utf8
+from __future__ import unicode_literals
+
 from spacy.util import decaying

-def test_decaying():
-    sizes = decaying(10., 1., .5)
+
+def test_issue3447():
+    sizes = decaying(10.0, 1.0, 0.5)
     size = next(sizes)
-    assert size == 10.
+    assert size == 10.0
     size = next(sizes)
-    assert size == 10. - 0.5
+    assert size == 10.0 - 0.5
     size = next(sizes)
-    assert size == 10. - 0.5 - 0.5
+    assert size == 10.0 - 0.5 - 0.5
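
The renamed test_issue3447 documents the expected behaviour of spacy.util.decaying: an infinite generator that starts at the first argument and decreases by the decay step each time it is consumed. A rough stand-in that satisfies the asserts above (an assumption based on the test, not the real implementation; the second argument is presumably a floor the values never drop below):

def decaying(start, stop, decay):
    # Yield start, start - decay, start - 2 * decay, ... clamped at `stop`.
    curr = float(start)
    while True:
        yield max(curr, stop)
        curr -= decay


sizes = decaying(10.0, 1.0, 0.5)
assert next(sizes) == 10.0
assert next(sizes) == 9.5
assert next(sizes) == 9.0
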
@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals

-import pytest
 from spacy.lang.en import English
 from spacy.tokens import Doc

@@ -26,6 +26,7 @@ def symlink_setup_target(request, symlink_target, symlink):
     os.mkdir(path2str(symlink_target))
     # yield -- need to cleanup even if assertion fails
     # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
+
     def cleanup():
         symlink_remove(symlink)
         os.rmdir(path2str(symlink_target))
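
The comment in this last hunk points at why the fixture defines a cleanup() function instead of using a yield fixture: the teardown has to run even when an assertion inside the test fails, so cleanup is presumably registered with request.addfinalizer just after the lines shown. A minimal sketch of that fixture shape, assuming the path2str and symlink_remove helpers from the surrounding test module (not the exact spaCy fixture):

import os

import pytest


@pytest.fixture
def symlink_setup_target(request, symlink_target, symlink):
    # Create the directory the symlink will point at ...
    os.mkdir(path2str(symlink_target))

    # ... and register teardown as a finalizer rather than after a yield, so
    # the cleanup runs even if an assertion inside the test fails.
    def cleanup():
        symlink_remove(symlink)
        os.rmdir(path2str(symlink_target))

    request.addfinalizer(cleanup)
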