mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
7986ada1cb
|
@ -150,10 +150,10 @@ class PrecomputableAffine(Model):
|
|||
|
||||
def _backprop_padding(self, dY, ids):
|
||||
# (1, nF, nO, nP) += (nN, nF, nO, nP) where IDs (nN, nF) < 0
|
||||
for i in range(ids.shape[0]):
|
||||
for j in range(ids.shape[1]):
|
||||
if ids[i,j] < 0:
|
||||
self.d_pad[0,j] += dY[i, j]
|
||||
mask = ids < 0.
|
||||
mask = mask.sum(axis=1)
|
||||
d_pad = dY * mask.reshape((ids.shape[0], 1, 1))
|
||||
self.d_pad += d_pad.sum(axis=0)
|
||||
return dY, ids
|
||||
|
||||
@staticmethod
|
||||
|
|
|
@ -85,6 +85,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
|
|||
batch_sizes = util.compounding(util.env_opt('batch_from', 1),
|
||||
util.env_opt('batch_to', 16),
|
||||
util.env_opt('batch_compound', 1.001))
|
||||
max_doc_len = util.env_opt('max_doc_len', 5000)
|
||||
corpus = GoldCorpus(train_path, dev_path, limit=n_sents)
|
||||
n_train_words = corpus.count_train()
|
||||
|
||||
|
@ -108,6 +109,9 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=30, n_sents=0,
|
|||
with tqdm.tqdm(total=n_train_words, leave=False) as pbar:
|
||||
losses = {}
|
||||
for batch in minibatch(train_docs, size=batch_sizes):
|
||||
batch = [(d, g) for (d, g) in batch if len(d) < max_doc_len]
|
||||
if not batch:
|
||||
continue
|
||||
docs, golds = zip(*batch)
|
||||
nlp.update(docs, golds, sgd=optimizer,
|
||||
drop=next(dropout_rates), losses=losses)
|
||||
|
|
|
@ -6,6 +6,7 @@ from .. import util
|
|||
from ..displacy import parse_deps, parse_ents
|
||||
from ..tokens import Span
|
||||
from .util import get_doc
|
||||
from .._ml import PrecomputableAffine
|
||||
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
@ -59,3 +60,19 @@ def test_displacy_parse_deps(en_vocab):
|
|||
assert deps['arcs'] == [{'start': 0, 'end': 1, 'label': 'nsubj', 'dir': 'left'},
|
||||
{'start': 2, 'end': 3, 'label': 'det', 'dir': 'left'},
|
||||
{'start': 1, 'end': 3, 'label': 'attr', 'dir': 'right'}]
|
||||
|
||||
|
||||
def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):
    """Check PrecomputableAffine array shapes and padding-gradient update.

    Verifies the weight shape, that the forward output has one more row
    than the input, the shape of ``d_pad``, and that
    ``_backprop_padding`` adds the gradient of a single padded slot
    (negative id) to the matching ``d_pad`` entry.
    """
    n_inputs, n_grads = 10, 15
    affine = PrecomputableAffine(nO=nO, nI=nI, nF=nF, nP=nP)
    assert affine.W.shape == (nF, nO, nP, nI)

    # Forward pass: the output carries one row more than the input.
    X = affine.ops.allocate((n_inputs, nI))
    outputs, _ = affine.begin_update(X)
    expected_rows = X.shape[0] + 1
    assert outputs.shape == (expected_rows, nF, nO, nP)
    assert affine.d_pad.shape == (1, nF, nO, nP)

    # Mark exactly one (row, feature) slot as padding, with unit gradient.
    d_outputs = affine.ops.allocate((n_grads, nF, nO, nP))
    feature_ids = affine.ops.allocate((n_grads, nF))
    feature_ids[1, 2] = -1
    d_outputs[1, 2] = 1

    assert affine.d_pad[0, 2, 0, 0] == 0.
    affine._backprop_padding(d_outputs, feature_ids)
    assert affine.d_pad[0, 2, 0, 0] == 1.
|
||||
|
|
Loading…
Reference in New Issue
Block a user