Mirror of https://github.com/explosion/spaCy.git
Increase length limit for pretrain
parent 1b240f2119
commit 6bd1cc57ee
@@ -78,7 +78,7 @@ def make_update(model, docs, optimizer, drop=0.):
     return loss


-def make_docs(nlp, batch):
+def make_docs(nlp, batch, min_length=1, max_length=500):
     docs = []
     for record in batch:
         text = record["text"]
@@ -91,7 +91,7 @@ def make_docs(nlp, batch):
         heads = numpy.asarray(heads, dtype="uint64")
         heads = heads.reshape((len(doc), 1))
         doc = doc.from_array([HEAD], heads)
-        if len(doc) >= 1 and len(doc) < 200:
+        if len(doc) >= min_length and len(doc) < max_length:
             docs.append(doc)
     return docs
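For context, a minimal sketch of how the new min_length/max_length parameters affect doc filtering. The blank pipeline and sample records below are illustrative assumptions, not part of this commit; the real make_docs also restores HEAD annotations from each training record before filtering.

import spacy

def make_docs(nlp, batch, min_length=1, max_length=500):
    # Simplified sketch: the real function also rebuilds head annotations.
    docs = []
    for record in batch:
        doc = nlp.make_doc(record["text"])
        # Keep only docs whose token count falls inside the window;
        # the upper bound was previously hard-coded at 200 tokens.
        if len(doc) >= min_length and len(doc) < max_length:
            docs.append(doc)
    return docs

nlp = spacy.blank("en")                      # illustrative pipeline
batch = [{"text": "A short example."},
         {"text": "word " * 600}]            # ~600 tokens, over the new limit
print(len(make_docs(nlp, batch)))            # -> 1; the long record is dropped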