spaCy/examples/paddle/sentiment_bilstm/config.py

37 lines
1.1 KiB
Python
Raw Normal View History

2016-11-20 05:35:23 +03:00
from paddle.trainer.PyDataProvider2 import *
def get_features(doc):
return numpy.asarray(
[t.rank+1 for t in doc
if t.has_vector and not t.is_punct and not t.is_space],
dtype='int32')
def on_init(settings, lang_name, **kwargs):
print("Loading spaCy")
nlp = spacy.load('en', entity=False)
vectors = get_vectors(nlp)
settings.input_types = [
# The text is a sequence of integer values, and each value is a word id.
# The whole sequence is the sentences that we want to predict its
# sentimental.
integer_value(vectors.shape[0], seq_type=SequenceType), # text input
# label positive/negative
integer_value(2)
]
settings.nlp = nlp
settings.vectors = vectors
@provider(init_hook=on_init)
def process(settings, data_dir): # settings is not used currently.
texts, labels = read_data(data_dir)
for doc, label in zip(nlp.pipe(train_texts, batch_size=5000, n_threads=3),
labels):
for sent in doc.sents:
ids = get_features(sent)
# give data to paddle.
yield ids, label