Add (and test) Span.sentiment attribute. By default we average token.sentiment over the span, but this can be overridden with a custom hook. Re Issue #667

This commit is contained in:
Matthew Honnibal 2016-12-02 11:05:50 +01:00
parent 296d33a4fc
commit f6e356aada
2 changed files with 49 additions and 0 deletions

View File

@ -1,6 +1,7 @@
from __future__ import unicode_literals
from spacy.attrs import HEAD
from spacy.en import English
from spacy.tokens.doc import Doc
import numpy as np
import pytest
@ -49,3 +50,44 @@ def test_sent(doc):
assert span.sent.text == 'This is a sentence.'
span = doc[6:7]
assert span.sent.root.left_edge.text == 'This'
def test_default_sentiment(EN):
    '''With no user hook installed, Span.sentiment is the mean of the
    sentiment values of the tokens in the span.'''
    vocab = EN.vocab
    # Only 'good' and 'bad' get a sentiment assigned here; the expected means
    # below count 'stuff' as contributing nothing.
    vocab[u'good'].sentiment = 3.0
    vocab[u'bad'].sentiment = -2.0
    words = [u'good', 'stuff', u'bad', u'stuff']
    doc = Doc(vocab, words)

    # 'good stuff' -> 3.0 spread over two tokens
    assert doc[:2].sentiment == 3.0 / 2
    # 'bad stuff' -> -2.0 spread over two tokens
    assert doc[-2:].sentiment == -2. / 2
    # 'good stuff bad' -> (3.0 - 2.0) spread over three tokens
    assert doc[:-1].sentiment == (3.+-2) / 3.
def test_override_sentiment(EN):
    '''A 'sentiment' entry in doc.user_span_hooks replaces the default
    averaging: every span reports whatever the hook returns.'''
    vocab = EN.vocab
    # Give the lexemes non-trivial sentiments so that a passing test shows
    # the hook's value is used, not the average.
    vocab[u'good'].sentiment = 3.0
    vocab[u'bad'].sentiment = -2.0
    words = [u'good', 'stuff', u'bad', u'stuff']
    doc = Doc(vocab, words)
    doc.user_span_hooks['sentiment'] = lambda span: 10.0

    # 'good stuff', 'bad stuff' and 'good stuff bad' respectively.
    for span in (doc[:2], doc[-2:], doc[:-1]):
        assert span.sentiment == 10.0

View File

@ -179,6 +179,13 @@ cdef class Span:
self._vector_norm = sqrt(norm) if norm != 0 else 0
return self._vector_norm
property sentiment:
    """A sentiment score for the span.

    If the parent document has a 'sentiment' entry in
    `doc.user_span_hooks`, that callable is invoked with this span and its
    result is returned unchanged. Otherwise the score is the arithmetic
    mean of `token.sentiment` over the tokens in the span. An empty span
    has no tokens to average, so it yields 0.0 instead of raising
    ZeroDivisionError.
    """
    def __get__(self):
        hooks = self.doc.user_span_hooks
        if 'sentiment' in hooks:
            return hooks['sentiment'](self)
        n_tokens = len(self)
        if n_tokens == 0:
            # Nothing to average; avoid dividing by zero.
            return 0.0
        return sum([token.sentiment for token in self]) / n_tokens
property text:
def __get__(self):
text = self.text_with_ws