From f6e356aadad924c72ef64d86a7c87f14dfdcee41 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 2 Dec 2016 11:05:50 +0100 Subject: [PATCH] Add (and test) Span.sentiment attribute. By default we average token.sentiment, but can override with custom hook. Re Issue #667 --- spacy/tests/spans/test_span.py | 42 ++++++++++++++++++++++++++++++++++ spacy/tokens/span.pyx | 7 ++++++ 2 files changed, 49 insertions(+) diff --git a/spacy/tests/spans/test_span.py b/spacy/tests/spans/test_span.py index 27a88a61b..aee869c5b 100644 --- a/spacy/tests/spans/test_span.py +++ b/spacy/tests/spans/test_span.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from spacy.attrs import HEAD from spacy.en import English +from spacy.tokens.doc import Doc import numpy as np import pytest @@ -49,3 +50,44 @@ def test_sent(doc): assert span.sent.text == 'This is a sentence.' span = doc[6:7] assert span.sent.root.left_edge.text == 'This' + + +def test_default_sentiment(EN): + '''Test new span.sentiment property's default averaging behaviour''' + good = EN.vocab[u'good'] + good.sentiment = 3.0 + bad = EN.vocab[u'bad'] + bad.sentiment = -2.0 + + doc = Doc(EN.vocab, [u'good', 'stuff', u'bad', u'stuff']) + + good_stuff = doc[:2] + assert good_stuff.sentiment == 3.0 / 2 + + bad_stuff = doc[-2:] + assert bad_stuff.sentiment == -2. / 2 + + good_stuff_bad = doc[:-1] + assert good_stuff_bad.sentiment == (3.+-2) / 3. 
+
+
+
+def test_override_sentiment(EN):
+    '''Test that a doc.user_span_hooks sentiment hook overrides the default'''
+    good = EN.vocab[u'good']
+    good.sentiment = 3.0
+    bad = EN.vocab[u'bad']
+    bad.sentiment = -2.0
+
+    doc = Doc(EN.vocab, [u'good', 'stuff', u'bad', u'stuff'])
+
+    doc.user_span_hooks['sentiment'] = lambda span: 10.0
+
+    good_stuff = doc[:2]
+    assert good_stuff.sentiment == 10.0
+
+    bad_stuff = doc[-2:]
+    assert bad_stuff.sentiment == 10.0
+
+    good_stuff_bad = doc[:-1]
+    assert good_stuff_bad.sentiment == 10.0
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index a4f49555a..903ef26d1 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -179,6 +179,13 @@ cdef class Span:
                 self._vector_norm = sqrt(norm) if norm != 0 else 0
             return self._vector_norm
 
+    property sentiment:
+        def __get__(self):  # user hook if registered, else mean token sentiment
+            if 'sentiment' in self.doc.user_span_hooks:
+                return self.doc.user_span_hooks['sentiment'](self)
+            n = len(self)  # empty span: return neutral 0.0, not ZeroDivisionError
+            return sum([token.sentiment for token in self]) / n if n else 0.0
+
     property text:
         def __get__(self):
             text = self.text_with_ws