From 83f518b412da378e3ed030620b711b6ed12d1039 Mon Sep 17 00:00:00 2001 From: thomashacker Date: Mon, 31 Oct 2022 13:54:48 +0100 Subject: [PATCH] remove sentiment attribute --- spacy/lexeme.pyi | 1 - spacy/lexeme.pyx | 13 ------------- spacy/tests/README.md | 18 ++++++++++------- spacy/tests/doc/test_doc_api.py | 2 +- spacy/tests/doc/test_span.py | 26 ------------------------- spacy/tests/matcher/test_matcher_api.py | 3 --- spacy/tokens/doc.pxd | 2 -- spacy/tokens/doc.pyi | 1 - spacy/tokens/doc.pyx | 5 ----- spacy/tokens/span.pyi | 2 -- spacy/tokens/span.pyx | 10 ---------- spacy/tokens/token.pyi | 2 -- spacy/tokens/token.pyx | 8 -------- 13 files changed, 12 insertions(+), 81 deletions(-) diff --git a/spacy/lexeme.pyi b/spacy/lexeme.pyi index 4fcaa82cf..4942b18aa 100644 --- a/spacy/lexeme.pyi +++ b/spacy/lexeme.pyi @@ -20,7 +20,6 @@ class Lexeme: def vector_norm(self) -> float: ... vector: Floats1d rank: int - sentiment: float @property def orth_(self) -> str: ... @property diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 6c66effde..73bf28dc2 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -173,19 +173,6 @@ cdef class Lexeme: def __set__(self, value): self.c.id = value - property sentiment: - """RETURNS (float): A scalar value indicating the positivity or - negativity of the lexeme.""" - def __get__(self): - sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment", {}) - return sentiment_table.get(self.c.orth, 0.0) - - def __set__(self, float x): - if "lexeme_sentiment" not in self.vocab.lookups: - self.vocab.lookups.add_table("lexeme_sentiment") - sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment") - sentiment_table[self.c.orth] = x - @property def orth_(self): """RETURNS (str): The original verbatim text of the lexeme diff --git a/spacy/tests/README.md b/spacy/tests/README.md index 82fabcc77..dfcd6b21b 100644 --- a/spacy/tests/README.md +++ b/spacy/tests/README.md @@ -10,12 +10,16 @@ Tests for spaCy modules and classes live in their own directories of the same na ## Table of contents -1. [Running the tests](#running-the-tests) -2. [Dos and don'ts](#dos-and-donts) -3. [Parameters](#parameters) -4. [Fixtures](#fixtures) -5. [Helpers and utilities](#helpers-and-utilities) -6. [Contributing to the tests](#contributing-to-the-tests) +- [spaCy tests](#spacy-tests) + - [Table of contents](#table-of-contents) + - [Running the tests](#running-the-tests) + - [Dos and don'ts](#dos-and-donts) + - [Parameters](#parameters) + - [Fixtures](#fixtures) + - [Helpers and utilities](#helpers-and-utilities) + - [Constructing a `Doc` object manually](#constructing-a-doc-object-manually) + - [Other utilities](#other-utilities) + - [Contributing to the tests](#contributing-to-the-tests) ## Running the tests @@ -40,7 +44,7 @@ py.test spacy/tests/tokenizer/test_exceptions.py::test_tokenizer_handles_emoji # To keep the behavior of the tests consistent and predictable, we try to follow a few basic conventions: -- **Test names** should follow a pattern of `test_[module]_[tested behaviour]`. For example: `test_tokenizer_keeps_email` or `test_spans_override_sentiment`. +- **Test names** should follow a pattern of `test_[module]_[tested behaviour]`. For example: `test_tokenizer_keeps_email`. - If you're testing for a bug reported in a specific issue, always create a **regression test**. Regression tests should be named `test_issue[ISSUE NUMBER]` and live in the [`regression`](regression) directory. - Only use `@pytest.mark.xfail` for tests that **should pass, but currently fail**. To test for desired negative behavior, use `assert not` in your test. - Very **extensive tests** that take a long time to run should be marked with `@pytest.mark.slow`. If your slow test is testing important behavior, consider adding an additional simpler version. diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index 38003dea9..613925725 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -381,7 +381,7 @@ def test_doc_api_serialize(en_tokenizer, text): assert [t.orth for t in tokens] == [t.orth for t in new_tokens] new_tokens = Doc(tokens.vocab).from_bytes( - tokens.to_bytes(exclude=["sentiment"]), exclude=["sentiment"] + tokens.to_bytes() ) assert tokens.text == new_tokens.text assert [t.text for t in tokens] == [t.text for t in new_tokens] diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py index 1a2f3cdcd..6bffb51de 100644 --- a/spacy/tests/doc/test_span.py +++ b/spacy/tests/doc/test_span.py @@ -304,32 +304,6 @@ def test_span_similarity_match(): assert span1.similarity(doc) == 0.0 assert span1[:1].similarity(doc.vocab["a"]) == 1.0 - -def test_spans_default_sentiment(en_tokenizer): - """Test span.sentiment property's default averaging behaviour""" - text = "good stuff bad stuff" - tokens = en_tokenizer(text) - tokens.vocab[tokens[0].text].sentiment = 3.0 - tokens.vocab[tokens[2].text].sentiment = -2.0 - doc = Doc(tokens.vocab, words=[t.text for t in tokens]) - assert doc[:2].sentiment == 3.0 / 2 - assert doc[-2:].sentiment == -2.0 / 2 - assert doc[:-1].sentiment == (3.0 + -2) / 3.0 - - -def test_spans_override_sentiment(en_tokenizer): - """Test span.sentiment property's default averaging behaviour""" - text = "good stuff bad stuff" - tokens = en_tokenizer(text) - tokens.vocab[tokens[0].text].sentiment = 3.0 - tokens.vocab[tokens[2].text].sentiment = -2.0 - doc = Doc(tokens.vocab, words=[t.text for t in tokens]) - doc.user_span_hooks["sentiment"] = lambda span: 10.0 - assert doc[:2].sentiment == 10.0 - assert doc[-2:].sentiment == 10.0 - assert doc[:-1].sentiment == 10.0 - - def test_spans_are_hashable(en_tokenizer): """Test spans can be hashed.""" text = "good stuff bad stuff" diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py index 8a594ed7e..2f6cfdbd9 100644 --- a/spacy/tests/matcher/test_matcher_api.py +++ b/spacy/tests/matcher/test_matcher_api.py @@ -50,8 +50,6 @@ def test_matcher_from_usage_docs(en_vocab): def label_sentiment(matcher, doc, i, matches): match_id, start, end = matches[i] - if doc.vocab.strings[match_id] == "HAPPY": - doc.sentiment += 0.1 span = doc[start:end] with doc.retokenize() as retokenizer: retokenizer.merge(span) @@ -61,7 +59,6 @@ def test_matcher_from_usage_docs(en_vocab): matcher = Matcher(en_vocab) matcher.add("HAPPY", pos_patterns, on_match=label_sentiment) matcher(doc) - assert doc.sentiment != 0 assert doc[1].norm_ == "happy emoji" diff --git a/spacy/tokens/doc.pxd b/spacy/tokens/doc.pxd index 83a940cbb..b53c75a2f 100644 --- a/spacy/tokens/doc.pxd +++ b/spacy/tokens/doc.pxd @@ -48,8 +48,6 @@ cdef class Doc: cdef TokenC* c - cdef public float sentiment - cdef public dict activations cdef public dict user_hooks diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi index b208cc871..1c7c18bf3 100644 --- a/spacy/tokens/doc.pyi +++ b/spacy/tokens/doc.pyi @@ -21,7 +21,6 @@ class Doc: spans: SpanGroups max_length: int length: int - sentiment: float activations: Dict[str, Dict[str, Union[ArrayXd, Ragged]]] cats: Dict[str, float] user_hooks: Dict[str, Callable[..., Any]] diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 2451f72dd..ee6b6041c 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -243,7 +243,6 @@ cdef class Doc: self.c = data_start + PADDING self.max_length = size self.length = 0 - self.sentiment = 0.0 self.cats = {} self.activations = {} self.user_hooks = {} @@ -1270,7 +1269,6 @@ cdef class Doc: other.tensor = copy.deepcopy(self.tensor) other.cats = copy.deepcopy(self.cats) other.user_data = copy.deepcopy(self.user_data) - other.sentiment = self.sentiment other.has_unknown_spaces = self.has_unknown_spaces other.user_hooks = dict(self.user_hooks) other.user_token_hooks = dict(self.user_token_hooks) @@ -1367,7 +1365,6 @@ cdef class Doc: "text": lambda: self.text, "array_head": lambda: array_head, "array_body": lambda: self.to_array(array_head), - "sentiment": lambda: self.sentiment, "tensor": lambda: self.tensor, "cats": lambda: self.cats, "spans": lambda: self.spans.to_bytes(), @@ -1405,8 +1402,6 @@ cdef class Doc: for key, value in zip(user_data_keys, user_data_values): self.user_data[key] = value cdef int i, start, end, has_space - if "sentiment" not in exclude and "sentiment" in msg: - self.sentiment = msg["sentiment"] if "tensor" not in exclude and "tensor" in msg: self.tensor = msg["tensor"] if "cats" not in exclude and "cats" in msg: diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi index 28b627c32..abda49361 100644 --- a/spacy/tokens/span.pyi +++ b/spacy/tokens/span.pyi @@ -82,8 +82,6 @@ class Span: @property def tensor(self) -> FloatsXd: ... @property - def sentiment(self) -> float: ... - @property def text(self) -> str: ... @property def text_with_ws(self) -> str: ... diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 89d9727e9..5530dd127 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -566,16 +566,6 @@ cdef class Span: return None return self.doc.tensor[self.start : self.end] - @property - def sentiment(self): - """RETURNS (float): A scalar value indicating the positivity or - negativity of the span. - """ - if "sentiment" in self.doc.user_span_hooks: - return self.doc.user_span_hooks["sentiment"](self) - else: - return sum([token.sentiment for token in self]) / len(self) - @property def text(self): """RETURNS (str): The original verbatim text of the span.""" diff --git a/spacy/tokens/token.pyi b/spacy/tokens/token.pyi index bd585d034..6de7e984a 100644 --- a/spacy/tokens/token.pyi +++ b/spacy/tokens/token.pyi @@ -79,8 +79,6 @@ class Token: @property def prob(self) -> float: ... @property - def sentiment(self) -> float: ... - @property def lang(self) -> int: ... @property def idx(self) -> int: ... diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index cee903f48..64c707acd 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -283,14 +283,6 @@ cdef class Token: """RETURNS (float): Smoothed log probability estimate of token type.""" return self.vocab[self.c.lex.orth].prob - @property - def sentiment(self): - """RETURNS (float): A scalar value indicating the positivity or - negativity of the token.""" - if "sentiment" in self.doc.user_token_hooks: - return self.doc.user_token_hooks["sentiment"](self) - return self.vocab[self.c.lex.orth].sentiment - @property def lang(self): """RETURNS (uint64): ID of the language of the parent document's