mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-06 21:30:22 +03:00
remove sentiment attribute
This commit is contained in:
parent
cae4589f5a
commit
83f518b412
|
@ -20,7 +20,6 @@ class Lexeme:
|
|||
def vector_norm(self) -> float: ...
|
||||
vector: Floats1d
|
||||
rank: int
|
||||
sentiment: float
|
||||
@property
|
||||
def orth_(self) -> str: ...
|
||||
@property
|
||||
|
|
|
@ -173,19 +173,6 @@ cdef class Lexeme:
|
|||
def __set__(self, value):
|
||||
self.c.id = value
|
||||
|
||||
property sentiment:
|
||||
"""RETURNS (float): A scalar value indicating the positivity or
|
||||
negativity of the lexeme."""
|
||||
def __get__(self):
|
||||
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment", {})
|
||||
return sentiment_table.get(self.c.orth, 0.0)
|
||||
|
||||
def __set__(self, float x):
|
||||
if "lexeme_sentiment" not in self.vocab.lookups:
|
||||
self.vocab.lookups.add_table("lexeme_sentiment")
|
||||
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment")
|
||||
sentiment_table[self.c.orth] = x
|
||||
|
||||
@property
|
||||
def orth_(self):
|
||||
"""RETURNS (str): The original verbatim text of the lexeme
|
||||
|
|
|
@ -10,12 +10,16 @@ Tests for spaCy modules and classes live in their own directories of the same na
|
|||
|
||||
## Table of contents
|
||||
|
||||
1. [Running the tests](#running-the-tests)
|
||||
2. [Dos and don'ts](#dos-and-donts)
|
||||
3. [Parameters](#parameters)
|
||||
4. [Fixtures](#fixtures)
|
||||
5. [Helpers and utilities](#helpers-and-utilities)
|
||||
6. [Contributing to the tests](#contributing-to-the-tests)
|
||||
- [spaCy tests](#spacy-tests)
|
||||
- [Table of contents](#table-of-contents)
|
||||
- [Running the tests](#running-the-tests)
|
||||
- [Dos and don'ts](#dos-and-donts)
|
||||
- [Parameters](#parameters)
|
||||
- [Fixtures](#fixtures)
|
||||
- [Helpers and utilities](#helpers-and-utilities)
|
||||
- [Constructing a `Doc` object manually](#constructing-a-doc-object-manually)
|
||||
- [Other utilities](#other-utilities)
|
||||
- [Contributing to the tests](#contributing-to-the-tests)
|
||||
|
||||
## Running the tests
|
||||
|
||||
|
@ -40,7 +44,7 @@ py.test spacy/tests/tokenizer/test_exceptions.py::test_tokenizer_handles_emoji #
|
|||
|
||||
To keep the behavior of the tests consistent and predictable, we try to follow a few basic conventions:
|
||||
|
||||
- **Test names** should follow a pattern of `test_[module]_[tested behaviour]`. For example: `test_tokenizer_keeps_email` or `test_spans_override_sentiment`.
|
||||
- **Test names** should follow a pattern of `test_[module]_[tested behaviour]`. For example: `test_tokenizer_keeps_email`.
|
||||
- If you're testing for a bug reported in a specific issue, always create a **regression test**. Regression tests should be named `test_issue[ISSUE NUMBER]` and live in the [`regression`](regression) directory.
|
||||
- Only use `@pytest.mark.xfail` for tests that **should pass, but currently fail**. To test for desired negative behavior, use `assert not` in your test.
|
||||
- Very **extensive tests** that take a long time to run should be marked with `@pytest.mark.slow`. If your slow test is testing important behavior, consider adding an additional simpler version.
|
||||
|
|
|
@ -381,7 +381,7 @@ def test_doc_api_serialize(en_tokenizer, text):
|
|||
assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
|
||||
|
||||
new_tokens = Doc(tokens.vocab).from_bytes(
|
||||
tokens.to_bytes(exclude=["sentiment"]), exclude=["sentiment"]
|
||||
tokens.to_bytes()
|
||||
)
|
||||
assert tokens.text == new_tokens.text
|
||||
assert [t.text for t in tokens] == [t.text for t in new_tokens]
|
||||
|
|
|
@ -304,32 +304,6 @@ def test_span_similarity_match():
|
|||
assert span1.similarity(doc) == 0.0
|
||||
assert span1[:1].similarity(doc.vocab["a"]) == 1.0
|
||||
|
||||
|
||||
def test_spans_default_sentiment(en_tokenizer):
|
||||
"""Test span.sentiment property's default averaging behaviour"""
|
||||
text = "good stuff bad stuff"
|
||||
tokens = en_tokenizer(text)
|
||||
tokens.vocab[tokens[0].text].sentiment = 3.0
|
||||
tokens.vocab[tokens[2].text].sentiment = -2.0
|
||||
doc = Doc(tokens.vocab, words=[t.text for t in tokens])
|
||||
assert doc[:2].sentiment == 3.0 / 2
|
||||
assert doc[-2:].sentiment == -2.0 / 2
|
||||
assert doc[:-1].sentiment == (3.0 + -2) / 3.0
|
||||
|
||||
|
||||
def test_spans_override_sentiment(en_tokenizer):
|
||||
"""Test span.sentiment property's default averaging behaviour"""
|
||||
text = "good stuff bad stuff"
|
||||
tokens = en_tokenizer(text)
|
||||
tokens.vocab[tokens[0].text].sentiment = 3.0
|
||||
tokens.vocab[tokens[2].text].sentiment = -2.0
|
||||
doc = Doc(tokens.vocab, words=[t.text for t in tokens])
|
||||
doc.user_span_hooks["sentiment"] = lambda span: 10.0
|
||||
assert doc[:2].sentiment == 10.0
|
||||
assert doc[-2:].sentiment == 10.0
|
||||
assert doc[:-1].sentiment == 10.0
|
||||
|
||||
|
||||
def test_spans_are_hashable(en_tokenizer):
|
||||
"""Test spans can be hashed."""
|
||||
text = "good stuff bad stuff"
|
||||
|
|
|
@ -50,8 +50,6 @@ def test_matcher_from_usage_docs(en_vocab):
|
|||
|
||||
def label_sentiment(matcher, doc, i, matches):
|
||||
match_id, start, end = matches[i]
|
||||
if doc.vocab.strings[match_id] == "HAPPY":
|
||||
doc.sentiment += 0.1
|
||||
span = doc[start:end]
|
||||
with doc.retokenize() as retokenizer:
|
||||
retokenizer.merge(span)
|
||||
|
@ -61,7 +59,6 @@ def test_matcher_from_usage_docs(en_vocab):
|
|||
matcher = Matcher(en_vocab)
|
||||
matcher.add("HAPPY", pos_patterns, on_match=label_sentiment)
|
||||
matcher(doc)
|
||||
assert doc.sentiment != 0
|
||||
assert doc[1].norm_ == "happy emoji"
|
||||
|
||||
|
||||
|
|
|
@ -48,8 +48,6 @@ cdef class Doc:
|
|||
|
||||
cdef TokenC* c
|
||||
|
||||
cdef public float sentiment
|
||||
|
||||
cdef public dict activations
|
||||
|
||||
cdef public dict user_hooks
|
||||
|
|
|
@ -21,7 +21,6 @@ class Doc:
|
|||
spans: SpanGroups
|
||||
max_length: int
|
||||
length: int
|
||||
sentiment: float
|
||||
activations: Dict[str, Dict[str, Union[ArrayXd, Ragged]]]
|
||||
cats: Dict[str, float]
|
||||
user_hooks: Dict[str, Callable[..., Any]]
|
||||
|
|
|
@ -243,7 +243,6 @@ cdef class Doc:
|
|||
self.c = data_start + PADDING
|
||||
self.max_length = size
|
||||
self.length = 0
|
||||
self.sentiment = 0.0
|
||||
self.cats = {}
|
||||
self.activations = {}
|
||||
self.user_hooks = {}
|
||||
|
@ -1270,7 +1269,6 @@ cdef class Doc:
|
|||
other.tensor = copy.deepcopy(self.tensor)
|
||||
other.cats = copy.deepcopy(self.cats)
|
||||
other.user_data = copy.deepcopy(self.user_data)
|
||||
other.sentiment = self.sentiment
|
||||
other.has_unknown_spaces = self.has_unknown_spaces
|
||||
other.user_hooks = dict(self.user_hooks)
|
||||
other.user_token_hooks = dict(self.user_token_hooks)
|
||||
|
@ -1367,7 +1365,6 @@ cdef class Doc:
|
|||
"text": lambda: self.text,
|
||||
"array_head": lambda: array_head,
|
||||
"array_body": lambda: self.to_array(array_head),
|
||||
"sentiment": lambda: self.sentiment,
|
||||
"tensor": lambda: self.tensor,
|
||||
"cats": lambda: self.cats,
|
||||
"spans": lambda: self.spans.to_bytes(),
|
||||
|
@ -1405,8 +1402,6 @@ cdef class Doc:
|
|||
for key, value in zip(user_data_keys, user_data_values):
|
||||
self.user_data[key] = value
|
||||
cdef int i, start, end, has_space
|
||||
if "sentiment" not in exclude and "sentiment" in msg:
|
||||
self.sentiment = msg["sentiment"]
|
||||
if "tensor" not in exclude and "tensor" in msg:
|
||||
self.tensor = msg["tensor"]
|
||||
if "cats" not in exclude and "cats" in msg:
|
||||
|
|
|
@ -82,8 +82,6 @@ class Span:
|
|||
@property
|
||||
def tensor(self) -> FloatsXd: ...
|
||||
@property
|
||||
def sentiment(self) -> float: ...
|
||||
@property
|
||||
def text(self) -> str: ...
|
||||
@property
|
||||
def text_with_ws(self) -> str: ...
|
||||
|
|
|
@ -566,16 +566,6 @@ cdef class Span:
|
|||
return None
|
||||
return self.doc.tensor[self.start : self.end]
|
||||
|
||||
@property
|
||||
def sentiment(self):
|
||||
"""RETURNS (float): A scalar value indicating the positivity or
|
||||
negativity of the span.
|
||||
"""
|
||||
if "sentiment" in self.doc.user_span_hooks:
|
||||
return self.doc.user_span_hooks["sentiment"](self)
|
||||
else:
|
||||
return sum([token.sentiment for token in self]) / len(self)
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
"""RETURNS (str): The original verbatim text of the span."""
|
||||
|
|
|
@ -79,8 +79,6 @@ class Token:
|
|||
@property
|
||||
def prob(self) -> float: ...
|
||||
@property
|
||||
def sentiment(self) -> float: ...
|
||||
@property
|
||||
def lang(self) -> int: ...
|
||||
@property
|
||||
def idx(self) -> int: ...
|
||||
|
|
|
@ -283,14 +283,6 @@ cdef class Token:
|
|||
"""RETURNS (float): Smoothed log probability estimate of token type."""
|
||||
return self.vocab[self.c.lex.orth].prob
|
||||
|
||||
@property
|
||||
def sentiment(self):
|
||||
"""RETURNS (float): A scalar value indicating the positivity or
|
||||
negativity of the token."""
|
||||
if "sentiment" in self.doc.user_token_hooks:
|
||||
return self.doc.user_token_hooks["sentiment"](self)
|
||||
return self.vocab[self.c.lex.orth].sentiment
|
||||
|
||||
@property
|
||||
def lang(self):
|
||||
"""RETURNS (uint64): ID of the language of the parent document's
|
||||
|
|
Loading…
Reference in New Issue
Block a user