remove sentiment from docs

2025-08-29 08:24:54 +03:00 · 2022-10-31 14:33:48 +01:00 · 2022-10-31 14:33:48 +01:00 · b00d4149a7
commit b00d4149a7
parent 83f518b412
6 changed files with 8 additions and 10 deletions
--- a/website/docs/api/doc.md
+++ b/website/docs/api/doc.md
@@ -761,7 +761,6 @@ The L2 norm of the document's vector representation.
 | `user_data`                                | A generic storage area, for user custom data. ~~Dict[str, Any]~~                                                                               |
 | `lang` <Tag variant="new">2.1</Tag>        | Language of the document's vocabulary. ~~int~~                                                                                                 |
 | `lang_` <Tag variant="new">2.1</Tag>       | Language of the document's vocabulary. ~~str~~                                                                                                 |
-| `sentiment`                                | The document's positivity/negativity score, if available. ~~float~~                                                                            |
 | `user_hooks`                               | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~                                                      |
 | `user_token_hooks`                         | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~                                              |
 | `user_span_hooks`                          | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~                                               |
@@ -785,7 +784,6 @@ serialization by passing in the string names via the `exclude` argument.
 | Name               | Description                                   |
 | ------------------ | --------------------------------------------- |
 | `text`             | The value of the `Doc.text` attribute.        |
-| `sentiment`        | The value of the `Doc.sentiment` attribute.   |
 | `tensor`           | The value of the `Doc.tensor` attribute.      |
 | `user_data`        | The value of the `Doc.user_data` dictionary.  |
 | `user_data_keys`   | The keys of the `Doc.user_data` dictionary.   |
--- a/website/docs/api/lexeme.md
+++ b/website/docs/api/lexeme.md
@@ -161,4 +161,3 @@ The L2 norm of the lexeme's vector representation.
 | `lang_`                                      | Language of the parent vocabulary. ~~str~~                                                                                                                                                                                                                           |
 | `prob`                                       | Smoothed log probability estimate of the lexeme's word type (context-independent entry in the vocabulary). ~~float~~                                                                                                                                                 |
 | `cluster`                                    | Brown cluster ID. ~~int~~                                                                                                                                                                                                                                            |
-| `sentiment`                                  | A scalar value indicating the positivity or negativity of the lexeme. ~~float~~                                                                                                                                                                                      |
--- a/website/docs/api/span.md
+++ b/website/docs/api/span.md
@@ -565,5 +565,4 @@ overlaps with will be returned.
 | `ent_id_`                               | Alias for `id_`: the span's ID. ~~str~~                                                                                       |
 | `id`                                    | The hash value of the span's ID. ~~int~~                                                                                      |
 | `id_`                                   | The span's ID. ~~str~~                                                                                                        |
-| `sentiment`                             | A scalar value indicating the positivity or negativity of the span. ~~float~~                                                 |
 | `_`                                     | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
--- a/website/docs/api/token.md
+++ b/website/docs/api/token.md
@@ -470,7 +470,6 @@ The L2 norm of the token's vector representation.
 | `lang_`                                      | Language of the parent document's vocabulary. ~~str~~                                                                                                                                                                                                                |
 | `prob`                                       | Smoothed log probability estimate of token's word type (context-independent entry in the vocabulary). ~~float~~                                                                                                                                                      |
 | `idx`                                        | The character offset of the token within the parent document. ~~int~~                                                                                                                                                                                                |
-| `sentiment`                                  | A scalar value indicating the positivity or negativity of the token. ~~float~~                                                                                                                                                                                       |
 | `lex_id`                                     | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. ~~int~~                                                                                                                                                                 |
 | `rank`                                       | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. ~~int~~                                                                                                                                                                 |
 | `cluster`                                    | Brown cluster ID. ~~int~~                                                                                                                                                                                                                                            |
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -1400,7 +1400,7 @@ separation and makes it easier to ensure backwards compatibility. For example,
 if you've implemented your own `.coref` property and spaCy claims it one day,
 it'll break your code. Similarly, just by looking at the code, you'll
 immediately know what's built-in and what's custom – for example,
-`doc.sentiment` is spaCy, while `doc._.sent_score` isn't.
+`doc.lang` is spaCy, while `doc._.language` isn't.

 </Accordion>

--- a/website/docs/usage/rule-based-matching.md
+++ b/website/docs/usage/rule-based-matching.md
@@ -777,6 +777,8 @@ whitespace, making them easy to match as well.
 from spacy.lang.en import English
 from spacy.matcher import Matcher

+Doc.set_extension("sentiment", default=0.0)
+
 nlp = English()  # We only want the tokenizer, so no need to load a pipeline
 matcher = Matcher(nlp.vocab)

@@ -791,9 +793,9 @@ neg_patterns = [[{"ORTH": emoji}] for emoji in neg_emoji]
 def label_sentiment(matcher, doc, i, matches):
    match_id, start, end = matches[i]
    if doc.vocab.strings[match_id] == "HAPPY":  # Don't forget to get string!
-        doc.sentiment += 0.1  # Add 0.1 for positive sentiment
+        doc._.sentiment += 0.1  # Add 0.1 for positive sentiment
    elif doc.vocab.strings[match_id] == "SAD":
-        doc.sentiment -= 0.1  # Subtract 0.1 for negative sentiment
+        doc._.sentiment -= 0.1  # Subtract 0.1 for negative sentiment

 matcher.add("HAPPY", pos_patterns, on_match=label_sentiment)  # Add positive pattern
 matcher.add("SAD", neg_patterns, on_match=label_sentiment)  # Add negative pattern
@@ -826,13 +828,14 @@ from emojipedia import Emojipedia  # Installation: pip install emojipedia
 from spacy.tokens import Span  # Get the global Span object

 Span.set_extension("emoji_desc", default=None)  # Register the custom attribute
+Doc.set_extension("sentiment", default=0.0)

 def label_sentiment(matcher, doc, i, matches):
    match_id, start, end = matches[i]
    if doc.vocab.strings[match_id] == "HAPPY":  # Don't forget to get string!
-        doc.sentiment += 0.1  # Add 0.1 for positive sentiment
+        doc._.sentiment += 0.1  # Add 0.1 for positive sentiment
    elif doc.vocab.strings[match_id] == "SAD":
-        doc.sentiment -= 0.1  # Subtract 0.1 for negative sentiment
+        doc._.sentiment -= 0.1  # Subtract 0.1 for negative sentiment
    span = doc[start:end]
    emoji = Emojipedia.search(span[0].text)  # Get data for emoji
    span._.emoji_desc = emoji.title  # Assign emoji description