mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-29 08:24:54 +03:00
remove sentiment from docs
This commit is contained in:
parent
83f518b412
commit
b00d4149a7
|
@ -761,7 +761,6 @@ The L2 norm of the document's vector representation.
|
||||||
| `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ |
|
| `user_data` | A generic storage area, for user custom data. ~~Dict[str, Any]~~ |
|
||||||
| `lang` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~int~~ |
|
| `lang` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~int~~ |
|
||||||
| `lang_` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~str~~ |
|
| `lang_` <Tag variant="new">2.1</Tag> | Language of the document's vocabulary. ~~str~~ |
|
||||||
| `sentiment` | The document's positivity/negativity score, if available. ~~float~~ |
|
|
||||||
| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ |
|
| `user_hooks` | A dictionary that allows customization of the `Doc`'s properties. ~~Dict[str, Callable]~~ |
|
||||||
| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ |
|
| `user_token_hooks` | A dictionary that allows customization of properties of `Token` children. ~~Dict[str, Callable]~~ |
|
||||||
| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ |
|
| `user_span_hooks` | A dictionary that allows customization of properties of `Span` children. ~~Dict[str, Callable]~~ |
|
||||||
|
@ -785,7 +784,6 @@ serialization by passing in the string names via the `exclude` argument.
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------ | --------------------------------------------- |
|
| ------------------ | --------------------------------------------- |
|
||||||
| `text` | The value of the `Doc.text` attribute. |
|
| `text` | The value of the `Doc.text` attribute. |
|
||||||
| `sentiment` | The value of the `Doc.sentiment` attribute. |
|
|
||||||
| `tensor` | The value of the `Doc.tensor` attribute. |
|
| `tensor` | The value of the `Doc.tensor` attribute. |
|
||||||
| `user_data` | The value of the `Doc.user_data` dictionary. |
|
| `user_data` | The value of the `Doc.user_data` dictionary. |
|
||||||
| `user_data_keys` | The keys of the `Doc.user_data` dictionary. |
|
| `user_data_keys` | The keys of the `Doc.user_data` dictionary. |
|
||||||
|
|
|
@ -161,4 +161,3 @@ The L2 norm of the lexeme's vector representation.
|
||||||
| `lang_` | Language of the parent vocabulary. ~~str~~ |
|
| `lang_` | Language of the parent vocabulary. ~~str~~ |
|
||||||
| `prob` | Smoothed log probability estimate of the lexeme's word type (context-independent entry in the vocabulary). ~~float~~ |
|
| `prob` | Smoothed log probability estimate of the lexeme's word type (context-independent entry in the vocabulary). ~~float~~ |
|
||||||
| `cluster` | Brown cluster ID. ~~int~~ |
|
| `cluster` | Brown cluster ID. ~~int~~ |
|
||||||
| `sentiment` | A scalar value indicating the positivity or negativity of the lexeme. ~~float~~ |
|
|
||||||
|
|
|
@ -565,5 +565,4 @@ overlaps with will be returned.
|
||||||
| `ent_id_` | Alias for `id_`: the span's ID. ~~str~~ |
|
| `ent_id_` | Alias for `id_`: the span's ID. ~~str~~ |
|
||||||
| `id` | The hash value of the span's ID. ~~int~~ |
|
| `id` | The hash value of the span's ID. ~~int~~ |
|
||||||
| `id_` | The span's ID. ~~str~~ |
|
| `id_` | The span's ID. ~~str~~ |
|
||||||
| `sentiment` | A scalar value indicating the positivity or negativity of the span. ~~float~~ |
|
|
||||||
| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
|
| `_` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). ~~Underscore~~ |
|
||||||
|
|
|
@ -470,7 +470,6 @@ The L2 norm of the token's vector representation.
|
||||||
| `lang_` | Language of the parent document's vocabulary. ~~str~~ |
|
| `lang_` | Language of the parent document's vocabulary. ~~str~~ |
|
||||||
| `prob` | Smoothed log probability estimate of the token's word type (context-independent entry in the vocabulary). ~~float~~ |
|
| `prob` | Smoothed log probability estimate of the token's word type (context-independent entry in the vocabulary). ~~float~~ |
|
||||||
| `idx` | The character offset of the token within the parent document. ~~int~~ |
|
| `idx` | The character offset of the token within the parent document. ~~int~~ |
|
||||||
| `sentiment` | A scalar value indicating the positivity or negativity of the token. ~~float~~ |
|
|
||||||
| `lex_id` | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. ~~int~~ |
|
| `lex_id` | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. ~~int~~ |
|
||||||
| `rank` | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. ~~int~~ |
|
| `rank` | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. ~~int~~ |
|
||||||
| `cluster` | Brown cluster ID. ~~int~~ |
|
| `cluster` | Brown cluster ID. ~~int~~ |
|
||||||
|
|
|
@ -1400,7 +1400,7 @@ separation and makes it easier to ensure backwards compatibility. For example,
|
||||||
if you've implemented your own `.coref` property and spaCy claims it one day,
|
if you've implemented your own `.coref` property and spaCy claims it one day,
|
||||||
it'll break your code. Similarly, just by looking at the code, you'll
|
it'll break your code. Similarly, just by looking at the code, you'll
|
||||||
immediately know what's built-in and what's custom – for example,
|
immediately know what's built-in and what's custom – for example,
|
||||||
`doc.sentiment` is spaCy, while `doc._.sent_score` isn't.
|
`doc.lang` is spaCy, while `doc._.language` isn't.
|
||||||
|
|
||||||
</Accordion>
|
</Accordion>
|
||||||
|
|
||||||
|
|
|
@ -777,6 +777,8 @@ whitespace, making them easy to match as well.
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
|
|
||||||
|
from spacy.tokens import Doc  # Doc must be imported before registering the extension
Doc.set_extension("sentiment", default=0.0)
|
||||||
|
|
||||||
nlp = English() # We only want the tokenizer, so no need to load a pipeline
|
nlp = English() # We only want the tokenizer, so no need to load a pipeline
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
|
|
||||||
|
@ -791,9 +793,9 @@ neg_patterns = [[{"ORTH": emoji}] for emoji in neg_emoji]
|
||||||
def label_sentiment(matcher, doc, i, matches):
|
def label_sentiment(matcher, doc, i, matches):
|
||||||
match_id, start, end = matches[i]
|
match_id, start, end = matches[i]
|
||||||
if doc.vocab.strings[match_id] == "HAPPY": # Don't forget to get string!
|
if doc.vocab.strings[match_id] == "HAPPY": # Don't forget to get string!
|
||||||
doc.sentiment += 0.1 # Add 0.1 for positive sentiment
|
doc._.sentiment += 0.1 # Add 0.1 for positive sentiment
|
||||||
elif doc.vocab.strings[match_id] == "SAD":
|
elif doc.vocab.strings[match_id] == "SAD":
|
||||||
doc.sentiment -= 0.1 # Subtract 0.1 for negative sentiment
|
doc._.sentiment -= 0.1 # Subtract 0.1 for negative sentiment
|
||||||
|
|
||||||
matcher.add("HAPPY", pos_patterns, on_match=label_sentiment) # Add positive pattern
|
matcher.add("HAPPY", pos_patterns, on_match=label_sentiment) # Add positive pattern
|
||||||
matcher.add("SAD", neg_patterns, on_match=label_sentiment) # Add negative pattern
|
matcher.add("SAD", neg_patterns, on_match=label_sentiment) # Add negative pattern
|
||||||
|
@ -826,13 +828,14 @@ from emojipedia import Emojipedia # Installation: pip install emojipedia
|
||||||
from spacy.tokens import Span # Get the global Span object
|
from spacy.tokens import Span # Get the global Span object
|
||||||
|
|
||||||
Span.set_extension("emoji_desc", default=None) # Register the custom attribute
|
Span.set_extension("emoji_desc", default=None) # Register the custom attribute
|
||||||
|
Doc.set_extension("sentiment", default=0.0)
|
||||||
|
|
||||||
def label_sentiment(matcher, doc, i, matches):
|
def label_sentiment(matcher, doc, i, matches):
|
||||||
match_id, start, end = matches[i]
|
match_id, start, end = matches[i]
|
||||||
if doc.vocab.strings[match_id] == "HAPPY": # Don't forget to get string!
|
if doc.vocab.strings[match_id] == "HAPPY": # Don't forget to get string!
|
||||||
doc.sentiment += 0.1 # Add 0.1 for positive sentiment
|
doc._.sentiment += 0.1 # Add 0.1 for positive sentiment
|
||||||
elif doc.vocab.strings[match_id] == "SAD":
|
elif doc.vocab.strings[match_id] == "SAD":
|
||||||
doc.sentiment -= 0.1 # Subtract 0.1 for negative sentiment
|
doc._.sentiment -= 0.1 # Subtract 0.1 for negative sentiment
|
||||||
span = doc[start:end]
|
span = doc[start:end]
|
||||||
emoji = Emojipedia.search(span[0].text) # Get data for emoji
|
emoji = Emojipedia.search(span[0].text) # Get data for emoji
|
||||||
span._.emoji_desc = emoji.title # Assign emoji description
|
span._.emoji_desc = emoji.title # Assign emoji description
|
||||||
|
|
Loading…
Reference in New Issue
Block a user