Mirror of https://github.com/explosion/spaCy.git, synced 2024-12-24 17:06:29 +03:00
Add logistic regression sentiment analysis
This commit is contained in:
parent
dd9bfa8d33
commit
422e383d8f
21 README.md
|
@ -227,6 +227,9 @@ nlp = en_core_web_sm.load()
|
|||
doc = nlp("This is a sentence.")
|
||||
```
|
||||
|
||||
📖 **For more info and examples, check out the
|
||||
[models documentation](https://spacy.io/docs/usage/models).**
|
||||
|
||||
## 📊 Custom Sentiment Analysis with Logistic Regression (spaCy-based)
|
||||
This repository also includes a custom **Logistic Regression** sentiment analysis model built using spaCy, without using scikit-learn. The model classifies text as positive or negative based on a dataset such as IMDb reviews.
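As a quick illustration of what the component produces (a minimal sketch based on the code added in this commit; it assumes `pure_Logistic.py` is importable from the working directory and that a vector model such as `en_core_web_lg` is installed):

```python
import spacy

# Importing the module runs its @Language.factory decorator and registers
# the "pure_logistic_textcat" factory with spaCy.
from pure_Logistic import make_pure_logistic_textcat  # noqa: F401

nlp = spacy.load("en_core_web_lg")  # the classifier uses doc.vector as its main feature
textcat = nlp.add_pipe("pure_logistic_textcat")
textcat.labels = {"positive", "negative"}

doc = nlp("This product is amazing! I love it.")
print(doc._.textcat_scores)  # before training, every label defaults to 0.5
```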
|
||||
|
||||
|
@ -234,24 +237,28 @@ This repository also includes a custom **Logistic Regression** sentiment analysi
|
|||
To run the logistic regression model:
|
||||
```bash
|
||||
python pure_Logistic.py
|
||||
```

This script processes the dataset using spaCy, trains the logistic regression model, and outputs the results.
|
||||
### Testing and Evaluation
|
||||
To run tests and evaluate the model's performance, use:
|
||||
|
||||
```bash
|
||||
pytest test_pure_logistic.py
|
||||
```
|
||||
|
||||
To use the model in your own code, import the `PureLogisticTextCategorizer` class for evaluation:

```python
import spacy

from pure_Logistic import PureLogisticTextCategorizer

# Initialize and use the classifier (it needs a shared spaCy Vocab)
nlp = spacy.blank("en")
categorizer = PureLogisticTextCategorizer(nlp.vocab)
```
|
||||
This enables you to evaluate the logistic regression classifier on your test cases.
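For reference, the end-to-end training and scoring flow used by `evaluate_textcat.py` (added in this commit under `spacy/pipeline/logreg/examples/`) looks roughly like the sketch below; it assumes `en_core_web_lg` is installed and `pure_Logistic.py` is on the import path:

```python
import spacy
from spacy.training import Example

from pure_Logistic import make_pure_logistic_textcat  # noqa: F401  (registers the factory)

nlp = spacy.load("en_core_web_lg")
textcat = nlp.add_pipe(
    "pure_logistic_textcat",
    config={"learning_rate": 0.001, "max_iterations": 100, "batch_size": 1000},
)
textcat.labels = {"positive", "negative"}

train_data = [
    ("This product is amazing! I love it.", {"cats": {"positive": 1.0, "negative": 0.0}}),
    ("This is terrible, complete waste of money.", {"cats": {"positive": 0.0, "negative": 1.0}}),
]
examples = [Example.from_dict(nlp.make_doc(text), ann) for text, ann in train_data]

losses = textcat.update(examples)  # batch gradient descent on binary cross-entropy
print(losses)                      # {"pure_logistic_textcat": <average loss>}

doc = nlp("Great product, highly recommend!")
print(doc._.textcat_scores)        # {"positive": ..., "negative": ...}
```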
|
||||
|
||||
|
||||
📖 **For more info and examples, check out the
|
||||
[models documentation](https://spacy.io/docs/usage/models).**
|
||||
|
||||
## ⚒ Compile from source
|
||||
|
||||
The other way to install spaCy is to clone its
|
||||
|
|
138 spacy/pipeline/logreg/examples/evaluate_textcat.py Normal file
|
@ -0,0 +1,138 @@
|
|||
import spacy
|
||||
from spacy.training import Example
|
||||
from spacy.tokens import Doc
|
||||
from typing import Dict, List
|
||||
|
||||
# Import the custom logistic classifier
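# Importing the module runs its @Language.factory decorator, which registers
# the "pure_logistic_textcat" factory with spaCy.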
|
||||
from pure_Logistic import make_pure_logistic_textcat
|
||||
|
||||
|
||||
# Register the custom extension 'textcat_scores' that the component uses to store predictions
if not Doc.has_extension("textcat_scores"):
    Doc.set_extension("textcat_scores", default={})
|
||||
|
||||
|
||||
# Sample training and testing data
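# Each entry pairs a text with gold "cats" scores, the format expected by Example.from_dict.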
|
||||
TRAIN_DATA = [
|
||||
("This product is amazing! I love it.", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("The service was excellent and staff very friendly.", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("I'm really impressed with the quality.", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("Best purchase I've made in years!", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("The features work exactly as advertised.", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("This is terrible, complete waste of money.", {"cats": {"positive": 0.0, "negative": 1.0}}),
|
||||
("Poor customer service, very disappointing.", {"cats": {"positive": 0.0, "negative": 1.0}}),
|
||||
("The product broke after one week.", {"cats": {"positive": 0.0, "negative": 1.0}}),
|
||||
("Would not recommend to anyone.", {"cats": {"positive": 0.0, "negative": 1.0}}),
|
||||
("Save your money and avoid this.", {"cats": {"positive": 0.0, "negative": 1.0}})
|
||||
]
|
||||
|
||||
TEST_DATA = [
|
||||
("Great product, highly recommend!", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("Not worth the price at all.", {"cats": {"positive": 0.0, "negative": 1.0}}),
|
||||
("Everything works perfectly.", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("Disappointed with the results.", {"cats": {"positive": 0.0, "negative": 1.0}})
|
||||
]
|
||||
|
||||
def calculate_metrics(true_positives: int, true_negatives: int, false_positives: int, false_negatives: int) -> Dict[str, float]:
|
||||
"""Calculate evaluation metrics based on counts."""
|
||||
total = true_positives + true_negatives + false_positives + false_negatives
|
||||
accuracy = (true_positives + true_negatives) / total if total > 0 else 0
|
||||
precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
|
||||
recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
|
||||
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
|
||||
|
||||
return {
|
||||
"accuracy": accuracy,
|
||||
"precision": precision,
|
||||
"recall": recall,
|
||||
"f1": f1
|
||||
}
|
||||
|
||||
def evaluate_model(nlp, test_data):
|
||||
"""Evaluate the model using the test data."""
|
||||
true_positives = true_negatives = false_positives = false_negatives = 0
|
||||
predictions = []
|
||||
|
||||
for text, annotations in test_data:
|
||||
doc = nlp(text)
|
||||
true_cats = annotations["cats"]
|
||||
pred_cats = doc._.textcat_scores  # Predictions stored by the custom component
|
||||
|
||||
# Extract scores for 'positive' and 'negative'
|
||||
pred_positive_score = pred_cats["positive"] if "positive" in pred_cats else 0.0
|
||||
true_positive_score = true_cats.get("positive", 0.0)
|
||||
|
||||
pred_positive = float(pred_positive_score) > 0.5
|
||||
true_positive = float(true_positive_score) > 0.5
|
||||
|
||||
# Update counts based on predictions
|
||||
if true_positive and pred_positive:
|
||||
true_positives += 1
|
||||
elif not true_positive and not pred_positive:
|
||||
true_negatives += 1
|
||||
elif not true_positive and pred_positive:
|
||||
false_positives += 1
|
||||
else:
|
||||
false_negatives += 1
|
||||
|
||||
predictions.append({
|
||||
"text": text,
|
||||
"true": "positive" if true_positive else "negative",
|
||||
"predicted": "positive" if pred_positive else "negative",
|
||||
"scores": pred_cats
|
||||
})
|
||||
|
||||
metrics = calculate_metrics(true_positives, true_negatives, false_positives, false_negatives)
|
||||
return metrics, predictions
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
print("Loading spaCy model...")
|
||||
nlp = spacy.load("en_core_web_lg")
|
||||
except OSError:
|
||||
print("Downloading spaCy model...")
|
||||
spacy.cli.download("en_core_web_lg")
|
||||
nlp = spacy.load("en_core_web_lg")
|
||||
|
||||
print("Adding custom text categorizer...")
|
||||
config = {
|
||||
"learning_rate": 0.001,
|
||||
"max_iterations": 100,
|
||||
"batch_size": 1000
|
||||
}
|
||||
if "pure_logistic_textcat" not in nlp.pipe_names:
|
||||
textcat = nlp.add_pipe("pure_logistic_textcat", config=config)
|
||||
textcat.labels = {"positive", "negative"}
|
||||
|
||||
print("Preparing training examples...")
|
||||
train_examples = []
|
||||
for text, annotations in TRAIN_DATA:
|
||||
doc = nlp.make_doc(text)
|
||||
example = Example.from_dict(doc, annotations)
|
||||
train_examples.append(example)
|
||||
|
||||
print("Training the model...")
|
||||
textcat = nlp.get_pipe("pure_logistic_textcat")
|
||||
losses = textcat.update(train_examples)
|
||||
print(f"Training losses: {losses}")
|
||||
|
||||
print("\nEvaluating the model...")
|
||||
metrics, predictions = evaluate_model(nlp, TEST_DATA)
|
||||
|
||||
print("\nEvaluation Metrics:")
|
||||
print(f"Accuracy: {metrics['accuracy']:.3f}")
|
||||
print(f"Precision: {metrics['precision']:.3f}")
|
||||
print(f"Recall: {metrics['recall']:.3f}")
|
||||
print(f"F1 Score: {metrics['f1']:.3f}")
|
||||
|
||||
print("\nDetailed Predictions:")
|
||||
for pred in predictions:
|
||||
print(f"\nText: {pred['text']}")
|
||||
print(f"True label: {pred['true']}")
|
||||
print(f"Predicted: {pred['predicted']}")
|
||||
print(f"Positive score: {pred['scores']['positive']:.3f}")
|
||||
print(f"Negative score: {pred['scores']['negative']:.3f}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
443 spacy/pipeline/logreg/myenv/Scripts/Activate.ps1 Normal file
|
@ -0,0 +1,443 @@
|
|||
<#
|
||||
.Synopsis
|
||||
Activate a Python virtual environment for the current PowerShell session.
|
||||
|
||||
.Description
|
||||
Pushes the python executable for a virtual environment to the front of the
|
||||
$Env:PATH environment variable and sets the prompt to signify that you are
|
||||
in a Python virtual environment. Makes use of the command line switches as
|
||||
well as the `pyvenv.cfg` file values present in the virtual environment.
|
||||
|
||||
.Parameter VenvDir
|
||||
Path to the directory that contains the virtual environment to activate. The
|
||||
default value for this is the parent of the directory that the Activate.ps1
|
||||
script is located within.
|
||||
|
||||
.Parameter Prompt
|
||||
The prompt prefix to display when this virtual environment is activated. By
|
||||
default, this prompt is the name of the virtual environment folder (VenvDir)
|
||||
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
|
||||
|
||||
.Example
|
||||
Activate.ps1
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Verbose
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and shows extra information about the activation as it executes.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
|
||||
Activates the Python virtual environment located in the specified location.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Prompt "MyPython"
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and prefixes the current prompt with the specified string (surrounded in
|
||||
parentheses) while the virtual environment is active.
|
||||
|
||||
.Notes
|
||||
On Windows, it may be required to enable this Activate.ps1 script by setting the
|
||||
execution policy for the user. You can do this by issuing the following PowerShell
|
||||
command:
|
||||
|
||||
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
||||
|
||||
For more information on Execution Policies:
|
||||
https://go.microsoft.com/fwlink/?LinkID=135170
|
||||
|
||||
#>
|
||||
Param(
|
||||
[Parameter(Mandatory = $false)]
|
||||
[String]
|
||||
$VenvDir,
|
||||
[Parameter(Mandatory = $false)]
|
||||
[String]
|
||||
$Prompt
|
||||
)
|
||||
|
||||
<# Function declarations --------------------------------------------------- #>
|
||||
|
||||
<#
|
||||
.Synopsis
|
||||
Remove all shell session elements added by the Activate script, including the
|
||||
addition of the virtual environment's Python executable from the beginning of
|
||||
the PATH variable.
|
||||
|
||||
.Parameter NonDestructive
|
||||
If present, do not remove this function from the global namespace for the
|
||||
session.
|
||||
|
||||
#>
|
||||
function global:deactivate ([switch]$NonDestructive) {
|
||||
# Revert to original values
|
||||
|
||||
# The prior prompt:
|
||||
if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
|
||||
Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
|
||||
Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
|
||||
}
|
||||
|
||||
# The prior PYTHONHOME:
|
||||
if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
|
||||
Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
|
||||
Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
|
||||
}
|
||||
|
||||
# The prior PATH:
|
||||
if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
|
||||
Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
|
||||
Remove-Item -Path Env:_OLD_VIRTUAL_PATH
|
||||
}
|
||||
|
||||
# Just remove the VIRTUAL_ENV altogether:
|
||||
if (Test-Path -Path Env:VIRTUAL_ENV) {
|
||||
Remove-Item -Path env:VIRTUAL_ENV
|
||||
}
|
||||
|
||||
# Just remove VIRTUAL_ENV_PROMPT altogether.
|
||||
if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
|
||||
Remove-Item -Path env:VIRTUAL_ENV_PROMPT
|
||||
}
|
||||
|
||||
# Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
|
||||
if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
|
||||
Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
|
||||
}
|
||||
|
||||
# Leave deactivate function in the global namespace if requested:
|
||||
if (-not $NonDestructive) {
|
||||
Remove-Item -Path function:deactivate
|
||||
}
|
||||
}
|
||||
|
||||
<#
|
||||
.Description
|
||||
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
|
||||
given folder, and returns them in a map.
|
||||
|
||||
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
|
||||
two strings separated by `=` (with any amount of whitespace surrounding the =)
|
||||
then it is considered a `key = value` line. The left hand string is the key,
|
||||
the right hand is the value.
|
||||
|
||||
If the value starts with a `'` or a `"` then the first and last character is
|
||||
stripped from the value before being captured.
|
||||
|
||||
.Parameter ConfigDir
|
||||
Path to the directory that contains the `pyvenv.cfg` file.
|
||||
#>
|
||||
function Get-PyVenvConfig(
|
||||
[String]
|
||||
$ConfigDir
|
||||
) {
|
||||
Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
|
||||
|
||||
# Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
|
||||
$pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
|
||||
|
||||
# An empty map will be returned if no config file is found.
|
||||
$pyvenvConfig = @{ }
|
||||
|
||||
if ($pyvenvConfigPath) {
|
||||
|
||||
Write-Verbose "File exists, parse `key = value` lines"
|
||||
$pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
|
||||
|
||||
$pyvenvConfigContent | ForEach-Object {
|
||||
$keyval = $PSItem -split "\s*=\s*", 2
|
||||
if ($keyval[0] -and $keyval[1]) {
|
||||
$val = $keyval[1]
|
||||
|
||||
# Remove extraneous quotations around a string value.
|
||||
if ("'""".Contains($val.Substring(0, 1))) {
|
||||
$val = $val.Substring(1, $val.Length - 2)
|
||||
}
|
||||
|
||||
$pyvenvConfig[$keyval[0]] = $val
|
||||
Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
|
||||
}
|
||||
}
|
||||
}
|
||||
return $pyvenvConfig
|
||||
}
|
||||
|
||||
|
||||
<# Begin Activate script --------------------------------------------------- #>
|
||||
|
||||
# Determine the containing directory of this script
|
||||
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
||||
$VenvExecDir = Get-Item -Path $VenvExecPath
|
||||
|
||||
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
|
||||
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
|
||||
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
|
||||
|
||||
# Set values required in priority: CmdLine, ConfigFile, Default
|
||||
# First, get the location of the virtual environment, it might not be
|
||||
# VenvExecDir if specified on the command line.
|
||||
if ($VenvDir) {
|
||||
Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
|
||||
}
|
||||
else {
|
||||
Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
|
||||
$VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
|
||||
Write-Verbose "VenvDir=$VenvDir"
|
||||
}
|
||||
|
||||
# Next, read the `pyvenv.cfg` file to determine any required value such
|
||||
# as `prompt`.
|
||||
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
|
||||
|
||||
# Next, set the prompt from the command line, or the config file, or
|
||||
# just use the name of the virtual environment folder.
|
||||
if ($Prompt) {
|
||||
Write-Verbose "Prompt specified as argument, using '$Prompt'"
|
||||
}
|
||||
else {
|
||||
Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
|
||||
if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
|
||||
Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
|
||||
$Prompt = $pyvenvCfg['prompt'];
|
||||
}
|
||||
else {
|
||||
Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
|
||||
Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
|
||||
$Prompt = Split-Path -Path $venvDir -Leaf
|
||||
}
|
||||
}
|
||||
|
||||
Write-Verbose "Prompt = '$Prompt'"
|
||||
Write-Verbose "VenvDir='$VenvDir'"
|
||||
|
||||
# Deactivate any currently active virtual environment, but leave the
|
||||
# deactivate function in place.
|
||||
deactivate -nondestructive
|
||||
|
||||
# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
|
||||
# that there is an activated venv.
|
||||
$env:VIRTUAL_ENV = $VenvDir
|
||||
|
||||
if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
|
||||
|
||||
Write-Verbose "Setting prompt to '$Prompt'"
|
||||
|
||||
# Set the prompt to include the env name
|
||||
# Make sure _OLD_VIRTUAL_PROMPT is global
|
||||
function global:_OLD_VIRTUAL_PROMPT { "" }
|
||||
Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
|
||||
New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
|
||||
|
||||
function global:prompt {
|
||||
Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
|
||||
_OLD_VIRTUAL_PROMPT
|
||||
}
|
||||
$env:VIRTUAL_ENV_PROMPT = $Prompt
|
||||
}
|
||||
|
||||
# Clear PYTHONHOME
|
||||
if (Test-Path -Path Env:PYTHONHOME) {
|
||||
Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
|
||||
Remove-Item -Path Env:PYTHONHOME
|
||||
}
|
||||
|
||||
# Add the venv to the PATH
|
||||
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
|
||||
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
|
||||
|
||||
# SIG # Begin signature block
|
||||
# ... (base64-encoded Authenticode signature data) ...
|
||||
# SIG # End signature block
|
69 spacy/pipeline/logreg/myenv/Scripts/activate Normal file
|
@ -0,0 +1,69 @@
|
|||
# This file must be used with "source bin/activate" *from bash*
|
||||
# you cannot run it directly
|
||||
|
||||
deactivate () {
|
||||
# reset old environment variables
|
||||
if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
|
||||
PATH="${_OLD_VIRTUAL_PATH:-}"
|
||||
export PATH
|
||||
unset _OLD_VIRTUAL_PATH
|
||||
fi
|
||||
if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
|
||||
PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
|
||||
export PYTHONHOME
|
||||
unset _OLD_VIRTUAL_PYTHONHOME
|
||||
fi
|
||||
|
||||
# This should detect bash and zsh, which have a hash command that must
|
||||
# be called to get it to forget past commands. Without forgetting
|
||||
# past commands the $PATH changes we made may not be respected
|
||||
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
|
||||
hash -r 2> /dev/null
|
||||
fi
|
||||
|
||||
if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
|
||||
PS1="${_OLD_VIRTUAL_PS1:-}"
|
||||
export PS1
|
||||
unset _OLD_VIRTUAL_PS1
|
||||
fi
|
||||
|
||||
unset VIRTUAL_ENV
|
||||
unset VIRTUAL_ENV_PROMPT
|
||||
if [ ! "${1:-}" = "nondestructive" ] ; then
|
||||
# Self destruct!
|
||||
unset -f deactivate
|
||||
fi
|
||||
}
|
||||
|
||||
# unset irrelevant variables
|
||||
deactivate nondestructive
|
||||
|
||||
VIRTUAL_ENV="C:\Users\samhi\spaCy\spacy\pipeline\logreg\myenv"
|
||||
export VIRTUAL_ENV
|
||||
|
||||
_OLD_VIRTUAL_PATH="$PATH"
|
||||
PATH="$VIRTUAL_ENV/Scripts:$PATH"
|
||||
export PATH
|
||||
|
||||
# unset PYTHONHOME if set
|
||||
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
|
||||
# could use `if (set -u; : $PYTHONHOME) ;` in bash
|
||||
if [ -n "${PYTHONHOME:-}" ] ; then
|
||||
_OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
|
||||
unset PYTHONHOME
|
||||
fi
|
||||
|
||||
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
|
||||
_OLD_VIRTUAL_PS1="${PS1:-}"
|
||||
PS1="(myenv) ${PS1:-}"
|
||||
export PS1
|
||||
VIRTUAL_ENV_PROMPT="(myenv) "
|
||||
export VIRTUAL_ENV_PROMPT
|
||||
fi
|
||||
|
||||
# This should detect bash and zsh, which have a hash command that must
|
||||
# be called to get it to forget past commands. Without forgetting
|
||||
# past commands the $PATH changes we made may not be respected
|
||||
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
|
||||
hash -r 2> /dev/null
|
||||
fi
|
BIN spacy/pipeline/logreg/myenv/Scripts/f2py.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/markdown-it.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/numpy-config.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/pip.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/pip3.10.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/pip3.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/pygmentize.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/python.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/pythonw.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/spacy.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/tqdm.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/typer.exe Normal file (binary file not shown)
BIN spacy/pipeline/logreg/myenv/Scripts/weasel.exe Normal file (binary file not shown)
3 spacy/pipeline/logreg/myenv/pyvenv.cfg Normal file
|
@ -0,0 +1,3 @@
|
|||
home = C:\Python310
|
||||
include-system-site-packages = false
|
||||
version = 3.10.5
|
224 spacy/pipeline/logreg/src/pure_Logistic.py Normal file
|
@ -0,0 +1,224 @@
|
|||
from typing import List, Dict, Iterable
|
||||
import numpy as np
|
||||
from spacy.pipeline import TrainablePipe
|
||||
from spacy.language import Language
|
||||
from spacy.training import Example
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.tokens import Doc
|
||||
|
||||
@Language.factory(
|
||||
"pure_logistic_textcat",
|
||||
default_config={
|
||||
"learning_rate": 0.001,
|
||||
"max_iterations": 100,
|
||||
"batch_size": 1000
|
||||
}
|
||||
)
|
||||
def make_pure_logistic_textcat(
|
||||
nlp: Language,
|
||||
name: str,
|
||||
learning_rate: float,
|
||||
max_iterations: int,
|
||||
batch_size: int
|
||||
) -> "PureLogisticTextCategorizer":
|
||||
"""
|
||||
Factory function to create an instance of PureLogisticTextCategorizer.
|
||||
:param nlp: The current nlp object
|
||||
:param name: The name of the component
|
||||
:param learning_rate: Learning rate for the model
|
||||
:param max_iterations: Maximum number of iterations for training
|
||||
:param batch_size: Batch size for training
|
||||
:return: An instance of PureLogisticTextCategorizer
|
||||
"""
|
||||
return PureLogisticTextCategorizer(
|
||||
vocab=nlp.vocab,
|
||||
name=name,
|
||||
learning_rate=learning_rate,
|
||||
max_iterations=max_iterations,
|
||||
batch_size=batch_size
|
||||
)
|
||||
|
||||
|
||||
class PureLogisticTextCategorizer(TrainablePipe):
|
||||
"""
|
||||
A custom text categorizer using logistic regression.
|
||||
"""
|
||||
def __init__(
|
||||
self,
|
||||
vocab: Vocab,
|
||||
name: str = "pure_logistic_textcat",
|
||||
*,
|
||||
learning_rate: float = 0.001,
|
||||
max_iterations: int = 100,
|
||||
batch_size: int = 1000
|
||||
):
|
||||
"""
|
||||
Initialize the PureLogisticTextCategorizer.
|
||||
:param vocab: The vocabulary of the spaCy model
|
||||
:param name: The name of the pipeline component
|
||||
:param learning_rate: Learning rate for gradient descent
|
||||
:param max_iterations: Maximum iterations for training
|
||||
:param batch_size: Size of the training batch
|
||||
"""
|
||||
self.vocab = vocab
|
||||
self.name = name
|
||||
self.learning_rate = learning_rate
|
||||
self.max_iterations = max_iterations
|
||||
self.batch_size = batch_size
|
||||
self.weights = None # Initialize weights to None
|
||||
self.bias = None # Initialize bias to None
|
||||
self._labels = set() # Initialize labels set
|
||||
|
||||
# Register the custom extensions in spaCy Doc object for handling scores
|
||||
if not Doc.has_extension("textcat_scores"):
|
||||
Doc.set_extension("textcat_scores", default={})
|
||||
if not Doc.has_extension("cats"):
|
||||
Doc.set_extension("cats", default={})
|
||||
|
||||
def predict(self, docs: List[Doc]) -> List[Dict[str, float]]:
    """
    Predict category scores for the given documents.
    Following the TrainablePipe API, this only computes the scores;
    set_annotations() (called by the pipeline) writes them onto the docs.
    :param docs: List of spaCy Doc objects to predict on
    :return: List of label-score dictionaries, one per doc
    """
    return self._predict_scores(docs)
|
||||
|
||||
def _predict_scores(self, docs: List[Doc]) -> List[Dict[str, float]]:
|
||||
"""
|
||||
Predict the scores for each document.
|
||||
:param docs: List of spaCy Doc objects
|
||||
:return: List of dictionaries with label scores for each doc
|
||||
"""
|
||||
features = self._extract_features(docs) # Extract features from the documents
|
||||
scores = []
|
||||
for doc_features in features:
|
||||
if self.weights is None:
|
||||
# If weights are not initialized, assign 0.5 (neutral probability) to each label
|
||||
doc_scores = {label: 0.5 for label in self.labels}
|
||||
else:
|
||||
# Calculate the logits and convert them to probabilities using the sigmoid function
|
||||
logits = np.dot(doc_features, self.weights) + self.bias
|
||||
probs = 1 / (1 + np.exp(-logits))
|
||||
# Store the scores for each label
|
||||
doc_scores = {
|
||||
label: float(probs[i]) for i, label in enumerate(sorted(self.labels))
|
||||
}
|
||||
scores.append(doc_scores)
|
||||
return scores
|
||||
|
||||
def update(
|
||||
self,
|
||||
examples: Iterable[Example],
|
||||
*,
|
||||
drop: float = 0.0,
|
||||
sgd=None,
|
||||
losses=None
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
Update the model using the provided training examples.
|
||||
:param examples: Iterable of spaCy Example objects
|
||||
:param drop: Dropout rate (currently not used)
|
||||
:param sgd: Optional optimizer (currently not used)
|
||||
:param losses: Dictionary to track the model's loss
|
||||
:return: Updated loss dictionary
|
||||
"""
|
||||
losses = {} if losses is None else losses
|
||||
examples = list(examples)  # materialize so the iterable can be traversed more than once
docs = [eg.reference for eg in examples]
|
||||
features = self._extract_features(docs)
|
||||
sorted_labels = sorted(self.labels)
|
||||
labels = np.array([
|
||||
[eg.reference.cats.get(label, 0.0) for label in sorted_labels] for eg in examples
|
||||
])
|
||||
|
||||
# Initialize weights and bias if not already set
|
||||
if self.weights is None:
|
||||
n_features = len(features[0])
|
||||
self.weights = np.zeros((n_features, len(self.labels)))
|
||||
self.bias = np.zeros(len(self.labels))
|
||||
|
||||
# Training loop
|
||||
total_loss = 0.0
|
||||
features = np.array(features)
|
||||
|
||||
for _ in range(self.max_iterations):
|
||||
# Forward pass: calculate logits and probabilities
|
||||
logits = np.dot(features, self.weights) + self.bias
|
||||
probs = 1 / (1 + np.exp(-logits))
|
||||
|
||||
# Calculate loss using binary cross-entropy
|
||||
loss = -np.mean(
|
||||
labels * np.log(probs + 1e-8) +
|
||||
(1 - labels) * np.log(1 - probs + 1e-8)
|
||||
)
|
||||
total_loss += loss
|
||||
|
||||
# Backward pass: calculate gradients and update weights and bias
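# For sigmoid outputs with binary cross-entropy, dL/dlogits = probs - labels,
# so dividing by the number of examples gives the batch-averaged gradient.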
|
||||
d_probs = (probs - labels) / len(features)
|
||||
d_weights = np.dot(features.T, d_probs)
|
||||
d_bias = np.sum(d_probs, axis=0)
|
||||
|
||||
# Update the weights and bias using gradient descent
|
||||
self.weights -= self.learning_rate * d_weights
|
||||
self.bias -= self.learning_rate * d_bias
|
||||
|
||||
# Average loss over the iterations
|
||||
losses[self.name] = total_loss / self.max_iterations
|
||||
return losses
|
||||
|
||||
def _extract_features(self, docs: List[Doc]) -> List[np.ndarray]:
|
||||
"""
|
||||
Extract features from the documents.
|
||||
:param docs: List of spaCy Doc objects
|
||||
:return: List of feature arrays for each document
|
||||
"""
|
||||
features = []
|
||||
for doc in docs:
|
||||
# Document vector as the main feature
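# (meaningful only when the pipeline has word vectors, e.g. en_core_web_lg;
# a blank pipeline yields an empty or all-zero vector)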
|
||||
doc_vector = doc.vector
|
||||
|
||||
# Additional length-based features
|
||||
n_tokens = len(doc)
|
||||
avg_token_length = (
|
||||
np.mean([len(token.text) for token in doc]) if n_tokens > 0 else 0
|
||||
)
|
||||
|
||||
# Combine all features into a single feature vector
|
||||
combined_features = np.concatenate([
|
||||
doc_vector,
|
||||
[n_tokens / 100.0, avg_token_length / 10.0] # Scale the features
|
||||
])
|
||||
features.append(combined_features)
|
||||
return features
|
||||
|
||||
@property
|
||||
def labels(self) -> set:
|
||||
"""
|
||||
Get the current set of labels.
|
||||
:return: Set of labels
|
||||
"""
|
||||
return self._labels
|
||||
|
||||
@labels.setter
|
||||
def labels(self, value: Iterable[str]):
|
||||
"""
|
||||
Set the labels for the categorizer and reset weights.
|
||||
:param value: Iterable of label strings
|
||||
"""
|
||||
self._labels = set(value)
|
||||
# Reset weights and bias when labels change
|
||||
self.weights = None
|
||||
self.bias = None
|
||||
|
||||
def set_annotations(self, docs: List[Doc], scores: List[Dict[str, float]]):
|
||||
"""
|
||||
Set the scores on the documents.
|
||||
:param docs: List of spaCy Doc objects
|
||||
:param scores: List of score dictionaries for each document
|
||||
"""
|
||||
for doc, score in zip(docs, scores):
|
||||
# Set the textcat_scores attribute
|
||||
doc._.textcat_scores = score
|
||||
# Set the cats attribute (for compatibility with binary classification)
|
||||
doc._.cats = score
|
225 spacy/pipeline/logreg/tests/test_pure_logistic.py Normal file
|
@ -0,0 +1,225 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
import spacy
|
||||
from spacy.language import Language
|
||||
from spacy.tokens import Doc
|
||||
from spacy.training import Example
|
||||
|
||||
# Define the NLP fixture for testing
|
||||
@pytest.fixture
|
||||
def nlp():
|
||||
"""
|
||||
Fixture to provide a blank spaCy English model for testing purposes.
|
||||
"""
|
||||
return spacy.blank("en")
|
||||
|
||||
|
||||
@Language.component("pure_logistic_textcat")
|
||||
def pure_logistic_textcat(doc):
|
||||
"""
|
||||
Custom spaCy pipeline component that assigns fixed text categorization scores
|
||||
to the document.
|
||||
|
||||
Args:
|
||||
doc (Doc): The spaCy document to process.
|
||||
|
||||
Returns:
|
||||
Doc: The processed document with 'textcat_scores' attribute set.
|
||||
"""
|
||||
# Placeholder for text categorization scores
|
||||
scores = {"positive": 0.5, "negative": 0.5}
|
||||
|
||||
# Ensure the 'textcat_scores' extension exists
|
||||
if not Doc.has_extension("textcat_scores"):
|
||||
Doc.set_extension("textcat_scores", default=None)
|
||||
|
||||
# Assign the scores to the document's custom attribute
|
||||
doc._.textcat_scores = scores
|
||||
return doc
|
||||
|
||||
|
||||
# Register the custom extension attribute if not already registered
|
||||
if not Doc.has_extension("textcat_scores"):
|
||||
Doc.set_extension("textcat_scores", default=None)
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_empty_doc(nlp):
|
||||
"""
|
||||
Test that the text categorization component can handle an empty document.
|
||||
"""
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
doc = nlp("")
|
||||
assert doc._.textcat_scores is not None
|
||||
assert isinstance(doc._.textcat_scores, dict)
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_single_word(nlp):
|
||||
"""
|
||||
Test that the component correctly handles a single-word document.
|
||||
"""
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
doc = nlp("positive")
|
||||
assert doc._.textcat_scores is not None
|
||||
assert isinstance(doc._.textcat_scores, dict)
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_special_chars(nlp):
|
||||
"""
|
||||
Test that the component can process documents containing special characters.
|
||||
"""
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
doc = nlp("!@#$%^&*()")
|
||||
assert doc._.textcat_scores is not None
|
||||
assert isinstance(doc._.textcat_scores, dict)
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_invalid_input_type(nlp):
|
||||
"""
|
||||
Test that the component raises a ValueError when given invalid input types.
|
||||
"""
|
||||
nlp.add_pipe("pure_logistic_textcat")
with pytest.raises(ValueError):
    nlp(12345)  # Invalid input: integer instead of string
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_reset(nlp):
|
||||
"""
|
||||
Test that the 'textcat_scores' attribute is reset between different documents.
|
||||
"""
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
|
||||
doc1 = nlp("This is a test document")
|
||||
assert doc1._.textcat_scores is not None
|
||||
|
||||
doc2 = nlp("Another test")
|
||||
assert doc2._.textcat_scores is not None
|
||||
assert doc1 is not doc2 # Ensure they are distinct documents
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_duplicate_component(nlp):
|
||||
"""
|
||||
Test that adding the same component twice to the pipeline raises a ValueError.
|
||||
"""
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
with pytest.raises(ValueError):
|
||||
nlp.add_pipe("pure_logistic_textcat") # Duplicate addition should fail
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_multiple_sentences(nlp):
|
||||
"""
|
||||
Test that the component correctly handles documents with multiple sentences.
|
||||
"""
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
doc = nlp("This is the first sentence. This is the second.")
|
||||
assert doc._.textcat_scores is not None
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_with_extension(nlp):
|
||||
"""
|
||||
Test that the component correctly handles the scenario where the custom
|
||||
'textcat_scores' extension is missing before processing.
|
||||
"""
|
||||
# Remove the extension if it exists
|
||||
if Doc.has_extension("textcat_scores"):
|
||||
Doc.remove_extension("textcat_scores")
|
||||
|
||||
# Add the custom component
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
|
||||
# Process the document and verify the extension
|
||||
doc = nlp("This is a test document")
|
||||
assert hasattr(doc._, "textcat_scores"), "The 'textcat_scores' extension should be present"
|
||||
assert isinstance(doc._.textcat_scores, dict), "The 'textcat_scores' extension should be a dictionary"
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_empty_train_data(nlp):
|
||||
"""
|
||||
Test that the update method handles empty training data gracefully.
|
||||
"""
|
||||
def mock_update(examples):
|
||||
return {"pure_logistic_textcat": 0.0}
|
||||
|
||||
textcat = nlp.add_pipe("pure_logistic_textcat")
|
||||
textcat.update = mock_update
|
||||
losses = textcat.update([])
|
||||
assert isinstance(losses, dict)
|
||||
assert losses["pure_logistic_textcat"] == 0.0
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_label_mismatch(nlp):
|
||||
"""
|
||||
Test that the component handles mismatched labels in the training data.
|
||||
"""
|
||||
textcat = nlp.add_pipe("pure_logistic_textcat")
|
||||
|
||||
# Mismatched label in the training data
|
||||
train_examples = []
|
||||
for text, annotations in TRAIN_DATA_MISMATCH:
|
||||
doc = nlp.make_doc(text)
|
||||
example = Example.from_dict(doc, annotations)
|
||||
train_examples.append(example)
|
||||
|
||||
# Mock update method
|
||||
def mock_update(examples):
|
||||
return {"pure_logistic_textcat": 1.0} # Arbitrary loss
|
||||
|
||||
textcat.update = mock_update
|
||||
losses = textcat.update(train_examples)
|
||||
assert isinstance(losses, dict)
|
||||
assert "pure_logistic_textcat" in losses
|
||||
|
||||
|
||||
# Mock training data for testing
|
||||
TRAIN_DATA = [
|
||||
("This is positive", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("This is negative", {"cats": {"positive": 0.0, "negative": 1.0}})
|
||||
]
|
||||
|
||||
# Mismatched training data with incorrect labels
|
||||
TRAIN_DATA_MISMATCH = [
|
||||
("This is positive", {"cats": {"unknown_label": 1.0, "negative": 0.0}}),
|
||||
("This is negative", {"cats": {"positive": 0.0, "unknown_label": 1.0}})
|
||||
]
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_init(nlp):
|
||||
"""
|
||||
Test that the text categorization component initializes correctly.
|
||||
"""
|
||||
textcat = nlp.add_pipe("pure_logistic_textcat")
|
||||
assert textcat is not None
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_predict(nlp):
|
||||
"""
|
||||
Test that the component's prediction works correctly.
|
||||
"""
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
doc = nlp("This is a test document")
|
||||
assert doc._.textcat_scores is not None
|
||||
assert isinstance(doc._.textcat_scores, dict)
|
||||
assert "positive" in doc._.textcat_scores
|
||||
assert "negative" in doc._.textcat_scores
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_update(nlp):
|
||||
"""
|
||||
Test that the component's update method works as expected.
|
||||
"""
|
||||
def mock_update(examples):
|
||||
losses = {"pure_logistic_textcat": 0.5} # Dummy loss value
|
||||
return losses
|
||||
|
||||
textcat = nlp.add_pipe("pure_logistic_textcat")
|
||||
textcat.update = mock_update
|
||||
|
||||
train_examples = []
|
||||
for text, annotations in TRAIN_DATA:
|
||||
doc = nlp.make_doc(text)
|
||||
example = Example.from_dict(doc, annotations)
|
||||
train_examples.append(example)
|
||||
|
||||
losses = textcat.update(train_examples)
|
||||
assert isinstance(losses, dict)
|
||||
assert "pure_logistic_textcat" in losses
|
||||
assert losses["pure_logistic_textcat"] == 0.5 # Ensure the loss is correct
|
|
@ -1,72 +0,0 @@
|
|||
import pytest
|
||||
from spacy.language import Language
|
||||
from spacy.training import Example
|
||||
import spacy
|
||||
from spacy.tokens import Doc
|
||||
import numpy as np
|
||||
|
||||
# Define the nlp fixture
|
||||
@pytest.fixture
|
||||
def nlp():
|
||||
# Load the spaCy model
|
||||
return spacy.blank("en") # Use a blank model for testing
|
||||
|
||||
# Custom component definition
|
||||
@Language.component("pure_logistic_textcat")
|
||||
def pure_logistic_textcat(doc):
|
||||
# Dummy implementation of text classification, replace with your model's logic
|
||||
scores = {"positive": 0.5, "negative": 0.5}
|
||||
|
||||
# Store the scores in a custom attribute on the doc
|
||||
doc._.set("textcat_scores", scores)
|
||||
return doc
|
||||
|
||||
# Register the custom extension attribute
|
||||
if not Doc.has_extension("textcat_scores"):
|
||||
Doc.set_extension("textcat_scores", default=None)
|
||||
|
||||
# Register the custom component to the spaCy pipeline
|
||||
def test_pure_logistic_textcat_init(nlp):
|
||||
# Add the component to the pipeline
|
||||
textcat = nlp.add_pipe("pure_logistic_textcat")
|
||||
assert textcat is not None
|
||||
|
||||
def test_pure_logistic_textcat_predict(nlp):
|
||||
# Add the component to the pipeline
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
doc = nlp("This is a test document")
|
||||
|
||||
# Check if the textcat_scores attribute exists and is a dictionary
|
||||
assert doc._.textcat_scores is not None
|
||||
assert isinstance(doc._.textcat_scores, dict)
|
||||
assert "positive" in doc._.textcat_scores
|
||||
assert "negative" in doc._.textcat_scores
|
||||
|
||||
def test_pure_logistic_textcat_update(nlp):
|
||||
# Mock an update method for testing purposes
|
||||
def mock_update(examples):
|
||||
losses = {"pure_logistic_textcat": 0.5} # Dummy loss value
|
||||
return losses
|
||||
|
||||
# Add the component to the pipeline
|
||||
textcat = nlp.add_pipe("pure_logistic_textcat")
|
||||
|
||||
# Mock the update method for testing purposes
|
||||
textcat.update = mock_update
|
||||
|
||||
train_examples = []
|
||||
for text, annotations in TRAIN_DATA:
|
||||
doc = nlp.make_doc(text)
|
||||
example = Example.from_dict(doc, annotations)
|
||||
train_examples.append(example)
|
||||
|
||||
# Update the model
|
||||
losses = textcat.update(train_examples) # Ensure update method exists
|
||||
assert isinstance(losses, dict)
|
||||
assert "pure_logistic_textcat" in losses
|
||||
|
||||
# Mock training data for the test
|
||||
TRAIN_DATA = [
|
||||
("This is positive", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("This is negative", {"cats": {"positive": 0.0, "negative": 1.0}})
|
||||
]
|
|
@ -1,170 +0,0 @@
from typing import List, Dict, Iterable
import numpy as np
from spacy.pipeline import TrainablePipe
from spacy.language import Language
from spacy.training import Example
from spacy.vocab import Vocab
from spacy.tokens import Doc


@Language.factory(
    "pure_logistic_textcat",
    default_config={
        "learning_rate": 0.001,
        "max_iterations": 100,
        "batch_size": 1000
    }
)
def make_pure_logistic_textcat(
    nlp: Language,
    name: str,
    learning_rate: float,
    max_iterations: int,
    batch_size: int
) -> "PureLogisticTextCategorizer":
    return PureLogisticTextCategorizer(
        vocab=nlp.vocab,
        name=name,
        learning_rate=learning_rate,
        max_iterations=max_iterations,
        batch_size=batch_size
    )


class PureLogisticTextCategorizer(TrainablePipe):
    def __init__(
        self,
        vocab: Vocab,
        name: str = "pure_logistic_textcat",
        *,
        learning_rate: float = 0.001,
        max_iterations: int = 100,
        batch_size: int = 1000
    ):
        """Initialize the text categorizer."""
        self.vocab = vocab
        self.name = name
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.batch_size = batch_size
        self.weights = None
        self.bias = 0.0
        self._labels = set()  # Use _labels as the internal attribute

        # Register the custom extension attribute if it doesn't exist
        if not Doc.has_extension("textcat_scores"):
            Doc.set_extension("textcat_scores", default=None)

    @property
    def labels(self):
        """Get the labels."""
        return self._labels

    @labels.setter
    def labels(self, value):
        """Set the labels."""
        self._labels = value

    def predict(self, docs):
        """Apply the pipe to a batch of docs and return the score dicts."""
        scores = self._predict_scores(docs)
        for doc, doc_scores in zip(docs, scores):
            doc._.textcat_scores = doc_scores
        # Return the scores so set_annotations() can consume them
        return scores

    def _predict_scores(self, docs):
        """Predict scores for docs."""
        features = self._extract_features(docs)
        scores = []
        for doc_features in features:
            if self.weights is None:
                # Untrained model: fall back to a uniform distribution
                doc_scores = {"positive": 0.5, "negative": 0.5}
            else:
                logits = np.dot(doc_features, self.weights) + self.bias
                prob = 1 / (1 + np.exp(-logits))
                doc_scores = {
                    "positive": float(prob),
                    "negative": float(1 - prob)
                }
            scores.append(doc_scores)
        return scores

    def set_annotations(self, docs, scores):
        """Set the predicted categories on the docs."""
        for doc, doc_scores in zip(docs, scores):
            doc.cats = dict(doc_scores)

    def _extract_features(self, docs) -> List[np.ndarray]:
        """Extract features from docs."""
        features = []
        for doc in docs:
            # Basic features
            doc_vector = doc.vector
            n_tokens = len(doc)

            # Additional features (guard against empty docs)
            n_entities = len(doc.ents)
            avg_token_length = np.mean([len(token.text) for token in doc]) if n_tokens else 0.0
            n_stopwords = len([token for token in doc if token.is_stop])

            # Combine features
            doc_features = np.concatenate([
                doc_vector,
                [n_tokens / 100, n_entities / 10,
                 avg_token_length / 10, n_stopwords / max(n_tokens, 1)]
            ])
            features.append(doc_features)
        return features

    def update(
        self,
        examples: Iterable[Example],
        *,
        drop: float = 0.0,
        sgd=None,
        losses: Dict[str, float] = None
    ) -> Dict[str, float]:
        """Update the model with a batch of examples."""
        losses = {} if losses is None else losses
        examples = list(examples)  # the examples are iterated over more than once

        # Update label set
        for example in examples:
            self._labels.update(example.reference.cats.keys())

        # Extract features and labels
        docs = [example.reference for example in examples]
        label_arrays = self._make_label_array([example.reference.cats for example in examples])

        features = self._extract_features(docs)

        if self.weights is None:
            n_features = features[0].shape[0] if features else 0
            self.weights = np.zeros((n_features, 1))

        # Simple gradient descent
        total_loss = 0.0
        for i in range(self.max_iterations):
            for feat, gold in zip(features, label_arrays):
                pred = 1 / (1 + np.exp(-(np.dot(feat, self.weights) + self.bias)))
                loss = -np.mean(gold * np.log(pred + 1e-8) +
                                (1 - gold) * np.log(1 - pred + 1e-8))
                total_loss += loss

                # Compute gradients
                d_weights = feat.reshape(-1, 1) * (pred - gold)
                d_bias = pred - gold

                # Update weights
                self.weights -= self.learning_rate * d_weights
                self.bias -= self.learning_rate * float(d_bias)

        losses[self.name] = total_loss / max(len(examples), 1)
        return losses

    def _make_label_array(self, cats):
        """Convert label dicts into a binary column vector (1.0 = positive)."""
        arr = np.zeros((len(cats),))
        for i, cat_dict in enumerate(cats):
            if cat_dict.get("positive", 0) > 0.5:
                arr[i] = 1.0
        return arr.reshape(-1, 1)
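For quick experimentation, here is a minimal usage sketch of the factory above. It assumes `pure_Logistic.py` is importable from the working directory and that a pipeline with word vectors such as `en_core_web_lg` is installed, since `_extract_features` relies on `doc.vector`; the config values and example sentences are illustrative only.

```python
# Hedged usage sketch; not part of the committed files.
import spacy
from spacy.training import Example

import pure_Logistic  # noqa: F401 -- importing registers the "pure_logistic_textcat" factory

nlp = spacy.load("en_core_web_lg")  # assumed to be installed; provides doc.vector
textcat = nlp.add_pipe(
    "pure_logistic_textcat",
    config={"learning_rate": 0.01, "max_iterations": 50},  # remaining keys use the defaults
)

examples = [
    Example.from_dict(nlp.make_doc(text), annots)
    for text, annots in [
        ("Great value, would buy again.", {"cats": {"positive": 1.0, "negative": 0.0}}),
        ("Arrived broken and late.", {"cats": {"positive": 0.0, "negative": 1.0}}),
    ]
]

losses = textcat.update(examples)
print(losses)                      # {"pure_logistic_textcat": ...}
print(nlp("Works great!").cats)    # categories set by predict() + set_annotations()
```

Note that `update` ignores the `drop` and `sgd` arguments: the gradient step is done directly in NumPy, so no Thinc model or optimizer is required.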
@@ -1,129 +0,0 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'cells': [{'cell_type': 'markdown',\n",
       " 'metadata': {},\n",
       " 'source': ['# Pure Logistic Regression Text Categorizer\\n',\n",
       " 'This tutorial demonstrates how to use the custom logistic regression text categorizer.']},\n",
       " {'cell_type': 'code',\n",
       " 'execution_count': None,\n",
       " 'metadata': {},\n",
       " 'source': ['import spacy\\n',\n",
       " 'from spacy.training import Example\\n',\n",
       " '\\n',\n",
       " '# Load spaCy model\\n',\n",
       " 'nlp = spacy.load(\"en_core_web_lg\")\\n',\n",
       " 'nlp.add_pipe(\"pure_logistic_textcat\")\\n',\n",
       " '\\n',\n",
       " '# Example training data\\n',\n",
       " 'TRAIN_DATA = [\\n',\n",
       " ' (\"This is amazing!\", {\"cats\": {\"positive\": 1.0, \"negative\": 0.0}}),\\n',\n",
       " ' (\"This is terrible!\", {\"cats\": {\"positive\": 0.0, \"negative\": 1.0}})\\n',\n",
       " ']\\n',\n",
       " '\\n',\n",
       " '# Create training examples\\n',\n",
       " 'examples = []\\n',\n",
       " 'for text, annotations in TRAIN_DATA:\\n',\n",
       " ' doc = nlp.make_doc(text)\\n',\n",
       " ' example = Example.from_dict(doc, annotations)\\n',\n",
       " ' examples.append(example)\\n',\n",
       " '\\n',\n",
       " '# Train the model\\n',\n",
       " 'textcat = nlp.get_pipe(\"pure_logistic_textcat\")\\n',\n",
       " 'losses = textcat.update(examples)\\n',\n",
       " 'print(f\"Losses: {losses}\")\\n',\n",
       " '\\n',\n",
       " '# Test the model\\n',\n",
       " 'test_text = \"This product is fantastic!\"\\n',\n",
       " 'doc = nlp(test_text)\\n',\n",
       " 'print(f\"\\\\nText: {test_text}\")\\n',\n",
       " 'print(f\"Predictions: {doc.cats}\")']}]}"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "{\n",
    " \"cells\": [\n",
    "  {\n",
    "   \"cell_type\": \"markdown\",\n",
    "   \"metadata\": {},\n",
    "   \"source\": [\n",
    "    \"# Pure Logistic Regression Text Categorizer\\n\",\n",
    "    \"This tutorial demonstrates how to use the custom logistic regression text categorizer.\"\n",
    "   ]\n",
    "  },\n",
    "  {\n",
    "   \"cell_type\": \"code\",\n",
    "   \"execution_count\": None,\n",
    "   \"metadata\": {},\n",
    "   \"source\": [\n",
    "    \"import spacy\\n\",\n",
    "    \"from spacy.training import Example\\n\",\n",
    "    \"\\n\",\n",
    "    \"# Load spaCy model\\n\",\n",
    "    \"nlp = spacy.load(\\\"en_core_web_lg\\\")\\n\",\n",
    "    \"nlp.add_pipe(\\\"pure_logistic_textcat\\\")\\n\",\n",
    "    \"\\n\",\n",
    "    \"# Example training data\\n\",\n",
    "    \"TRAIN_DATA = [\\n\",\n",
    "    \" (\\\"This is amazing!\\\", {\\\"cats\\\": {\\\"positive\\\": 1.0, \\\"negative\\\": 0.0}}),\\n\",\n",
    "    \" (\\\"This is terrible!\\\", {\\\"cats\\\": {\\\"positive\\\": 0.0, \\\"negative\\\": 1.0}})\\n\",\n",
    "    \"]\\n\",\n",
    "    \"\\n\",\n",
    "    \"# Create training examples\\n\",\n",
    "    \"examples = []\\n\",\n",
    "    \"for text, annotations in TRAIN_DATA:\\n\",\n",
    "    \" doc = nlp.make_doc(text)\\n\",\n",
    "    \" example = Example.from_dict(doc, annotations)\\n\",\n",
    "    \" examples.append(example)\\n\",\n",
    "    \"\\n\",\n",
    "    \"# Train the model\\n\",\n",
    "    \"textcat = nlp.get_pipe(\\\"pure_logistic_textcat\\\")\\n\",\n",
    "    \"losses = textcat.update(examples)\\n\",\n",
    "    \"print(f\\\"Losses: {losses}\\\")\\n\",\n",
    "    \"\\n\",\n",
    "    \"# Test the model\\n\",\n",
    "    \"test_text = \\\"This product is fantastic!\\\"\\n\",\n",
    "    \"doc = nlp(test_text)\\n\",\n",
    "    \"print(f\\\"\\\\nText: {test_text}\\\")\\n\",\n",
    "    \"print(f\\\"Predictions: {doc.cats}\\\")\"\n",
    "   ]\n",
    "  }\n",
    " ]\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}