added spacy whisper to universe (#13418) [ci skip]

Co-authored-by: Ines Montani <ines@ines.io>
This commit is contained in:
William Mattingly 2024-09-10 08:28:00 -04:00 committed by GitHub
parent c80dacd046
commit f1a5ff9dba
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -5489,6 +5489,47 @@
"text analysis",
"semantic search"
]
},
{
"id": "spacy-whisper",
"title": "spaCy Whisper",
"slogan": "Seamless Integration of Whisper with spaCy NLP",
"description": "spaCy Whisper is a Python package designed for integrating Whisper transcriptions with spaCy's NLP capabilities. It provides users with the ability to process and analyze transcribed text using spaCy's features like tokenization, entity recognition, and part-of-speech tagging. Key features include word- and segment-level processing with custom attributes, as well as custom token, span, and document extensions, enriching the NLP analysis of transcribed texts.",
"github": "theirstory/spacy-whisper",
"pip": "spacy-whisper",
"code_example": [
"from spacy_whisper import SpacyWhisper",
"import json",
"",
"# Load a Whisper output (see the repo for a sample file):",
"with open('whisper_output.json', 'r', encoding='utf-8') as f:",
"    whisper_output = json.load(f)",
"",
"# Initialize SpacyWhisper",
"sw = SpacyWhisper(lang='en', model='en_core_web_sm', segments_key='segments', word_level=True)",
"doc = sw.create_doc(whisper_output)",
"",
"# Access custom attributes",
"for token in doc:",
"    print(token.text, token._.start_time, token._.end_time, token._.probability)"
],
"code_language": "python",
"url": "https://github.com/theirstory/spacy-whisper",
"thumb": "https://github.com/theirstory/spacy-whisper/raw/main/images/spacy_whisper.jpeg",
"image": "https://github.com/theirstory/spacy-whisper/raw/main/images/spacy_whisper.jpeg",
"author": "TheirStory",
"author_links": {
"website": "https://theirstory.io"
},
"category": [
"standalone"
],
"tags": [
"spacy",
"whisper",
"transcription",
"nlp"
]
}
],
"categories": [