From 2a5e71232b599fa1d82127266d2e5f008dedba78 Mon Sep 17 00:00:00 2001 From: Nipun Sadvilkar Date: Wed, 30 Oct 2019 16:43:29 +0530 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20=20project:=20pySBD=20-=20Python=20?= =?UTF-8?q?Sentence=20Boundary=20Disambiguation=20(#4455)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ project: pySBD - Python Sentence Boundary Disambiguation * 📝 Update links and description * 🐛 Fix missing comma * Update universe.json pysbd as a spacy component through entrypoints * 🚨 Fix universe.json * 📝 Update code_example --- website/meta/universe.json | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/website/meta/universe.json b/website/meta/universe.json index bc8a27a1a..e64e462d8 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -1801,6 +1801,32 @@ "github": "microsoft" } }, + { + "id": "python-sentence-boundary-disambiguation", + "title": "pySBD - python Sentence Boundary Disambiguation", + "slogan": "a rule-based sentence boundary detection that works out-of-the-box", + "github": "nipunsadvilkar/pySBD", + "description": "pySBD is a 'real-world' sentence segmenter which extracts reasonable sentences when the format and domain of the input text are unknown. It is a rule-based algorithm based on [The Golden Rules](https://s3.amazonaws.com/tm-town-nlp-resources/golden_rules.txt) - a set of tests to check the accuracy of a segmenter with regard to edge-case scenarios, developed by the [TM-Town](https://www.tm-town.com/) dev team. pySBD is a Python port of the Ruby gem [Pragmatic Segmenter](https://github.com/diasks2/pragmatic_segmenter).", + "pip": "pysbd", + "category": ["scientific"], + "tags": ["sentence segmentation"], + "code_example": [ + "from pysbd.util import PySBDFactory", + "", + "nlp = spacy.blank('en')", + "nlp.add_pipe(PySBDFactory(nlp))", + "", + "doc = nlp('My name is Jonas E. Smith. Please turn to p. 
55.')", + "print(list(doc.sents))", + "# [My name is Jonas E. Smith., Please turn to p. 55.]" + ], + "author": "Nipun Sadvilkar", + "author_links": { + "twitter": "nipunsadvilkar", + "github": "nipunsadvilkar", + "website": "https://nipunsadvilkar.github.io" + } + }, { "id": "cookiecutter-spacy-fastapi", "title": "cookiecutter-spacy-fastapi",