From 5934378ab07afe0e2a993f3c398c412592b7ac2e Mon Sep 17 00:00:00 2001
From: Ruan Chaves Rodrigues
Date: Thu, 8 Jan 2026 07:20:49 -0300
Subject: [PATCH] feat: introduce hashformers at universe.json

---
Review notes (this section is ignored by `git am`):
- The patch is limited to the new "hashformers" entry.
- Two unrelated whitespace-only hunks were dropped: the removal of a blank
  line near line 2742, and the rewrite of the final "}" that stripped the
  trailing newline at end of file.
- The post-image blob id on the `index` line below was computed before those
  hunks were dropped and no longer matches; the pre-image id is unchanged.
- Leading whitespace of the context lines was reconstructed assuming 4-space
  JSON indentation; confirm against the target file before applying.

 website/meta/universe.json | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index 1f55d9616..4b733e610 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -69,6 +69,40 @@
                 "dbpedia"
             ]
         },
+        {
+            "id": "hashformers",
+            "title": "Hashformers",
+            "slogan": "Word segmentation with transformers and beam search",
+            "description": "Hashformers uses transformers and beam search to segment text without spaces into words. It fills the gap between heuristic-based splitters and LLM prompt-based segmentation, supporting any Hugging Face model. Recognized as state-of-the-art for hashtag segmentation at LREC 2022.",
+            "github": "ruanchaves/hashformers",
+            "pip": "hashformers",
+            "code_example": [
+                "import spacy",
+                "import hashformers.spacy # registers the 'hashformers' component",
+                "",
+                "nlp = spacy.blank('en')",
+                "nlp.add_pipe('hashformers', config={'model': 'distilgpt2'})",
+                "",
+                "doc = nlp('#weneedanationalpark')",
+                "print(doc._.segmented) # 'we need a national park'"
+            ],
+            "code_language": "python",
+            "url": "https://github.com/ruanchaves/hashformers",
+            "author": "Ruan Chaves Rodrigues",
+            "author_links": {
+                "github": "ruanchaves"
+            },
+            "category": [
+                "pipeline",
+                "standalone"
+            ],
+            "tags": [
+                "word-segmentation",
+                "hashtags",
+                "transformers",
+                "beam-search"
+            ]
+        },
         {
             "id": "spacy-vscode",
             "title": "spaCy Visual Studio Code Extension",