Merge a71ec2a5c5 into c1e7cb2ebf

2026-01-09 18:21:14 +03:00 · 2025-11-29 17:12:15 -05:00 · 2025-11-29 17:12:15 -05:00 · ed6a6d3623
commit ed6a6d3623
parent c1e7cb2ebf a71ec2a5c5
1 changed files with 50 additions and 0 deletions
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1,5 +1,55 @@
 {
    "resources": [
+        {
+            "id": "MorphSeg",
+            "title": "MorphSeg",
+            "thumb": "https://raw.githubusercontent.com/TheWelcomer/MorphSeg/master/library/logo.png",
+            "url": "https://pypi.org/project/morphseg/",
+            "slogan": "Morpheme Segmentation for Several Languages using spaCy",
+            "description": "[MorphSeg](https://github.com/TheWelcomer/MorphSeg) is a morpheme segmentation library and spaCy pipeline component that supports segmentation for 8 languages (English, Spanish, Russian, French, Italian, Czech, Hungarian, and Latin). The pretrained models are high-accuracy, small (~3M parameters), and efficient (~500 words/second on a MacBook GPU) neural nets. The interface is designed to be simple: just use spaCy as usual and add the morpheme_segmenter component to the pipeline to get segmentations!\n- [Demo Website](https://huggingface.co/spaces/Morphological-Segmentation/Morpheme_Segmentation_Demo)\n- [Demo Colab Notebook](https://colab.research.google.com/drive/1alisdnbCQCRhvdT9DhMnRNRuDLIZLZho#scrollTo=QB6uAXxWoffA)\n- [GitHub](https://github.com/TheWelcomer/MorphSeg)\n- [PyPI Package](https://pypi.org/project/morphseg/)\n- [Hugging Face Repository](https://huggingface.co/MorphSeg)",
+            "github": "TheWelcomer/MorphSeg",
+            "pip": "morphseg",
+            "code_example": [
+                "import morphseg",
+                "import spacy",
+                "",
+                "# Load your existing spaCy model or a blank NLP object",
+                "nlp = spacy.blank('en')",
+                "",
+                "# Add the morpheme segmenter to the pipeline",
+                "nlp.add_pipe('morpheme_segmenter')",
+                "",
+                "# Process the text",
+                "doc = nlp('The unbelievably disagreeable preprocessor unsuccessfully reprocessed the unquestionably irreversible decontextualization')",
+                "",
+                "# Access morphemes for each token",
+                "for token in doc:",
+                "    print(f'{token.text}: {token._.morphemes}')",
+                "",
+                "# Access morphemes for spans",
+                "span = doc[1:3]",
+                "print(f'Span morphemes: {span._.morphemes}')",
+                "",
+                "# Access morphemes for the entire document",
+                "print(f'All morphemes: {doc._.morphemes}') # [['the'], ['un', 'believe', 'able', 'ly'], ['dis', 'agree', 'able'], ['pre', 'process', 'or'], ['un', 'success', 'ful', 'ly'], ['re', 'process', 'ed'], ['the'], ['un', 'question', 'able', 'ly'], ['in', 'reverse', 'ible'], ['decontextual', 'ization']]"
+            ],
+            "code_language": "python",
+            "author": "Donald Winkelman",
+            "author_links": {
+                "github": "TheWelcomer",
+                "website": "https://dwink.dev"
+            },
+            "category": [
+                "pipeline",
+                "standalone"
+            ],
+            "tags": [
+                "morphology",
+                "segmentation",
+                "linguistics",
+                "multilingual"
+            ]
+        },
        {
            "id": "TeNs",
            "title": "Temporal Expressions Normalization spaCy",