Adding rolegal model to the spaCy universe (#13017)

* adding rolegal model to the spaCy universe

* Fix formatting

* Use raw URL

* update image url and example

* fix pip and update url to raw

* okay, let's add thumb instead of image 🐙

* Update website/meta/universe.json

---------

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
Sergiu Nisioi 2023-09-28 16:06:50 +07:00 committed by GitHub
parent b4501db6f8
commit 6255e38695
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -4469,6 +4469,37 @@
},
"category": ["pipeline", "standalone"],
"tags": ["spans", "rules", "ner"]
},
{
"id": "rolegal",
"title": "A spaCy Package for Romanian Legal Document Processing",
"thumb": "https://raw.githubusercontent.com/senisioi/rolegal/main/img/paper200x200.jpeg",
"slogan": "rolegal: a spaCy Package for Noisy Romanian Legal Document Processing",
"description": "This is a spaCy language model for Romanian legal domain trained with floret 4-gram to 5-gram embeddings and `LEGAL` entity recognition. Useful for processing OCR-resulted noisy legal documents.",
"github": "senisioi/rolegal",
"pip": "ro-legal-fl",
"tags": ["legal", "floret", "ner", "romanian"],
"code_example": [
"import spacy",
"nlp = spacy.load(\"ro_legal_fl\")",
"",
"doc = nlp(\"Titlul III din LEGEA nr. 255 din 19 iulie 2013, publicată în MONITORUL OFICIAL\")",
"# legal entity identification",
"for entity in doc.ents:",
" print('entity: ', entity, '; entity type: ', entity.label_)",
"",
"# floret n-gram embeddings robust to typos",
"print(nlp('achizit1e public@').similarity(nlp('achiziții publice')))",
"# 0.7393895566928835",
"print(nlp('achizitii publice').similarity(nlp('achiziții publice')))",
"# 0.8996480808279399"
],
"author": "Sergiu Nisioi",
"author_links": {
"github": "senisioi",
"website": "https://nlp.unibuc.ro/people/snisioi.html"
},
"category": ["pipeline", "training", "models"]
}
],