mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Add SpanMarker for NER to spaCy universe (#12730)
* Add SpanMarker for NER to spaCy universe
* Escape the newlines in the text in the code example (or at least, attempt to)
* Remove now unnecessary import
* Disable NER pipeline component in code example
This commit is contained in:
		
							parent
							
								
									53c400bd7a
								
							
						
					
					
						commit
						93983f08fc
					
				|  | @ -4361,6 +4361,37 @@ | ||||||
|             }, |             }, | ||||||
|             "category": ["apis", "standalone"], |             "category": ["apis", "standalone"], | ||||||
|             "tags": ["apis", "deployment"] |             "tags": ["apis", "deployment"] | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "id": "span_marker", | ||||||
|  |             "title": "SpanMarker", | ||||||
|  |             "slogan": "Effortless state-of-the-art NER in spaCy", | ||||||
|  |             "description": "The SpanMarker integration with spaCy allows you to seamlessly replace the default spaCy `\"ner\"` pipeline component with any [SpanMarker model available on the Hugging Face Hub](https://huggingface.co/models?library=span-marker). Through this, you can take advantage of the advanced Named Entity Recognition capabilities of SpanMarker within the familiar and powerful spaCy framework.\n\nBy default, the `span_marker` pipeline component uses a [SpanMarker model using RoBERTa-large trained on OntoNotes v5.0](https://huggingface.co/tomaarsen/span-marker-roberta-large-ontonotes5). This model reaches a competitive 91.54 F1, notably higher than the [85.5 and 89.8 F1](https://spacy.io/usage/facts-figures#section-benchmarks) from `en_core_web_lg` and `en_core_web_trf`, respectively. A short head-to-head between this SpanMarker model and the `trf` spaCy model has been posted [here](https://github.com/tomaarsen/SpanMarkerNER/pull/12).\n\nAdditionally, see [here](https://tomaarsen.github.io/SpanMarkerNER/notebooks/spacy_integration.html) for documentation on using SpanMarker with spaCy.", | ||||||
|  |             "github": "tomaarsen/SpanMarkerNER", | ||||||
|  |             "pip": "span_marker", | ||||||
|  |             "code_example": [ | ||||||
|  |                 "import spacy", | ||||||
|  |                 "", | ||||||
|  |                 "nlp = spacy.load(\"en_core_web_sm\", disable=[\"ner\"])", | ||||||
|  |                 "nlp.add_pipe(\"span_marker\", config={\"model\": \"tomaarsen/span-marker-roberta-large-ontonotes5\"})", | ||||||
|  |                 "", | ||||||
|  |                 "text = \"\"\"Cleopatra VII, also known as Cleopatra the Great, was the last active ruler of the \\", | ||||||
|  |                 "Ptolemaic Kingdom of Egypt. She was born in 69 BCE and ruled Egypt from 51 BCE until her \\", | ||||||
|  |                 "death in 30 BCE.\"\"\"", | ||||||
|  |                 "doc = nlp(text)", | ||||||
|  |                 "print([(entity, entity.label_) for entity in doc.ents])", | ||||||
|  |                 "# [(Cleopatra VII, \"PERSON\"), (Cleopatra the Great, \"PERSON\"), (the Ptolemaic Kingdom of Egypt, \"GPE\"),", | ||||||
|  |                 "# (69 BCE, \"DATE\"), (Egypt, \"GPE\"), (51 BCE, \"DATE\"), (30 BCE, \"DATE\")]" | ||||||
|  |             ], | ||||||
|  |             "code_language": "python", | ||||||
|  |             "url": "https://tomaarsen.github.io/SpanMarkerNER", | ||||||
|  |             "author": "Tom Aarsen", | ||||||
|  |             "author_links": { | ||||||
|  |                 "github": "tomaarsen", | ||||||
|  |                 "website": "https://www.linkedin.com/in/tomaarsen" | ||||||
|  |             }, | ||||||
|  |             "category": ["pipeline", "standalone", "scientific"], | ||||||
|  |             "tags": ["ner"] | ||||||
|         } |         } | ||||||
|     ], |     ], | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user