mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Update NER annotation scheme
Add note on training data sources and include coarse-grained Wikipedia scheme
This commit is contained in:
parent
df149455f9
commit
5453821a9f
|
@ -300,5 +300,15 @@ GLOSSARY = {
|
|||
'MONEY': 'Monetary values, including unit',
|
||||
'QUANTITY': 'Measurements, as of weight or distance',
|
||||
'ORDINAL': '"first", "second", etc.',
|
||||
'CARDINAL': 'Numerals that do not fall under another type'
|
||||
'CARDINAL': 'Numerals that do not fall under another type',
|
||||
|
||||
|
||||
# Named Entity Recognition
|
||||
# Wikipedia
|
||||
# http://www.sciencedirect.com/science/article/pii/S0004370212000276
|
||||
# https://pdfs.semanticscholar.org/5744/578cc243d92287f47448870bb426c66cc941.pdf
|
||||
|
||||
'PER': 'Named person or family.',
|
||||
'MISC': ('Miscellaneous entities, e.g. events, nationalities, '
|
||||
'products or works of art'),
|
||||
}
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
//- 💫 DOCS > API > ANNOTATION > NAMED ENTITIES
|
||||
|
||||
+table([ "Type", "Description" ])
|
||||
p
|
||||
| Models trained on the
|
||||
| #[+a("https://catalog.ldc.upenn.edu/ldc2013t19") OntoNotes 5] corpus
|
||||
| support the following entity types:
|
||||
|
||||
+table(["Type", "Description"])
|
||||
+row
|
||||
+cell #[code PERSON]
|
||||
+cell People, including fictional.
|
||||
|
@ -45,9 +50,6 @@
|
|||
+cell #[code LANGUAGE]
|
||||
+cell Any named language.
|
||||
|
||||
p The following values are also annotated in a style similar to names:
|
||||
|
||||
+table([ "Type", "Description" ])
|
||||
+row
|
||||
+cell #[code DATE]
|
||||
+cell Absolute or relative dates or periods.
|
||||
|
@ -75,3 +77,33 @@ p The following values are also annotated in a style similar to names:
|
|||
+row
|
||||
+cell #[code CARDINAL]
|
||||
+cell Numerals that do not fall under another type.
|
||||
|
||||
+h(4, "ner-wikipedia-scheme") Wikipedia scheme
|
||||
|
||||
p
|
||||
| Models trained on a Wikipedia corpus
|
||||
| (#[+a("http://www.sciencedirect.com/science/article/pii/S0004370212000276") Nothman et al., 2013])
|
||||
| use a less fine-grained NER annotation scheme and recognise the
|
||||
| following entities:
|
||||
|
||||
+table(["Type", "Description"])
|
||||
+row
|
||||
+cell #[code PER]
|
||||
+cell Named person or family.
|
||||
|
||||
+row
|
||||
+cell #[code LOC]
|
||||
+cell
|
||||
| Name of politically or geographically defined location (cities,
|
||||
| provinces, countries, international regions, bodies of water,
|
||||
| mountains).
|
||||
|
||||
+row
|
||||
+cell #[code ORG]
|
||||
+cell Named corporate, governmental, or other organizational entity.
|
||||
|
||||
+row
|
||||
+cell #[code MISC]
|
||||
+cell
|
||||
| Miscellaneous entities, e.g. events, nationalities, products or
|
||||
| works of art.
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
+h(2, "changelog") Changelog
|
||||
+button(gh("spacy") + "/releases", false, "secondary", "small").u-float-right.u-nowrap View releases
|
||||
|
||||
div(data-tpl="changelog" data-tpl-key="error")
|
||||
div(data-tpl="changelog" data-tpl-key="error" style="display: none")
|
||||
+infobox
|
||||
| Unable to load changelog from GitHub. Please see the
|
||||
| #[+a(gh("spacy") + "/releases") releases page] instead.
|
||||
|
|
Loading…
Reference in New Issue
Block a user