mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Document offsets_from_biluo_tags
This commit is contained in:
		
							parent
							
								
									fb663f9b7d
								
							
						
					
					
						commit
						b078e276e6
					
				|  | @ -163,3 +163,41 @@ p | ||||||
|         +cell |         +cell | ||||||
|             |  Unicode strings, describing the |             |  Unicode strings, describing the | ||||||
|             |  #[+a("/api/annotation#biluo") BILUO] tags. |             |  #[+a("/api/annotation#biluo") BILUO] tags. | ||||||
|  | 
 | ||||||
|  | +h(3, "offsets_from_biluo_tags") gold.offsets_from_biluo_tags | ||||||
|  | 
 | ||||||
|  | p | ||||||
|  |     |  Convert per-token tags following the | ||||||
|  |     |  #[+a("/api/annotation#biluo") BILUO scheme] into entity offsets. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     from spacy.gold import offsets_from_biluo_tags | ||||||
|  | 
 | ||||||
|  |     doc = nlp('I like London.') | ||||||
|  |     tags = ['O', 'O', 'U-LOC', 'O'] | ||||||
|  |     entities = offsets_from_biluo_tags(doc, tags) | ||||||
|  |     assert entities == [(7, 13, 'LOC')] | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code doc] | ||||||
|  |         +cell #[code Doc] | ||||||
|  |         +cell The document that the BILUO tags refer to. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code tags] | ||||||
|  |         +cell iterable | ||||||
|  |         +cell | ||||||
|  |             |  A sequence of #[+a("/api/annotation#biluo") BILUO] tags with | ||||||
|  |             |  each tag describing one token. Each tag string is either | ||||||
|  |             |  #[code ""], #[code "O"] or of the form | ||||||
|  |             |  #[code "{action}-{label}"], where action is one of #[code "B"], | ||||||
|  |             |  #[code "I"], #[code "L"], #[code "U"]. | ||||||
|  | 
 | ||||||
|  |     +row("foot") | ||||||
|  |         +cell returns | ||||||
|  |         +cell list | ||||||
|  |         +cell | ||||||
|  |             |  A sequence of #[code (start, end, label)] triples. #[code start] | ||||||
|  |             |  and #[code end] will be character-offset integers denoting the | ||||||
|  |             |  slice into the original string. | ||||||
|  |  | ||||||
|  | @ -21,6 +21,13 @@ p | ||||||
|     |  #[strong experiment on your data] to find a solution that works best |     |  #[strong experiment on your data] to find a solution that works best | ||||||
|     |  for you. |     |  for you. | ||||||
| 
 | 
 | ||||||
|  | +aside("Tip: Converting entity annotations", "💡") | ||||||
|  |     |  You can train the entity recognizer with entity offsets or | ||||||
|  |     |  annotations in the #[+a("/api/annotation#biluo") BILUO scheme]. The | ||||||
|  |     |  #[code spacy.gold] module also exposes | ||||||
|  |     |  #[+a("/api/goldparse#util") two helper functions] to convert offsets to | ||||||
|  |     |  BILUO tags, and BILUO tags to entity offsets. | ||||||
|  | 
 | ||||||
| +h(3, "example-train-ner") Updating the Named Entity Recognizer | +h(3, "example-train-ner") Updating the Named Entity Recognizer | ||||||
| 
 | 
 | ||||||
| p | p | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user