Update entity setting section

2025-11-07 19:37:38 +03:00 · 2021-03-20 11:38:55 +01:00 · 2021-03-20 11:38:55 +01:00 · 0d2b723e8d
commit 0d2b723e8d
parent 6a9a467766
1 changed file with 18 additions and 8 deletions
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -599,18 +599,27 @@ ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents]
 print('Before', ents)
 # The model didn't recognize "fb" as an entity :(
-fb_ent = Span(doc, 0, 1, label="ORG") # create a Span for the new entity
+# Create a span for the new entity
 fb_ent = Span(doc, 0, 1, label="ORG")
 # Option 1: Modify the provided entity spans, leaving the rest unmodified
 doc.set_ents([fb_ent], default="unmodified")
 # Option 2: Assign a complete list of ents to doc.ents
 doc.ents = list(doc.ents) + [fb_ent]
-ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents]
+ents = [(e.text, e.start, e.end, e.label_) for e in doc.ents]
 print('After', ents)
-# [('fb', 0, 2, 'ORG')] 🎉
+# [('fb', 0, 1, 'ORG')] 🎉
 ```
-Keep in mind that you need to create a `Span` with the start and end index of
+Keep in mind that `Span` is initialized with the start and end **token**
-the **token**, not the start and end index of the entity in the document. In
+indices, not the character offsets. To create a span from character offsets, use
-this case, "fb" is token `(0, 1)` – but at the document level, the entity will
+[`Doc.char_span`](/api/doc#char_span):
-have the start and end indices `(0, 2)`.
+
 ```python
 fb_ent = doc.char_span(0, 2, label="ORG")
 ```
 #### Setting entity annotations from array {#setting-from-array}
@@ -645,9 +654,10 @@ write efficient native code.
 ```python
 # cython: infer_types=True
 from spacy.typedefs cimport attr_t
 from spacy.tokens.doc cimport Doc
-cpdef set_entity(Doc doc, int start, int end, int ent_type):
+cpdef set_entity(Doc doc, int start, int end, attr_t ent_type):
    for i in range(start, end):
        doc.c[i].ent_type = ent_type
    doc.c[start].ent_iob = 3