Document new Doc.char_span() method

2025-08-10 07:04:53 +03:00 · 2017-08-19 12:45:00 +02:00 · 2017-08-19 12:45:00 +02:00 · 404d3067b8
commit 404d3067b8
parent d53cbf369f
1 changed files with 38 additions and 0 deletions
--- a/website/docs/api/doc.jade
+++ b/website/docs/api/doc.jade
@@ -140,6 +140,44 @@ p Get the number of tokens in the document.
        +cell int
        +cell The number of tokens in the document.

++h(2, "char_span") Doc.char_span
+    +tag method
+    +tag-new(2)
+
+p Create a #[code Span] object from the slice #[code doc.text[start : end]].
+
++aside-code("Example").
+    doc = nlp(u'I like New York')
+    label = doc.vocab.strings['GPE']
+    span = doc.char_span(7, 15, label=label)
+    assert span.text == 'New York'
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code start]
+        +cell int
+        +cell The index of the first character of the span.
+
+    +row
+        +cell #[code end]
+        +cell int
+        +cell The index of the first character after the span.
+
+    +row
+        +cell #[code label]
+        +cell uint64
+        +cell A label to attach to the Span, e.g. for named entities.
+
+    +row
+        +cell #[code vector]
+        +cell #[code.u-break numpy.ndarray[ndim=1, dtype='float32']]
+        +cell A meaning representation of the span.
+
+    +footrow
+        +cell returns
+        +cell #[code Span]
+        +cell The newly constructed object.
+
 +h(2, "similarity") Doc.similarity
    +tag method
    +tag-model("vectors")