Update documentation on doc.to_array

2025-08-09 14:44:52 +03:00 · 2017-10-20 14:23:48 +05:30 · 2017-10-20 14:23:48 +05:30 · fbccc8c87d
commit fbccc8c87d
parent 5941aa96a1
1 changed files with 19 additions and 7 deletions
--- a/website/docs/api/doc.jade
+++ b/website/docs/api/doc.jade
@ -176,9 +176,14 @@ p
    +tag method

 p
-    |  Export the document annotations to a numpy array of shape #[code N*M]
-    |  where #[code N] is the length of the document and #[code M] is the number
-    |  of attribute IDs to export. The values will be 32-bit integers.
+    |  Export given token attributes to a numpy #[code ndarray].
+    |  If #[code attr_ids] is a sequence of #[code M] attributes,
+    |  the output array will be of shape #[code (N, M)], where #[code N]
+    |  is the length of the #[code Doc] (in tokens). If #[code attr_ids] is
+    |  a single attribute, the output shape will be #[code (N,)]. You can
+    |  specify attributes by integer ID (e.g. #[code spacy.attrs.LEMMA])
+    |  or string name (e.g. 'LEMMA' or 'lemma'). The values will be 32-bit
+    |  integers.

 +aside-code("Example").
    from spacy import attrs
@ -186,19 +191,26 @@ p
    # All strings mapped to integers, for easy export to numpy
    np_array = doc.to_array([attrs.LOWER, attrs.POS,
                             attrs.ENT_TYPE, attrs.IS_ALPHA])
+    np_array = doc.to_array("POS")

 +table(["Name", "Type", "Description"])
    +row
        +cell #[code attr_ids]
-        +cell ints
-        +cell A list of attribute ID ints.
+        +cell list, int or string
+        +cell
+            | A list of attributes (int IDs or string names) or
+            | a single attribute (int ID or string name).

    +footrow
        +cell return
-        +cell #[code numpy.ndarray[ndim=2, dtype='int32']]
+        +cell
+            | #[code numpy.ndarray[ndim=2, dtype='int32']] or
+            | #[code numpy.ndarray[ndim=1, dtype='int32']]
        +cell
            |  The exported attributes as a 2D numpy array, with one row per
-            |  token and one column per attribute.
+            |  token and one column per attribute (when #[code attr_ids] is a
+            |  list), or as a 1D numpy array, with one item per token (when
+            |  #[code attr_ids] is a single value).

 +h(2, "count_by") Doc.count_by
    +tag method