Update documentation on doc.to_array

This commit is contained in:
Ramanan Balakrishnan 2017-10-20 14:23:48 +05:30
parent 5941aa96a1
commit fbccc8c87d
No known key found for this signature in database
GPG Key ID: 57283041B6B6D1D1

View File

@ -176,9 +176,14 @@ p
+tag method
p
| Export the document annotations to a numpy array of shape #[code N*M]
| where #[code N] is the length of the document and #[code M] is the number
| of attribute IDs to export. The values will be 32-bit integers.
| Export the given token attributes to a numpy #[code ndarray].
| If #[code attr_ids] is a sequence of #[code M] attributes,
| the output array will be of shape #[code (N, M)], where #[code N]
| is the length of the #[code Doc] (in tokens). If #[code attr_ids] is
| a single attribute, the output shape will be #[code (N,)]. You can
| specify attributes by integer ID (e.g. #[code spacy.attrs.LEMMA])
| or string name (e.g. #[code 'LEMMA'] or #[code 'lemma']). The values will be 32-bit
| integers.
+aside-code("Example").
from spacy import attrs
@ -186,19 +191,26 @@ p
# All strings mapped to integers, for easy export to numpy
np_array = doc.to_array([attrs.LOWER, attrs.POS,
attrs.ENT_TYPE, attrs.IS_ALPHA])
np_array = doc.to_array("POS")
+table(["Name", "Type", "Description"])
+row
+cell #[code attr_ids]
+cell ints
+cell A list of attribute ID ints.
+cell list, int or string
+cell
| A list of attributes (int IDs or string names) or
| a single attribute (int ID or string name)
+footrow
+cell return
+cell #[code numpy.ndarray[ndim=2, dtype='int32']]
+cell
| #[code numpy.ndarray[ndim=2, dtype='int32']] or
| #[code numpy.ndarray[ndim=1, dtype='int32']]
+cell
| The exported attributes as a 2D numpy array, with one row per
| token and one column per attribute.
| token and one column per attribute (when #[code attr_ids] is a
| list), or as a 1D numpy array, with one item per token (when
| #[code attr_ids] is a single value).
+h(2, "count_by") Doc.count_by
+tag method