mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-24 04:31:17 +03:00
Update documentation on doc.to_array
This commit is contained in:
parent
b3ab124fc5
commit
d44a079fe3
|
@ -336,28 +336,40 @@ p
|
|||
+tag method
|
||||
|
||||
p
|
||||
| Export the document annotations to a numpy array of shape #[code N*M]
|
||||
| where #[code N] is the length of the document and #[code M] is the number
|
||||
| of attribute IDs to export. The values will be 32-bit integers.
|
||||
| Export given token attributes to a numpy #[code ndarray].
|
||||
| If #[code attr_ids] is a sequence of #[code M] attributes,
|
||||
| the output array will be of shape #[code (N, M)], where #[code N]
|
||||
| is the length of the #[code Doc] (in tokens). If #[code attr_ids] is
|
||||
| a single attribute, the output shape will be #[code (N,)]. You can
|
||||
| specify attributes by integer ID (e.g. #[code spacy.attrs.LEMMA])
|
||||
| or string name (e.g. #[code 'LEMMA'] or #[code 'lemma']). The values will be unsigned 64-bit
|
||||
| integers.
|
||||
|
||||
+aside-code("Example").
|
||||
from spacy.attrs import LOWER, POS, ENT_TYPE, IS_ALPHA
|
||||
doc = nlp(text)
|
||||
# All strings mapped to integers, for easy export to numpy
|
||||
np_array = doc.to_array([LOWER, POS, ENT_TYPE, IS_ALPHA])
|
||||
np_array = doc.to_array("POS")
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code attr_ids]
|
||||
+cell list
|
||||
+cell A list of attribute ID ints.
|
||||
+cell list or int or string
|
||||
+cell
|
||||
| A list of attributes (int IDs or string names) or
|
||||
| a single attribute (int ID or string name)
|
||||
|
||||
+row("foot")
|
||||
+cell returns
|
||||
+cell #[code.u-break numpy.ndarray[ndim=2, dtype='int32']]
|
||||
+cell
|
||||
| #[code.u-break numpy.ndarray[ndim=2, dtype='uint64']] or
|
||||
| #[code.u-break numpy.ndarray[ndim=1, dtype='uint64']]
|
||||
+cell
|
||||
| The exported attributes as a 2D numpy array, with one row per
|
||||
| token and one column per attribute.
|
||||
| token and one column per attribute (when #[code attr_ids] is a
|
||||
| list), or as a 1D numpy array, with one item per token (when
|
||||
| #[code attr_ids] is a single value).
|
||||
|
||||
+h(2, "from_array") Doc.from_array
|
||||
+tag method
|
||||
|
|
Loading…
Reference in New Issue
Block a user