mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Allow passing in array vars for speedup (#8882)
* Allow passing in array vars for speedup

  This fixes #8845. Not sure about the docstring changes here...

* Update docs

  Types maybe need more detail? Maybe not?

* Run prettier on docs

* Update spacy/tokens/span.pyx

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
6029cfc391
commit
e227d24d43
|
@ -218,10 +218,12 @@ cdef class Span:
|
|||
return Underscore(Underscore.span_extensions, self,
|
||||
start=self.c.start_char, end=self.c.end_char)
|
||||
|
||||
def as_doc(self, *, bint copy_user_data=False):
|
||||
def as_doc(self, *, bint copy_user_data=False, array_head=None, array=None):
|
||||
"""Create a `Doc` object with a copy of the `Span`'s data.
|
||||
|
||||
copy_user_data (bool): Whether or not to copy the original doc's user data.
|
||||
array_head (tuple): `Doc` array attrs, can be passed in to speed up computation.
|
||||
array (ndarray): `Doc` as array, can be passed in to speed up computation.
|
||||
RETURNS (Doc): The `Doc` copy of the span.
|
||||
|
||||
DOCS: https://spacy.io/api/span#as_doc
|
||||
|
@ -229,8 +231,10 @@ cdef class Span:
|
|||
words = [t.text for t in self]
|
||||
spaces = [bool(t.whitespace_) for t in self]
|
||||
cdef Doc doc = Doc(self.doc.vocab, words=words, spaces=spaces)
|
||||
array_head = self.doc._get_array_attrs()
|
||||
array = self.doc.to_array(array_head)
|
||||
if array_head is None:
|
||||
array_head = self.doc._get_array_attrs()
|
||||
if array is None:
|
||||
array = self.doc.to_array(array_head)
|
||||
array = array[self.start : self.end]
|
||||
self._fix_dep_copy(array_head, array)
|
||||
# Fix initial IOB so the entities are valid for doc.ents below.
|
||||
|
|
|
@ -303,6 +303,10 @@ not been implemented for the given language, a `NotImplementedError` is raised.
|
|||
|
||||
Create a new `Doc` object corresponding to the `Span`, with a copy of the data.
|
||||
|
||||
When calling this on many spans from the same doc, passing in a precomputed
|
||||
array representation of the doc using the `array_head` and `array` args can save
|
||||
time.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
|
@ -312,10 +316,12 @@ Create a new `Doc` object corresponding to the `Span`, with a copy of the data.
|
|||
> assert doc2.text == "New York"
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ---------------- | ------------------------------------------------------------- |
|
||||
| `copy_user_data` | Whether or not to copy the original doc's user data. ~~bool~~ |
|
||||
| **RETURNS** | A `Doc` object of the `Span`'s content. ~~Doc~~ |
|
||||
| Name | Description |
|
||||
| ---------------- | -------------------------------------------------------------------------------------------------------------------- |
|
||||
| `copy_user_data` | Whether or not to copy the original doc's user data. ~~bool~~ |
|
||||
| `array_head` | Precomputed array attributes (headers) of the original doc, as generated by `Doc._get_array_attrs()`. ~~Tuple~~ |
|
||||
| `array` | Precomputed array version of the original doc as generated by [`Doc.to_array`](/api/doc#to_array). ~~numpy.ndarray~~ |
|
||||
| **RETURNS** | A `Doc` object of the `Span`'s content. ~~Doc~~ |
|
||||
|
||||
## Span.root {#root tag="property" model="parser"}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user