mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Allow passing in array vars for speedup (#8882)
* Allow passing in array vars for speedup
  This fixes #8845. Not sure about the docstring changes here...
* Update docs
  Types maybe need more detail? Maybe not?
* Run prettier on docs
* Update spacy/tokens/span.pyx
Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
6029cfc391
commit
e227d24d43
|
@ -218,10 +218,12 @@ cdef class Span:
|
||||||
return Underscore(Underscore.span_extensions, self,
|
return Underscore(Underscore.span_extensions, self,
|
||||||
start=self.c.start_char, end=self.c.end_char)
|
start=self.c.start_char, end=self.c.end_char)
|
||||||
|
|
||||||
def as_doc(self, *, bint copy_user_data=False):
|
def as_doc(self, *, bint copy_user_data=False, array_head=None, array=None):
|
||||||
"""Create a `Doc` object with a copy of the `Span`'s data.
|
"""Create a `Doc` object with a copy of the `Span`'s data.
|
||||||
|
|
||||||
copy_user_data (bool): Whether or not to copy the original doc's user data.
|
copy_user_data (bool): Whether or not to copy the original doc's user data.
|
||||||
|
array_head (tuple): `Doc` array attrs, can be passed in to speed up computation.
|
||||||
|
array (ndarray): `Doc` as array, can be passed in to speed up computation.
|
||||||
RETURNS (Doc): The `Doc` copy of the span.
|
RETURNS (Doc): The `Doc` copy of the span.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#as_doc
|
DOCS: https://spacy.io/api/span#as_doc
|
||||||
|
@ -229,8 +231,10 @@ cdef class Span:
|
||||||
words = [t.text for t in self]
|
words = [t.text for t in self]
|
||||||
spaces = [bool(t.whitespace_) for t in self]
|
spaces = [bool(t.whitespace_) for t in self]
|
||||||
cdef Doc doc = Doc(self.doc.vocab, words=words, spaces=spaces)
|
cdef Doc doc = Doc(self.doc.vocab, words=words, spaces=spaces)
|
||||||
array_head = self.doc._get_array_attrs()
|
if array_head is None:
|
||||||
array = self.doc.to_array(array_head)
|
array_head = self.doc._get_array_attrs()
|
||||||
|
if array is None:
|
||||||
|
array = self.doc.to_array(array_head)
|
||||||
array = array[self.start : self.end]
|
array = array[self.start : self.end]
|
||||||
self._fix_dep_copy(array_head, array)
|
self._fix_dep_copy(array_head, array)
|
||||||
# Fix initial IOB so the entities are valid for doc.ents below.
|
# Fix initial IOB so the entities are valid for doc.ents below.
|
||||||
|
|
|
@ -303,6 +303,10 @@ not been implemented for the given language, a `NotImplementedError` is raised.
|
||||||
|
|
||||||
Create a new `Doc` object corresponding to the `Span`, with a copy of the data.
|
Create a new `Doc` object corresponding to the `Span`, with a copy of the data.
|
||||||
|
|
||||||
|
When calling this on many spans from the same doc, passing in a precomputed
|
||||||
|
array representation of the doc using the `array_head` and `array` args can save
|
||||||
|
time.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
> ```python
|
> ```python
|
||||||
|
@ -312,10 +316,12 @@ Create a new `Doc` object corresponding to the `Span`, with a copy of the data.
|
||||||
> assert doc2.text == "New York"
|
> assert doc2.text == "New York"
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------------- | ------------------------------------------------------------- |
|
| ---------------- | -------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `copy_user_data` | Whether or not to copy the original doc's user data. ~~bool~~ |
|
| `copy_user_data` | Whether or not to copy the original doc's user data. ~~bool~~ |
|
||||||
| **RETURNS** | A `Doc` object of the `Span`'s content. ~~Doc~~ |
|
| `array_head` | Precomputed array attributes (headers) of the original doc, as generated by `Doc._get_array_attrs()`. ~~Tuple~~ |
|
||||||
|
| `array` | Precomputed array version of the original doc as generated by [`Doc.to_array`](/api/doc#to_array). ~~numpy.ndarray~~ |
|
||||||
|
| **RETURNS** | A `Doc` object of the `Span`'s content. ~~Doc~~ |
|
||||||
|
|
||||||
## Span.root {#root tag="property" model="parser"}
|
## Span.root {#root tag="property" model="parser"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user