mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Merge branch 'master' into develop
This commit is contained in:
commit
3c3658ef9f
|
@ -1066,8 +1066,15 @@ cdef class DependencyParser(Parser):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def labels(self):
|
def labels(self):
|
||||||
|
labels = set()
|
||||||
# Get the labels from the model by looking at the available moves
|
# Get the labels from the model by looking at the available moves
|
||||||
return tuple(set(move.split("-")[1] for move in self.move_names if "-" in move))
|
for move in self.move_names:
|
||||||
|
if "-" in move:
|
||||||
|
label = move.split("-")[1]
|
||||||
|
if "||" in label:
|
||||||
|
label = label.split("||")[1]
|
||||||
|
labels.add(label)
|
||||||
|
return tuple(sorted(labels))
|
||||||
|
|
||||||
|
|
||||||
cdef class EntityRecognizer(Parser):
|
cdef class EntityRecognizer(Parser):
|
||||||
|
@ -1102,8 +1109,9 @@ cdef class EntityRecognizer(Parser):
|
||||||
def labels(self):
|
def labels(self):
|
||||||
# Get the labels from the model by looking at the available moves, e.g.
|
# Get the labels from the model by looking at the available moves, e.g.
|
||||||
# B-PERSON, I-PERSON, L-PERSON, U-PERSON
|
# B-PERSON, I-PERSON, L-PERSON, U-PERSON
|
||||||
return tuple(set(move.split("-")[1] for move in self.move_names
|
labels = set(move.split("-")[1] for move in self.move_names
|
||||||
if move[0] in ("B", "I", "L", "U")))
|
if move[0] in ("B", "I", "L", "U"))
|
||||||
|
return tuple(sorted(labels))
|
||||||
|
|
||||||
|
|
||||||
class EntityLinker(Pipe):
|
class EntityLinker(Pipe):
|
||||||
|
|
|
@ -173,6 +173,21 @@ def test_span_as_doc(doc):
|
||||||
assert span_doc[0].idx == 0
|
assert span_doc[0].idx == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_span_as_doc_user_data(doc):
|
||||||
|
"""Test that the user_data can be preserved (but not by default). """
|
||||||
|
my_key = "my_info"
|
||||||
|
my_value = 342
|
||||||
|
doc.user_data[my_key] = my_value
|
||||||
|
|
||||||
|
span = doc[4:10]
|
||||||
|
span_doc_with = span.as_doc(copy_user_data=True)
|
||||||
|
span_doc_without = span.as_doc()
|
||||||
|
|
||||||
|
assert doc.user_data.get(my_key, None) is my_value
|
||||||
|
assert span_doc_with.user_data.get(my_key, None) is my_value
|
||||||
|
assert span_doc_without.user_data.get(my_key, None) is None
|
||||||
|
|
||||||
|
|
||||||
def test_span_string_label_kb_id(doc):
|
def test_span_string_label_kb_id(doc):
|
||||||
span = Span(doc, 0, 1, label="hello", kb_id="Q342")
|
span = Span(doc, 0, 1, label="hello", kb_id="Q342")
|
||||||
assert span.label_ == "hello"
|
assert span.label_ == "hello"
|
||||||
|
|
|
@ -200,13 +200,15 @@ cdef class Span:
|
||||||
return Underscore(Underscore.span_extensions, self,
|
return Underscore(Underscore.span_extensions, self,
|
||||||
start=self.start_char, end=self.end_char)
|
start=self.start_char, end=self.end_char)
|
||||||
|
|
||||||
def as_doc(self):
|
def as_doc(self, bint copy_user_data=False):
|
||||||
"""Create a `Doc` object with a copy of the `Span`'s data.
|
"""Create a `Doc` object with a copy of the `Span`'s data.
|
||||||
|
|
||||||
|
copy_user_data (bool): Whether or not to copy the original doc's user data.
|
||||||
RETURNS (Doc): The `Doc` copy of the span.
|
RETURNS (Doc): The `Doc` copy of the span.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/span#as_doc
|
DOCS: https://spacy.io/api/span#as_doc
|
||||||
"""
|
"""
|
||||||
|
# TODO: make copy_user_data a keyword-only argument (Python 3 only)
|
||||||
words = [t.text for t in self]
|
words = [t.text for t in self]
|
||||||
spaces = [bool(t.whitespace_) for t in self]
|
spaces = [bool(t.whitespace_) for t in self]
|
||||||
cdef Doc doc = Doc(self.doc.vocab, words=words, spaces=spaces)
|
cdef Doc doc = Doc(self.doc.vocab, words=words, spaces=spaces)
|
||||||
|
@ -235,6 +237,8 @@ cdef class Span:
|
||||||
cat_start, cat_end, cat_label = key
|
cat_start, cat_end, cat_label = key
|
||||||
if cat_start == self.start_char and cat_end == self.end_char:
|
if cat_start == self.start_char and cat_end == self.end_char:
|
||||||
doc.cats[cat_label] = value
|
doc.cats[cat_label] = value
|
||||||
|
if copy_user_data:
|
||||||
|
doc.user_data = self.doc.user_data
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def _fix_dep_copy(self, attrs, array):
|
def _fix_dep_copy(self, attrs, array):
|
||||||
|
|
|
@ -293,9 +293,10 @@ Create a new `Doc` object corresponding to the `Span`, with a copy of the data.
|
||||||
> assert doc2.text == "New York"
|
> assert doc2.text == "New York"
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Type | Description |
|
| Name | Type | Description |
|
||||||
| ----------- | ----- | --------------------------------------- |
|
| ----------------- | ----- | ---------------------------------------------------- |
|
||||||
| **RETURNS** | `Doc` | A `Doc` object of the `Span`'s content. |
|
| `copy_user_data` | bool | Whether or not to copy the original doc's user data. |
|
||||||
|
| **RETURNS** | `Doc` | A `Doc` object of the `Span`'s content. |
|
||||||
|
|
||||||
## Span.root {#root tag="property" model="parser"}
|
## Span.root {#root tag="property" model="parser"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user