mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge branch 'master' into develop
This commit is contained in:
		
						commit
						3c3658ef9f
					
				| 
						 | 
					@ -1066,8 +1066,15 @@ cdef class DependencyParser(Parser):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
    def labels(self):
 | 
					    def labels(self):
 | 
				
			||||||
 | 
					        labels = set()
 | 
				
			||||||
        # Get the labels from the model by looking at the available moves
 | 
					        # Get the labels from the model by looking at the available moves
 | 
				
			||||||
        return tuple(set(move.split("-")[1] for move in self.move_names if "-" in move))
 | 
					        for move in self.move_names:
 | 
				
			||||||
 | 
					            if "-" in move:
 | 
				
			||||||
 | 
					                label = move.split("-")[1]
 | 
				
			||||||
 | 
					                if "||" in label:
 | 
				
			||||||
 | 
					                    label = label.split("||")[1]
 | 
				
			||||||
 | 
					                labels.add(label)
 | 
				
			||||||
 | 
					        return tuple(sorted(labels))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef class EntityRecognizer(Parser):
 | 
					cdef class EntityRecognizer(Parser):
 | 
				
			||||||
| 
						 | 
					@ -1102,8 +1109,9 @@ cdef class EntityRecognizer(Parser):
 | 
				
			||||||
    def labels(self):
 | 
					    def labels(self):
 | 
				
			||||||
        # Get the labels from the model by looking at the available moves, e.g.
 | 
					        # Get the labels from the model by looking at the available moves, e.g.
 | 
				
			||||||
        # B-PERSON, I-PERSON, L-PERSON, U-PERSON
 | 
					        # B-PERSON, I-PERSON, L-PERSON, U-PERSON
 | 
				
			||||||
        return tuple(set(move.split("-")[1] for move in self.move_names
 | 
					        labels = set(move.split("-")[1] for move in self.move_names
 | 
				
			||||||
                if move[0] in ("B", "I", "L", "U")))
 | 
					                     if move[0] in ("B", "I", "L", "U"))
 | 
				
			||||||
 | 
					        return tuple(sorted(labels))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class EntityLinker(Pipe):
 | 
					class EntityLinker(Pipe):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -173,6 +173,21 @@ def test_span_as_doc(doc):
 | 
				
			||||||
    assert span_doc[0].idx == 0
 | 
					    assert span_doc[0].idx == 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_span_as_doc_user_data(doc):
 | 
				
			||||||
 | 
					    """Test that the user_data can be preserved (but not by default). """
 | 
				
			||||||
 | 
					    my_key = "my_info"
 | 
				
			||||||
 | 
					    my_value = 342
 | 
				
			||||||
 | 
					    doc.user_data[my_key] = my_value
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    span = doc[4:10]
 | 
				
			||||||
 | 
					    span_doc_with = span.as_doc(copy_user_data=True)
 | 
				
			||||||
 | 
					    span_doc_without = span.as_doc()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert doc.user_data.get(my_key, None) is my_value
 | 
				
			||||||
 | 
					    assert span_doc_with.user_data.get(my_key, None) is my_value
 | 
				
			||||||
 | 
					    assert span_doc_without.user_data.get(my_key, None) is None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_span_string_label_kb_id(doc):
 | 
					def test_span_string_label_kb_id(doc):
 | 
				
			||||||
    span = Span(doc, 0, 1, label="hello", kb_id="Q342")
 | 
					    span = Span(doc, 0, 1, label="hello", kb_id="Q342")
 | 
				
			||||||
    assert span.label_ == "hello"
 | 
					    assert span.label_ == "hello"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -200,13 +200,15 @@ cdef class Span:
 | 
				
			||||||
        return Underscore(Underscore.span_extensions, self,
 | 
					        return Underscore(Underscore.span_extensions, self,
 | 
				
			||||||
                          start=self.start_char, end=self.end_char)
 | 
					                          start=self.start_char, end=self.end_char)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def as_doc(self):
 | 
					    def as_doc(self, bint copy_user_data=False):
 | 
				
			||||||
        """Create a `Doc` object with a copy of the `Span`'s data.
 | 
					        """Create a `Doc` object with a copy of the `Span`'s data.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        copy_user_data (bool): Whether or not to copy the original doc's user data. Defaults to False.
 | 
				
			||||||
        RETURNS (Doc): The `Doc` copy of the span.
 | 
					        RETURNS (Doc): The `Doc` copy of the span.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        DOCS: https://spacy.io/api/span#as_doc
 | 
					        DOCS: https://spacy.io/api/span#as_doc
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
 | 
					        # TODO: make copy_user_data a keyword-only argument (Python 3 only)
 | 
				
			||||||
        words = [t.text for t in self]
 | 
					        words = [t.text for t in self]
 | 
				
			||||||
        spaces = [bool(t.whitespace_) for t in self]
 | 
					        spaces = [bool(t.whitespace_) for t in self]
 | 
				
			||||||
        cdef Doc doc = Doc(self.doc.vocab, words=words, spaces=spaces)
 | 
					        cdef Doc doc = Doc(self.doc.vocab, words=words, spaces=spaces)
 | 
				
			||||||
| 
						 | 
					@ -235,6 +237,8 @@ cdef class Span:
 | 
				
			||||||
                cat_start, cat_end, cat_label = key
 | 
					                cat_start, cat_end, cat_label = key
 | 
				
			||||||
                if cat_start == self.start_char and cat_end == self.end_char:
 | 
					                if cat_start == self.start_char and cat_end == self.end_char:
 | 
				
			||||||
                    doc.cats[cat_label] = value
 | 
					                    doc.cats[cat_label] = value
 | 
				
			||||||
 | 
					        if copy_user_data:
 | 
				
			||||||
 | 
					            doc.user_data = self.doc.user_data
 | 
				
			||||||
        return doc
 | 
					        return doc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _fix_dep_copy(self, attrs, array):
 | 
					    def _fix_dep_copy(self, attrs, array):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -293,9 +293,10 @@ Create a new `Doc` object corresponding to the `Span`, with a copy of the data.
 | 
				
			||||||
> assert doc2.text == "New York"
 | 
					> assert doc2.text == "New York"
 | 
				
			||||||
> ```
 | 
					> ```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| Name        | Type  | Description                             |
 | 
					| Name              | Type  | Description                                          |
 | 
				
			||||||
| ----------- | ----- | --------------------------------------- |
 | 
					| ----------------- | ----- | ---------------------------------------------------- |
 | 
				
			||||||
| **RETURNS** | `Doc` | A `Doc` object of the `Span`'s content. |
 | 
					| `copy_user_data`  | bool  | Whether or not to copy the original doc's user data. |
 | 
				
			||||||
 | 
					| **RETURNS**       | `Doc` | A `Doc` object of the `Span`'s content.              |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Span.root {#root tag="property" model="parser"}
 | 
					## Span.root {#root tag="property" model="parser"}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user