mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Improve the API of doc.merge() and span.merge() to use keyword arguments.
This commit is contained in:
parent
fbb7f3f15c
commit
b67697a97b
|
@ -593,9 +593,22 @@ cdef class Doc:
|
|||
keep_reading = False
|
||||
yield n_bytes_str + data
|
||||
|
||||
def merge(self, int start_idx, int end_idx, unicode tag, unicode lemma,
|
||||
unicode ent_type):
|
||||
def merge(self, int start_idx, int end_idx, *args, **attributes):
|
||||
"""Merge a multi-word expression into a single token."""
|
||||
cdef unicode tag, lemma, ent_type
|
||||
if len(args) == 3:
|
||||
# TODO: Emit a deprecation warning for the positional-argument form
|
||||
tag, lemma, ent_type = args
|
||||
attributes[TAG] = self.strings[tag]
|
||||
attributes[LEMMA] = self.strings[lemma]
|
||||
attributes[ENT_TYPE] = self.strings[ent_type]
|
||||
elif args:
|
||||
raise ValueError(
|
||||
"Doc.merge received %d non-keyword arguments. "
|
||||
"Expected either 3 arguments (deprecated), or 0 (use keyword arguments). "
|
||||
"Arguments supplied:\n%s\n"
|
||||
"Keyword arguments:%s\n" % (len(args), repr(args), repr(attributes)))
|
||||
|
||||
cdef int start = token_by_start(self.c, self.length, start_idx)
|
||||
if start == -1:
|
||||
return None
|
||||
|
@ -604,8 +617,11 @@ cdef class Doc:
|
|||
return None
|
||||
# We currently have the token index; we want the range-end index
|
||||
end += 1
|
||||
|
||||
cdef Span span = self[start:end]
|
||||
tag = self.strings[attributes.get(TAG, span.root.tag)]
|
||||
lemma = self.strings[attributes.get(LEMMA, span.root.lemma)]
|
||||
ent_type = self.strings[attributes.get(ENT_TYPE, span.root.ent_type)]
|
||||
|
||||
# Get LexemeC for newly merged token
|
||||
new_orth = ''.join([t.text_with_ws for t in span])
|
||||
if span[-1].whitespace_:
|
||||
|
|
|
@ -77,8 +77,8 @@ cdef class Span:
|
|||
for i in range(self.start, self.end):
|
||||
yield self.doc[i]
|
||||
|
||||
def merge(self, unicode tag, unicode lemma, unicode ent_type):
|
||||
self.doc.merge(self.start_char, self.end_char, tag, lemma, ent_type)
|
||||
def merge(self, *args, **attributes):
|
||||
self.doc.merge(self.start_char, self.end_char, *args, **attributes)
|
||||
|
||||
def similarity(self, other):
|
||||
if 'similarity' in self.doc.getters_for_spans:
|
||||
|
|
Loading…
Reference in New Issue
Block a user