mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
* Allow regular expressions to specify labels for merged spans
This commit is contained in:
parent
a3af6b7c3d
commit
557856e84c
|
@@ -12,10 +12,11 @@ cdef class Spans:
|
|||
token_starts = {t.idx: t.i for t in self.tokens}
|
||||
token_ends = {t.idx + len(t): t.i+1 for t in self.tokens}
|
||||
tokens_str = unicode(self.tokens)
|
||||
for match in mwe_re.finditer(tokens_str):
|
||||
start = token_starts.get(match.start())
|
||||
end = token_ends.get(match.end())
|
||||
self.merge(start, end)
|
||||
for label, regex in mwe_re:
|
||||
for match in regex.finditer(tokens_str):
|
||||
start = token_starts.get(match.start())
|
||||
end = token_ends.get(match.end())
|
||||
self.merge(start, end, label=label)
|
||||
if merge_ents:
|
||||
# Merge named entities and units
|
||||
for ent in self.tokens.ents:
|
||||
|
@@ -128,12 +129,16 @@ cdef class Span:
|
|||
|
||||
property orth_:
|
||||
def __get__(self):
|
||||
return ''.join([t.string for t in self]).strip()
|
||||
return ' '.join([t.string for t in self]).strip()
|
||||
|
||||
property lemma_:
|
||||
def __get__(self):
|
||||
return ' '.join([t.lemma_ for t in self]).strip()
|
||||
|
||||
property string:
|
||||
def __get__(self):
|
||||
return ''.join([t.string for t in self])
|
||||
|
||||
property label_:
|
||||
def __get__(self):
|
||||
return self._seq.vocab.strings[self.label]
|
||||
|
|
Loading…
Reference in New Issue
Block a user