mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Add comments clarifying matcher logic for '*'
This commit is contained in:
parent
7072b395c9
commit
490bc82c27
|
@ -361,7 +361,8 @@ cdef class Matcher:
|
||||||
if action == PANIC:
|
if action == PANIC:
|
||||||
raise Exception("Error selecting action in matcher")
|
raise Exception("Error selecting action in matcher")
|
||||||
|
|
||||||
|
# ADVANCE_PLUS acts like REPEAT, but also pushes a partial that
|
||||||
|
# acts like an ADVANCE_ZERO
|
||||||
if action == ADVANCE_PLUS:
|
if action == ADVANCE_PLUS:
|
||||||
state.second += 1
|
state.second += 1
|
||||||
partials.push_back(state)
|
partials.push_back(state)
|
||||||
|
@ -372,10 +373,13 @@ cdef class Matcher:
|
||||||
if action == ADVANCE:
|
if action == ADVANCE:
|
||||||
state.second += 1
|
state.second += 1
|
||||||
|
|
||||||
|
# Check for partial matches that are at the same spec in the same pattern
|
||||||
|
# Keep the longer of the matches
|
||||||
|
# This ensures that there are never more than 2 partials for every spec
|
||||||
|
# in a pattern (one of which gets pruned in this step)
|
||||||
|
|
||||||
overlap=False
|
overlap=False
|
||||||
for i in range(q):
|
for i in range(q):
|
||||||
if ent_id != get_pattern_key(partials[i].second):
|
|
||||||
continue
|
|
||||||
if state.second == partials[i].second and state.first < partials[i].first:
|
if state.second == partials[i].second and state.first < partials[i].first:
|
||||||
partials[i] = state
|
partials[i] = state
|
||||||
j = i
|
j = i
|
||||||
|
@ -385,26 +389,12 @@ cdef class Matcher:
|
||||||
continue
|
continue
|
||||||
overlap=False
|
overlap=False
|
||||||
for i in range(q):
|
for i in range(q):
|
||||||
if ent_id != get_pattern_key(partials[i].second):
|
|
||||||
continue
|
|
||||||
if state.second == partials[i].second:
|
if state.second == partials[i].second:
|
||||||
overlap = True
|
overlap = True
|
||||||
break
|
break
|
||||||
if overlap:
|
if overlap:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# overlap=False
|
|
||||||
# for i in range(q):
|
|
||||||
# if state.second == partials[i].second:
|
|
||||||
# if state.first < partials[i].first:
|
|
||||||
# partials[i] = state
|
|
||||||
# j = i-1
|
|
||||||
# else:
|
|
||||||
# overlap=True
|
|
||||||
# break
|
|
||||||
# if overlap:
|
|
||||||
# continue
|
|
||||||
|
|
||||||
|
|
||||||
if action == REPEAT:
|
if action == REPEAT:
|
||||||
# Leave the state in the queue, and advance to next slot
|
# Leave the state in the queue, and advance to next slot
|
||||||
|
@ -425,10 +415,9 @@ cdef class Matcher:
|
||||||
# ent_id = state.second[1].attrs[0].value
|
# ent_id = state.second[1].attrs[0].value
|
||||||
# ent_id = get_pattern_key(state.second)
|
# ent_id = get_pattern_key(state.second)
|
||||||
label = state.second[1].attrs[1].value
|
label = state.second[1].attrs[1].value
|
||||||
# matches.append((ent_id, start, end))
|
|
||||||
# Check that this match doesn't overlap with an earlier match.
|
# Check that this match doesn't overlap with an earlier match.
|
||||||
# Only overwrite an earlier match if it is a substring of this
|
# Only overwrite an earlier match if it is a substring of this
|
||||||
# match.
|
# match (i.e. it starts after this match starts).
|
||||||
|
|
||||||
if ent_id not in matches_dict:
|
if ent_id not in matches_dict:
|
||||||
matches_dict[ent_id] = (start,end,len(matches))
|
matches_dict[ent_id] = (start,end,len(matches))
|
||||||
|
@ -454,23 +443,8 @@ cdef class Matcher:
|
||||||
action = get_action(pattern, token)
|
action = get_action(pattern, token)
|
||||||
if action == PANIC:
|
if action == PANIC:
|
||||||
raise Exception("Error selecting action in matcher")
|
raise Exception("Error selecting action in matcher")
|
||||||
# while acton == ADVANCE_ZERO:
|
|
||||||
# pattern += 1
|
|
||||||
# action = get_action(pattern,token)
|
|
||||||
# if action == PANIC:
|
|
||||||
# raise Exception("Error selecting action in matcher")
|
|
||||||
while action in (ADVANCE_PLUS,ADVANCE_ZERO):
|
while action in (ADVANCE_PLUS,ADVANCE_ZERO):
|
||||||
if action == ADVANCE_PLUS:
|
if action == ADVANCE_PLUS:
|
||||||
# j=0
|
|
||||||
# overlap = False
|
|
||||||
# for j in range(q):
|
|
||||||
# if pattern == partials[j].second:
|
|
||||||
# overlap = True
|
|
||||||
# break
|
|
||||||
# if overlap:
|
|
||||||
# pattern += 1
|
|
||||||
# action = get_action(pattern, token)
|
|
||||||
# continue
|
|
||||||
state.first = token_i
|
state.first = token_i
|
||||||
state.second = pattern
|
state.second = pattern
|
||||||
partials.push_back(state)
|
partials.push_back(state)
|
||||||
|
@ -483,8 +457,6 @@ cdef class Matcher:
|
||||||
j=0
|
j=0
|
||||||
overlap = False
|
overlap = False
|
||||||
for j in range(q):
|
for j in range(q):
|
||||||
if ent_id == get_pattern_key(partials[j].second):
|
|
||||||
continue
|
|
||||||
if pattern == partials[j].second:
|
if pattern == partials[j].second:
|
||||||
overlap = True
|
overlap = True
|
||||||
break
|
break
|
||||||
|
@ -508,7 +480,6 @@ cdef class Matcher:
|
||||||
start = token_i
|
start = token_i
|
||||||
end = token_i+1 if action == ACCEPT else token_i
|
end = token_i+1 if action == ACCEPT else token_i
|
||||||
ent_id = pattern[1].attrs[0].value
|
ent_id = pattern[1].attrs[0].value
|
||||||
# ent_id = get_pattern_key(state.second)
|
|
||||||
label = pattern[1].attrs[1].value
|
label = pattern[1].attrs[1].value
|
||||||
if ent_id not in matches_dict:
|
if ent_id not in matches_dict:
|
||||||
matches_dict[ent_id] = (start,end,len(matches))
|
matches_dict[ent_id] = (start,end,len(matches))
|
||||||
|
@ -531,9 +502,7 @@ cdef class Matcher:
|
||||||
start = state.first
|
start = state.first
|
||||||
end = len(doc)
|
end = len(doc)
|
||||||
ent_id = state.second.attrs[0].value
|
ent_id = state.second.attrs[0].value
|
||||||
# ent_id = get_pattern_key(state.second)
|
|
||||||
label = state.second.attrs[1].value
|
label = state.second.attrs[1].value
|
||||||
# matches.append((ent_id, start, end))
|
|
||||||
if ent_id not in matches_dict:
|
if ent_id not in matches_dict:
|
||||||
matches_dict[ent_id] = (start,end,len(matches))
|
matches_dict[ent_id] = (start,end,len(matches))
|
||||||
matches.append((ent_id,start,end))
|
matches.append((ent_id,start,end))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user