Fix class mapping for morphologizer

This commit is contained in:
Matthew Honnibal 2019-03-09 00:20:29 +00:00
parent c4df89ab90
commit 42bc3ad73b
2 changed files with 8 additions and 4 deletions

View File

@@ -1066,16 +1066,20 @@ FEATURES = [
FEATURE_NAMES = {get_string_id(name): name for name in FEATURES} FEATURE_NAMES = {get_string_id(name): name for name in FEATURES}
FEATURE_FIELDS = {feature: FIELDS[feature.split('_', 1)[0]] for feature in FEATURES} FEATURE_FIELDS = {feature: FIELDS[feature.split('_', 1)[0]] for feature in FEATURES}
FIELD_SIZES = Counter(FEATURE_FIELDS.values())
for field in FIELD_SIZES:
FIELD_SIZES[field] += 1
for feat_id, name in FEATURE_NAMES.items(): for feat_id, name in FEATURE_NAMES.items():
FEATURE_FIELDS[feat_id] = FEATURE_FIELDS[name] FEATURE_FIELDS[feat_id] = FEATURE_FIELDS[name]
# Mapping of feature names to their position in total vector
FIELD_SIZES = Counter(FEATURE_FIELDS.values())
FEATURE_OFFSETS = {} FEATURE_OFFSETS = {}
# Mapping of field names to their first position in total vector.
FIELD_OFFSETS = {} FIELD_OFFSETS = {}
_seen_fields = Counter() _seen_fields = Counter()
for i, feature in enumerate(FEATURES): for i, feature in enumerate(FEATURES):
field = FEATURE_FIELDS[feature] field = FEATURE_FIELDS[feature]
FEATURE_OFFSETS[feature] = _seen_fields[field] # Add 1 for the NIL class, on each field
FEATURE_OFFSETS[feature] = _seen_fields[field] + 1
if _seen_fields[field] == 0: if _seen_fields[field] == 0:
FIELD_OFFSETS[field] = i FIELD_OFFSETS[field] = i
_seen_fields[field] += 1 _seen_fields[field] += 1

View File

@@ -88,7 +88,7 @@ class Morphologizer(Pipe):
if doc_guesses[j, k] == 0: if doc_guesses[j, k] == 0:
doc_feat_ids[j, k] = 0 doc_feat_ids[j, k] = 0
else: else:
doc_feat_ids[j, k] = offset + doc_guesses[j, k] doc_feat_ids[j, k] = offset + (doc_guesses[j, k]-1)
# Get the set of feature names. # Get the set of feature names.
feats = {FEATURES[f] for f in doc_feat_ids[j] if f != 0} feats = {FEATURES[f] for f in doc_feat_ids[j] if f != 0}
# Now add the analysis, and set the hash. # Now add the analysis, and set the hash.