Fix class mapping for morphologizer

2025-12-23 18:13:13 +03:00 · 2019-03-09 00:20:29 +00:00 · 2019-03-09 00:20:29 +00:00 · 42bc3ad73b
commit 42bc3ad73b
parent c4df89ab90
2 changed files with 8 additions and 4 deletions
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -1066,16 +1066,20 @@ FEATURES = [
 FEATURE_NAMES = {get_string_id(name): name for name in FEATURES}
 FEATURE_FIELDS = {feature: FIELDS[feature.split('_', 1)[0]] for feature in FEATURES}
 FIELD_SIZES = Counter(FEATURE_FIELDS.values())
 for field in FIELD_SIZES:
    FIELD_SIZES[field] += 1
 for feat_id, name in FEATURE_NAMES.items():
    FEATURE_FIELDS[feat_id] = FEATURE_FIELDS[name]
-
+# Mapping of feature names to their position in total vector
 FIELD_SIZES = Counter(FEATURE_FIELDS.values())
 FEATURE_OFFSETS = {}
 # Mapping of field names to their first position in total vector.
 FIELD_OFFSETS = {}
 _seen_fields = Counter()
 for i, feature in enumerate(FEATURES):
    field = FEATURE_FIELDS[feature]
-    FEATURE_OFFSETS[feature] = _seen_fields[field]
+    # Add 1 because index 0 of each field is reserved for the NIL class.
+    FEATURE_OFFSETS[feature] = _seen_fields[field] + 1
    if _seen_fields[field] == 0:
        FIELD_OFFSETS[field] = i
    _seen_fields[field] += 1 
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -88,7 +88,7 @@ class Morphologizer(Pipe):
                    if doc_guesses[j, k] == 0:
                        doc_feat_ids[j, k] = 0
                    else:
-                        doc_feat_ids[j, k] = offset + doc_guesses[j, k]
+                        doc_feat_ids[j, k] = offset + (doc_guesses[j, k]-1)
                # Get the set of feature names.
                feats = {FEATURES[f] for f in doc_feat_ids[j] if f != 0}
                # Now add the analysis, and set the hash.