mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Fix class mapping for morphologizer
This commit is contained in:
		
							parent
							
								
									c4df89ab90
								
							
						
					
					
						commit
						42bc3ad73b
					
				| 
						 | 
					@ -1066,16 +1066,20 @@ FEATURES = [
 | 
				
			||||||
FEATURE_NAMES = {get_string_id(name): name for name in FEATURES}
 | 
					FEATURE_NAMES = {get_string_id(name): name for name in FEATURES}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
FEATURE_FIELDS = {feature: FIELDS[feature.split('_', 1)[0]] for feature in FEATURES}
 | 
					FEATURE_FIELDS = {feature: FIELDS[feature.split('_', 1)[0]] for feature in FEATURES}
 | 
				
			||||||
 | 
					FIELD_SIZES = Counter(FEATURE_FIELDS.values())
 | 
				
			||||||
 | 
					for field in FIELD_SIZES:
 | 
				
			||||||
 | 
					    FIELD_SIZES[field] += 1
 | 
				
			||||||
for feat_id, name in FEATURE_NAMES.items():
 | 
					for feat_id, name in FEATURE_NAMES.items():
 | 
				
			||||||
    FEATURE_FIELDS[feat_id] = FEATURE_FIELDS[name]
 | 
					    FEATURE_FIELDS[feat_id] = FEATURE_FIELDS[name]
 | 
				
			||||||
 | 
					# Mapping of feature names to their position in total vector
 | 
				
			||||||
FIELD_SIZES = Counter(FEATURE_FIELDS.values())
 | 
					 | 
				
			||||||
FEATURE_OFFSETS = {}
 | 
					FEATURE_OFFSETS = {}
 | 
				
			||||||
 | 
					# Mapping of field names to their first position in total vector.
 | 
				
			||||||
FIELD_OFFSETS = {}
 | 
					FIELD_OFFSETS = {}
 | 
				
			||||||
_seen_fields = Counter()
 | 
					_seen_fields = Counter()
 | 
				
			||||||
for i, feature in enumerate(FEATURES):
 | 
					for i, feature in enumerate(FEATURES):
 | 
				
			||||||
    field = FEATURE_FIELDS[feature]
 | 
					    field = FEATURE_FIELDS[feature]
 | 
				
			||||||
    FEATURE_OFFSETS[feature] = _seen_fields[field]
 | 
					    # Add 1 for the NIL class, on each field
 | 
				
			||||||
 | 
					    FEATURE_OFFSETS[feature] = _seen_fields[field] + 1
 | 
				
			||||||
    if _seen_fields[field] == 0:
 | 
					    if _seen_fields[field] == 0:
 | 
				
			||||||
        FIELD_OFFSETS[field] = i
 | 
					        FIELD_OFFSETS[field] = i
 | 
				
			||||||
    _seen_fields[field] += 1 
 | 
					    _seen_fields[field] += 1 
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -88,7 +88,7 @@ class Morphologizer(Pipe):
 | 
				
			||||||
                    if doc_guesses[j, k] == 0:
 | 
					                    if doc_guesses[j, k] == 0:
 | 
				
			||||||
                        doc_feat_ids[j, k] = 0
 | 
					                        doc_feat_ids[j, k] = 0
 | 
				
			||||||
                    else:
 | 
					                    else:
 | 
				
			||||||
                        doc_feat_ids[j, k] = offset + doc_guesses[j, k]
 | 
					                        doc_feat_ids[j, k] = offset + (doc_guesses[j, k]-1)
 | 
				
			||||||
                # Get the set of feature names.
 | 
					                # Get the set of feature names.
 | 
				
			||||||
                feats = {FEATURES[f] for f in doc_feat_ids[j] if f != 0}
 | 
					                feats = {FEATURES[f] for f in doc_feat_ids[j] if f != 0}
 | 
				
			||||||
                # Now add the analysis, and set the hash.
 | 
					                # Now add the analysis, and set the hash.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user