From e10295c9fd6b47606362fc8e9a7b7c3d27ecab6f Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Fri, 18 Dec 2020 11:51:01 +0100
Subject: [PATCH] Fix memory leak when adding empty morph (#6581)

Fix lookup of empty morph in the morphology table, which fixes a memory
leak where a new morphology tag was allocated each time the empty morph
tag was added.
---
 spacy/morphology.pyx | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index cc0f61cea..a2420353e 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -29,7 +29,7 @@ cdef class Morphology:
     FEATURE_SEP = "|"
     FIELD_SEP = "="
     VALUE_SEP = ","
-    # not an empty string so that the PreshMap key is not 0
+    # not an empty string so we can distinguish unset morph from empty morph
     EMPTY_MORPH = symbols.NAMES[symbols._]
 
     def __init__(self, StringStore strings):
@@ -50,8 +50,8 @@ cdef class Morphology:
         """
         cdef MorphAnalysisC* tag_ptr
         if isinstance(features, str):
-            if features == self.EMPTY_MORPH:
-                features = ""
+            if features == "":
+                features = self.EMPTY_MORPH
             tag_ptr = self.tags.get(self.strings[features])
             if tag_ptr != NULL:
                 return tag_ptr.key
@@ -71,13 +71,9 @@ cdef class Morphology:
                 ))
         cdef MorphAnalysisC tag = self.create_morph_tag(field_feature_pairs)
         # the hash key for the tag is either the hash of the normalized UFEATS
-        # string or the hash of an empty placeholder (using the empty string
-        # would give a hash key of 0, which is not good for PreshMap)
+        # string or the hash of an empty placeholder
         norm_feats_string = self.normalize_features(features)
-        if norm_feats_string:
-            tag.key = self.strings.add(norm_feats_string)
-        else:
-            tag.key = self.strings.add(self.EMPTY_MORPH)
+        tag.key = self.strings.add(norm_feats_string)
         self.insert(tag)
         return tag.key
 