use subclass for parsing EXIF data

The old method (using dict() and _fixup_dict) created clones from IFD_v1,
but it read all items when iterating, losing all the benefits of the laziness
of IFD.
The new method uses a custom subclass that tries to prevent iteration
as much as possible, keeping all tags undecoded.

The return value isn't a dict anymore, but a subclass of ImageFileDirectory_v1
that doesn't support to_v2(). All decoded tags are formatted as before.
This commit is contained in:
Glandos 2019-02-18 16:05:24 +01:00 committed by Andrew Murray
parent 2adfea9b4e
commit 49fc09d590

View File

@ -469,18 +469,64 @@ class JpegImageFile(ImageFile.ImageFile):
return _getmp(self) return _getmp(self)
def _fixup_dict(src_dict): def _fixup(value):
# Helper function for _getexif() try:
# returns a dict with any single item tuples/lists as individual values if len(value) == 1 and not isinstance(value, dict):
def _fixup(value): return value[0]
try: except Exception:
if len(value) == 1 and not isinstance(value, dict): pass
return value[0] return value
except Exception:
pass
return value
return {k: _fixup(v) for k, v in src_dict.items()}
class ExifImageFileDirectory(TiffImagePlugin.ImageFileDirectory_v1):
    """
    Specialization of ImageFileDirectory_v1 for parsing EXIF data.

    - support for v2 is removed to avoid useless computations
    - custom __setitem__ to support IFD values
    - values are "fixed up" so that 1-size tuples are expanded
    - custom update() to avoid iterating and expanding non parsed data

    The goal is to use the laziness of ImageFileDirectory_v1 in _getexif().
    """

    def to_v2(self):
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError("ExifImageFileDirectory does not support to_v2()")

    def _setitem(self, tag, value, legacy_api):
        """
        Store ``value`` through the parent class, then fix up the v1 entry.

        When ``legacy_api`` is true, the entry found in ``_tags_v1`` after
        the parent call is passed through ``_fixup``, so a container of
        length one is replaced by its single element.
        """
        super()._setitem(tag, value, legacy_api)
        if not legacy_api:
            return
        stored = self._tags_v1[tag]
        if not isinstance(stored, (tuple, bytes)):
            # Wrap bare values in a 1-tuple before handing them to _fixup.
            stored = (stored,)
        self._tags_v1[tag] = _fixup(stored)

    def __setitem__(self, tag, value):
        """Assign ``value`` to ``tag``; IFD instances are stored untouched."""
        if isinstance(value, TiffImagePlugin.ImageFileDirectory_v2):
            # Nested directory: keep the object itself as the tag value
            # instead of going through the parent's assignment logic.
            self._tags_v1[tag] = value
        else:
            super().__setitem__(tag, value)

    def __getitem__(self, tag):
        """Return the v1 value of ``tag``, decoding its raw data on first use."""
        if tag not in self._tags_v1:  # unpack on the fly
            data = self._tagdata[tag]
            typ = self.tagtype[tag]
            size, handler = self._load_dispatch[typ]
            # We don't support v2
            self._setitem(tag, handler(self, data, True), True)
        # Don't try to convert as tuple, it is done in _setitem
        return self._tags_v1[tag]

    def update(self, *args, **kwds):
        """
        Merge another mapping into this directory.

        When the first positional argument is an IFD, its internal tables
        are merged directly so that none of its tags has to be decoded.
        Any other input is delegated to the parent implementation.
        """
        if args and isinstance(args[0], TiffImagePlugin.ImageFileDirectory_v2):
            other = args[0]
            # __getitem__ looks at _tags_v1 before _tagdata, so a value this
            # directory already decoded would shadow newer raw data coming
            # from ``other``. Drop such stale entries; this only walks the
            # keys of ``other`` and does not decode anything.
            for tag in other._tagdata:
                if tag not in other._tags_v1:
                    self._tags_v1.pop(tag, None)
                if tag not in other._tags_v2:
                    self._tags_v2.pop(tag, None)
            # custom update
            self._tags_v1.update(other._tags_v1)
            self._tags_v2.update(other._tags_v2)
            self._tagdata.update(other._tagdata)
            self.tagtype.update(other.tagtype)
        else:
            super().update(*args, **kwds)
def _getexif(self): def _getexif(self):
@ -503,10 +549,9 @@ def _getexif(self):
fp = io.BytesIO(data[6:]) fp = io.BytesIO(data[6:])
head = fp.read(8) head = fp.read(8)
# process dictionary # process dictionary
info = TiffImagePlugin.ImageFileDirectory_v1(head) exif = ExifImageFileDirectory(head)
fp.seek(info.next) fp.seek(exif.next)
info.load(fp) exif.load(fp)
exif = dict(_fixup_dict(info))
# get exif extension # get exif extension
try: try:
# exif field 0x8769 is an offset pointer to the location # exif field 0x8769 is an offset pointer to the location
@ -516,9 +561,9 @@ def _getexif(self):
except (KeyError, TypeError): except (KeyError, TypeError):
pass pass
else: else:
info = TiffImagePlugin.ImageFileDirectory_v1(head) info = ExifImageFileDirectory(head)
info.load(fp) info.load(fp)
exif.update(_fixup_dict(info)) exif.update(info)
# get gpsinfo extension # get gpsinfo extension
try: try:
# exif field 0x8825 is an offset pointer to the location # exif field 0x8825 is an offset pointer to the location
@ -528,9 +573,9 @@ def _getexif(self):
except (KeyError, TypeError): except (KeyError, TypeError):
pass pass
else: else:
info = TiffImagePlugin.ImageFileDirectory_v1(head) info = ExifImageFileDirectory(head)
info.load(fp) info.load(fp)
exif[0x8825] = _fixup_dict(info) exif[0x8825] = info
# Cache the result for future use # Cache the result for future use
self.info["parsed_exif"] = exif self.info["parsed_exif"] = exif