mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-03-03 11:35:52 +03:00
2013-05-24 v0.25 PL: - getproperties: option to not convert some timestamps
- OleMetaData: total_edit_time is now a number of seconds, not a timestamp - getproperties: added support for VT_BOOL, VT_INT, V_UINT - getproperties: filter out null chars from strings - getproperties: raise non-fatal defects instead of exceptions when properties cannot be parsed properly
This commit is contained in:
parent
90f0b6796e
commit
8e826441b2
200
PIL/OleFileIO.py
200
PIL/OleFileIO.py
|
@ -6,7 +6,7 @@ OleFileIO_PL:
|
||||||
Microsoft Compound Document File Format), such as Microsoft Office
|
Microsoft Compound Document File Format), such as Microsoft Office
|
||||||
documents, Image Composer and FlashPix files, Outlook messages, ...
|
documents, Image Composer and FlashPix files, Outlook messages, ...
|
||||||
|
|
||||||
version 0.24 2013-05-07 Philippe Lagadec - http://www.decalage.info
|
version 0.25 2013-05-24 Philippe Lagadec - http://www.decalage.info
|
||||||
|
|
||||||
Project website: http://www.decalage.info/python/olefileio
|
Project website: http://www.decalage.info/python/olefileio
|
||||||
|
|
||||||
|
@ -24,8 +24,8 @@ WARNING: THIS IS (STILL) WORK IN PROGRESS.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__author__ = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)"
|
__author__ = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)"
|
||||||
__date__ = "2013-05-07"
|
__date__ = "2013-05-24"
|
||||||
__version__ = '0.24'
|
__version__ = '0.25'
|
||||||
|
|
||||||
#--- LICENSE ------------------------------------------------------------------
|
#--- LICENSE ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -116,10 +116,19 @@ __version__ = '0.24'
|
||||||
# - new class OleMetadata to parse standard properties
|
# - new class OleMetadata to parse standard properties
|
||||||
# - added get_metadata method
|
# - added get_metadata method
|
||||||
# 2013-05-07 v0.24 PL: - a few improvements in OleMetadata
|
# 2013-05-07 v0.24 PL: - a few improvements in OleMetadata
|
||||||
|
# 2013-05-24 v0.25 PL: - getproperties: option to not convert some timestamps
|
||||||
|
# - OleMetaData: total_edit_time is now a number of seconds,
|
||||||
|
# not a timestamp
|
||||||
|
# - getproperties: added support for VT_BOOL, VT_INT, V_UINT
|
||||||
|
# - getproperties: filter out null chars from strings
|
||||||
|
# - getproperties: raise non-fatal defects instead of
|
||||||
|
# exceptions when properties cannot be parsed properly
|
||||||
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
#-----------------------------------------------------------------------------
|
||||||
# TODO (for version 1.0):
|
# TODO (for version 1.0):
|
||||||
|
# + _raise_defect: store all defects that are not raised as exceptions, and
|
||||||
|
# display them in main for information.
|
||||||
# + add path attrib to _OleDirEntry, set it once and for all in init or
|
# + add path attrib to _OleDirEntry, set it once and for all in init or
|
||||||
# append_kids (then listdir/_list can be simplified)
|
# append_kids (then listdir/_list can be simplified)
|
||||||
# - TESTS with Linux, MacOSX, Python 1.5.2, various files, PIL, ...
|
# - TESTS with Linux, MacOSX, Python 1.5.2, various files, PIL, ...
|
||||||
|
@ -138,6 +147,8 @@ __version__ = '0.24'
|
||||||
# - improve docstrings to show more sample uses
|
# - improve docstrings to show more sample uses
|
||||||
# - see also original notes and FIXME below
|
# - see also original notes and FIXME below
|
||||||
# - remove all obsolete FIXMEs
|
# - remove all obsolete FIXMEs
|
||||||
|
# - OleMetadata: fix version attrib according to
|
||||||
|
# http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx
|
||||||
|
|
||||||
# IDEAS:
|
# IDEAS:
|
||||||
# - allow _raise_defect to raise different exceptions, not only IOError
|
# - allow _raise_defect to raise different exceptions, not only IOError
|
||||||
|
@ -548,8 +559,10 @@ class OleMetadata:
|
||||||
setattr(self, attrib, None)
|
setattr(self, attrib, None)
|
||||||
if olefile.exists("\x05SummaryInformation"):
|
if olefile.exists("\x05SummaryInformation"):
|
||||||
# get properties from the stream:
|
# get properties from the stream:
|
||||||
|
# (converting timestamps to python datetime, except total_edit_time,
|
||||||
|
# which is property #10)
|
||||||
props = olefile.getproperties("\x05SummaryInformation",
|
props = olefile.getproperties("\x05SummaryInformation",
|
||||||
convert_time=True)
|
convert_time=True, no_conversion=[10])
|
||||||
# store them into this object's attributes:
|
# store them into this object's attributes:
|
||||||
for i in range(len(self.SUMMARY_ATTRIBS)):
|
for i in range(len(self.SUMMARY_ATTRIBS)):
|
||||||
# ids for standards properties start at 0x01, until 0x13
|
# ids for standards properties start at 0x01, until 0x13
|
||||||
|
@ -572,11 +585,11 @@ class OleMetadata:
|
||||||
print 'Properties from SummaryInformation stream:'
|
print 'Properties from SummaryInformation stream:'
|
||||||
for prop in self.SUMMARY_ATTRIBS:
|
for prop in self.SUMMARY_ATTRIBS:
|
||||||
value = getattr(self, prop)
|
value = getattr(self, prop)
|
||||||
print '- %s: %s' % (prop, value)
|
print '- %s: %s' % (prop, repr(value))
|
||||||
print 'Properties from DocumentSummaryInformation stream:'
|
print 'Properties from DocumentSummaryInformation stream:'
|
||||||
for prop in self.DOCSUM_ATTRIBS:
|
for prop in self.DOCSUM_ATTRIBS:
|
||||||
value = getattr(self, prop)
|
value = getattr(self, prop)
|
||||||
print '- %s: %s' % (prop, value)
|
print '- %s: %s' % (prop, repr(value))
|
||||||
|
|
||||||
|
|
||||||
#--- _OleStream ---------------------------------------------------------------
|
#--- _OleStream ---------------------------------------------------------------
|
||||||
|
@ -1663,89 +1676,140 @@ class OleFileIO:
|
||||||
return self.root.name
|
return self.root.name
|
||||||
|
|
||||||
|
|
||||||
def getproperties(self, filename, convert_time=False):
|
def getproperties(self, filename, convert_time=False, no_conversion=None):
|
||||||
"""
|
"""
|
||||||
Return properties described in substream.
|
Return properties described in substream.
|
||||||
|
|
||||||
filename: path of stream in storage tree (see openstream for syntax)
|
filename: path of stream in storage tree (see openstream for syntax)
|
||||||
convert_time: bool, if True timestamps will be converted to Python datetime
|
convert_time: bool, if True timestamps will be converted to Python datetime
|
||||||
|
no_conversion: None or list of int, timestamps not to be converted
|
||||||
|
(for example total editing time is not a real timestamp)
|
||||||
return: a dictionary of values indexed by id (integer)
|
return: a dictionary of values indexed by id (integer)
|
||||||
"""
|
"""
|
||||||
|
# make sure no_conversion is a list, just to simplify code below:
|
||||||
|
if no_conversion == None:
|
||||||
|
no_conversion = []
|
||||||
fp = self.openstream(filename)
|
fp = self.openstream(filename)
|
||||||
|
|
||||||
data = {}
|
data = {}
|
||||||
|
|
||||||
# header
|
try:
|
||||||
s = fp.read(28)
|
# header
|
||||||
clsid = _clsid(s[8:24])
|
s = fp.read(28)
|
||||||
|
clsid = _clsid(s[8:24])
|
||||||
|
|
||||||
# format id
|
# format id
|
||||||
s = fp.read(20)
|
s = fp.read(20)
|
||||||
fmtid = _clsid(s[:16])
|
fmtid = _clsid(s[:16])
|
||||||
fp.seek(i32(s, 16))
|
fp.seek(i32(s, 16))
|
||||||
|
|
||||||
# get section
|
# get section
|
||||||
s = "****" + fp.read(i32(fp.read(4))-4)
|
s = "****" + fp.read(i32(fp.read(4))-4)
|
||||||
|
except:
|
||||||
|
# catch exception while parsing property header, and only raise
|
||||||
|
# a DEFECT_INCORRECT then return an empty dict, because this is not
|
||||||
|
# a fatal error when parsing the whole file
|
||||||
|
exctype, excvalue = sys.exc_info()[:2]
|
||||||
|
self._raise_defect(DEFECT_INCORRECT, excvalue)
|
||||||
|
return data
|
||||||
|
|
||||||
for i in range(i32(s, 4)):
|
for i in range(i32(s, 4)):
|
||||||
|
try:
|
||||||
|
id = i32(s, 8+i*8)
|
||||||
|
offset = i32(s, 12+i*8)
|
||||||
|
type = i32(s, offset)
|
||||||
|
|
||||||
id = i32(s, 8+i*8)
|
debug ('property id=%d: type=%d offset=%X' % (id, type, offset))
|
||||||
offset = i32(s, 12+i*8)
|
|
||||||
type = i32(s, offset)
|
|
||||||
|
|
||||||
debug ('property id=%d: type=%d offset=%X' % (id, type, offset))
|
# test for common types first (should perhaps use
|
||||||
|
# a dictionary instead?)
|
||||||
|
|
||||||
# test for common types first (should perhaps use
|
if type == VT_I2: # 16-bit signed integer
|
||||||
# a dictionary instead?)
|
value = i16(s, offset+4)
|
||||||
|
if value >= 32768:
|
||||||
if type == VT_I2:
|
value = value - 65536
|
||||||
value = i16(s, offset+4)
|
elif type == VT_UI2: # 2-byte unsigned integer
|
||||||
if value >= 32768:
|
value = i16(s, offset+4)
|
||||||
value = value - 65536
|
elif type in (VT_I4, VT_INT, VT_ERROR):
|
||||||
elif type == VT_UI2:
|
# VT_I4: 32-bit signed integer
|
||||||
value = i16(s, offset+4)
|
# VT_ERROR: HRESULT, similar to 32-bit signed integer,
|
||||||
elif type in (VT_I4, VT_ERROR):
|
# see http://msdn.microsoft.com/en-us/library/cc230330.aspx
|
||||||
value = i32(s, offset+4)
|
value = i32(s, offset+4)
|
||||||
elif type == VT_UI4:
|
elif type in (VT_UI4, VT_UINT): # 4-byte unsigned integer
|
||||||
value = i32(s, offset+4) # FIXME
|
value = i32(s, offset+4) # FIXME
|
||||||
elif type in (VT_BSTR, VT_LPSTR):
|
elif type in (VT_BSTR, VT_LPSTR):
|
||||||
count = i32(s, offset+4)
|
# CodePageString, see http://msdn.microsoft.com/en-us/library/dd942354.aspx
|
||||||
value = s[offset+8:offset+8+count-1]
|
# size is a 32 bits integer, including the null terminator, and
|
||||||
elif type == VT_BLOB:
|
# possibly trailing or embedded null chars
|
||||||
count = i32(s, offset+4)
|
#TODO: if codepage is unicode, the string should be converted as such
|
||||||
value = s[offset+8:offset+8+count]
|
count = i32(s, offset+4)
|
||||||
elif type == VT_LPWSTR:
|
value = s[offset+8:offset+8+count-1]
|
||||||
count = i32(s, offset+4)
|
# remove all null chars:
|
||||||
value = _unicode(s[offset+8:offset+8+count*2])
|
value = value.replace('\x00', '')
|
||||||
elif type == VT_FILETIME:
|
elif type == VT_BLOB:
|
||||||
value = long(i32(s, offset+4)) + (long(i32(s, offset+8))<<32)
|
# binary large object (BLOB)
|
||||||
# FILETIME is a 64-bit int: "number of 100ns periods
|
# see http://msdn.microsoft.com/en-us/library/dd942282.aspx
|
||||||
# since Jan 1,1601".
|
count = i32(s, offset+4)
|
||||||
if convert_time:
|
value = s[offset+8:offset+8+count]
|
||||||
# convert FILETIME to Python datetime.datetime
|
elif type == VT_LPWSTR:
|
||||||
# inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/
|
# UnicodeString
|
||||||
_FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0)
|
# see http://msdn.microsoft.com/en-us/library/dd942313.aspx
|
||||||
value = _FILETIME_null_date + datetime.timedelta(microseconds=value/10)
|
# "the string should NOT contain embedded or additional trailing
|
||||||
|
# null characters."
|
||||||
|
count = i32(s, offset+4)
|
||||||
|
value = _unicode(s[offset+8:offset+8+count*2])
|
||||||
|
elif type == VT_FILETIME:
|
||||||
|
value = long(i32(s, offset+4)) + (long(i32(s, offset+8))<<32)
|
||||||
|
# FILETIME is a 64-bit int: "number of 100ns periods
|
||||||
|
# since Jan 1,1601".
|
||||||
|
if convert_time and id not in no_conversion:
|
||||||
|
debug('Converting property #%d to python datetime, value=%d=%fs'
|
||||||
|
%(id, value, float(value)/10000000L))
|
||||||
|
# convert FILETIME to Python datetime.datetime
|
||||||
|
# inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/
|
||||||
|
_FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0)
|
||||||
|
debug('timedelta days=%d' % (value/(10*1000000*3600*24)))
|
||||||
|
value = _FILETIME_null_date + datetime.timedelta(microseconds=value/10)
|
||||||
|
else:
|
||||||
|
# legacy code kept for backward compatibility: returns a
|
||||||
|
# number of seconds since Jan 1,1601
|
||||||
|
value = value / 10000000L # seconds
|
||||||
|
elif type == VT_UI1: # 1-byte unsigned integer
|
||||||
|
value = ord(s[offset+4])
|
||||||
|
elif type == VT_CLSID:
|
||||||
|
value = _clsid(s[offset+4:offset+20])
|
||||||
|
elif type == VT_CF:
|
||||||
|
# PropertyIdentifier or ClipboardData??
|
||||||
|
# see http://msdn.microsoft.com/en-us/library/dd941945.aspx
|
||||||
|
count = i32(s, offset+4)
|
||||||
|
value = s[offset+8:offset+8+count]
|
||||||
|
elif type == VT_BOOL:
|
||||||
|
# VARIANT_BOOL, 16 bits bool, 0x0000=Fals, 0xFFFF=True
|
||||||
|
# see http://msdn.microsoft.com/en-us/library/cc237864.aspx
|
||||||
|
value = bool(i16(s, offset+4))
|
||||||
else:
|
else:
|
||||||
# legacy code kept for backward compatibility: returns a
|
value = None # everything else yields "None"
|
||||||
# number of seconds since Jan 1,1601
|
debug ('property id=%d: type=%d not implemented in parser yet' % (id, type))
|
||||||
value = value / 10000000L # seconds
|
|
||||||
elif type == VT_UI1:
|
|
||||||
value = ord(s[offset+4])
|
|
||||||
elif type == VT_CLSID:
|
|
||||||
value = _clsid(s[offset+4:offset+20])
|
|
||||||
elif type == VT_CF:
|
|
||||||
count = i32(s, offset+4)
|
|
||||||
value = s[offset+8:offset+8+count]
|
|
||||||
else:
|
|
||||||
value = None # everything else yields "None"
|
|
||||||
|
|
||||||
# FIXME: add support for VT_VECTOR
|
# missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE,
|
||||||
|
# VT_DECIMAL, VT_I1, VT_I8, VT_UI8,
|
||||||
|
# see http://msdn.microsoft.com/en-us/library/dd942033.aspx
|
||||||
|
|
||||||
#print "%08x" % id, repr(value),
|
# FIXME: add support for VT_VECTOR
|
||||||
#print "(%s)" % VT[i32(s, offset) & 0xFFF]
|
# VT_VECTOR is a 32 uint giving the number of items, followed by
|
||||||
|
# the items in sequence. The VT_VECTOR value is combined with the
|
||||||
|
# type of items, e.g. VT_VECTOR|VT_BSTR
|
||||||
|
# see http://msdn.microsoft.com/en-us/library/dd942011.aspx
|
||||||
|
|
||||||
data[id] = value
|
#print "%08x" % id, repr(value),
|
||||||
|
#print "(%s)" % VT[i32(s, offset) & 0xFFF]
|
||||||
|
|
||||||
|
data[id] = value
|
||||||
|
except:
|
||||||
|
# catch exception while parsing each property, and only raise
|
||||||
|
# a DEFECT_INCORRECT, because parsing can go on
|
||||||
|
exctype, excvalue = sys.exc_info()[:2]
|
||||||
|
self._raise_defect(DEFECT_INCORRECT, excvalue)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
@ -1795,7 +1859,7 @@ Options:
|
||||||
check_streams = True
|
check_streams = True
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ole = OleFileIO(filename, raise_defects=DEFECT_INCORRECT)
|
ole = OleFileIO(filename)#, raise_defects=DEFECT_INCORRECT)
|
||||||
print "-" * 68
|
print "-" * 68
|
||||||
print filename
|
print filename
|
||||||
print "-" * 68
|
print "-" * 68
|
||||||
|
|
Loading…
Reference in New Issue
Block a user