- getproperties: improved exception handling

- _raise_defect: added option to set exception type
- all non-fatal issues are now recorded, and displayed when run as a script
This commit is contained in:
decalage 2013-05-27 07:27:04 +02:00 committed by Martin Panter
parent 8e826441b2
commit 7d37cf071f

View File

@ -6,7 +6,7 @@ OleFileIO_PL:
Microsoft Compound Document File Format), such as Microsoft Office Microsoft Compound Document File Format), such as Microsoft Office
documents, Image Composer and FlashPix files, Outlook messages, ... documents, Image Composer and FlashPix files, Outlook messages, ...
version 0.25 2013-05-24 Philippe Lagadec - http://www.decalage.info version 0.25 2013-05-27 Philippe Lagadec - http://www.decalage.info
Project website: http://www.decalage.info/python/olefileio Project website: http://www.decalage.info/python/olefileio
@ -24,7 +24,7 @@ WARNING: THIS IS (STILL) WORK IN PROGRESS.
""" """
__author__ = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)" __author__ = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)"
__date__ = "2013-05-24" __date__ = "2013-05-27"
__version__ = '0.25' __version__ = '0.25'
#--- LICENSE ------------------------------------------------------------------ #--- LICENSE ------------------------------------------------------------------
@ -123,12 +123,14 @@ __version__ = '0.25'
# - getproperties: filter out null chars from strings # - getproperties: filter out null chars from strings
# - getproperties: raise non-fatal defects instead of # - getproperties: raise non-fatal defects instead of
# exceptions when properties cannot be parsed properly # exceptions when properties cannot be parsed properly
# 2013-05-27 PL: - getproperties: improved exception handling
# - _raise_defect: added option to set exception type
# - all non-fatal issues are now recorded, and displayed
# when run as a script
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# TODO (for version 1.0): # TODO (for version 1.0):
# + _raise_defect: store all defects that are not raised as exceptions, and
# display them in main for information.
# + add path attrib to _OleDirEntry, set it once and for all in init or # + add path attrib to _OleDirEntry, set it once and for all in init or
# append_kids (then listdir/_list can be simplified) # append_kids (then listdir/_list can be simplified)
# - TESTS with Linux, MacOSX, Python 1.5.2, various files, PIL, ... # - TESTS with Linux, MacOSX, Python 1.5.2, various files, PIL, ...
@ -151,9 +153,6 @@ __version__ = '0.25'
# http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx # http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx
# IDEAS: # IDEAS:
# - allow _raise_defect to raise different exceptions, not only IOError
# - provide a class with named attributes to get well-known properties of
# MS Office documents (title, author, ...) ?
# - in OleFileIO._open and _OleStream, use size=None instead of 0x7FFFFFFF for # - in OleFileIO._open and _OleStream, use size=None instead of 0x7FFFFFFF for
# streams with unknown size # streams with unknown size
# - use arrays of int instead of long integers for FAT/MiniFAT, to improve # - use arrays of int instead of long integers for FAT/MiniFAT, to improve
@ -979,12 +978,16 @@ class OleFileIO:
(use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a
security-oriented application, see source code for details) security-oriented application, see source code for details)
""" """
# minimal level for defects to be raised as exceptions:
self._raise_defects_level = raise_defects self._raise_defects_level = raise_defects
# list of defects/issues not raised as exceptions:
# tuples of (exception type, message)
self.parsing_issues = []
if filename: if filename:
self.open(filename) self.open(filename)
def _raise_defect(self, defect_level, message): def _raise_defect(self, defect_level, message, exception_type=IOError):
""" """
This method should be called for any defect found during file parsing. This method should be called for any defect found during file parsing.
It may raise an IOError exception according to the minimal level chosen It may raise an IOError exception according to the minimal level chosen
@ -996,10 +999,14 @@ class OleFileIO:
DEFECT_INCORRECT : an error according to specifications, but parsing can go on DEFECT_INCORRECT : an error according to specifications, but parsing can go on
DEFECT_FATAL : an error which cannot be ignored, parsing is impossible DEFECT_FATAL : an error which cannot be ignored, parsing is impossible
message: string describing the defect, used with raised exception. message: string describing the defect, used with raised exception.
exception_type: exception class to be raised, IOError by default
""" """
# added by [PL] # added by [PL]
if defect_level >= self._raise_defects_level: if defect_level >= self._raise_defects_level:
raise IOError, message raise exception_type, message
else:
# just record the issue, no exception raised:
self.parsing_issues.append((exception_type, message))
def open(self, filename): def open(self, filename):
@ -1689,6 +1696,11 @@ class OleFileIO:
# make sure no_conversion is a list, just to simplify code below: # make sure no_conversion is a list, just to simplify code below:
if no_conversion == None: if no_conversion == None:
no_conversion = [] no_conversion = []
# stream path as a string to report exceptions:
streampath = filename
if not isinstance(streampath, str):
streampath = '/'.join(streampath)
fp = self.openstream(filename) fp = self.openstream(filename)
data = {} data = {}
@ -1705,16 +1717,21 @@ class OleFileIO:
# get section # get section
s = "****" + fp.read(i32(fp.read(4))-4) s = "****" + fp.read(i32(fp.read(4))-4)
# number of properties:
num_props = i32(s, 4)
except: except:
# catch exception while parsing property header, and only raise # catch exception while parsing property header, and only raise
# a DEFECT_INCORRECT then return an empty dict, because this is not # a DEFECT_INCORRECT then return an empty dict, because this is not
# a fatal error when parsing the whole file # a fatal error when parsing the whole file
exctype, excvalue = sys.exc_info()[:2] exctype, excvalue = sys.exc_info()[:2]
self._raise_defect(DEFECT_INCORRECT, excvalue) msg = 'Error while parsing properties header in stream %s: %s' % (
repr(streampath), excvalue)
self._raise_defect(DEFECT_INCORRECT, msg, exctype)
return data return data
for i in range(i32(s, 4)): for i in range(num_props):
try: try:
id = 0 # just in case of an exception
id = i32(s, 8+i*8) id = i32(s, 8+i*8)
offset = i32(s, 12+i*8) offset = i32(s, 12+i*8)
type = i32(s, offset) type = i32(s, offset)
@ -1809,7 +1826,9 @@ class OleFileIO:
# catch exception while parsing each property, and only raise # catch exception while parsing each property, and only raise
# a DEFECT_INCORRECT, because parsing can go on # a DEFECT_INCORRECT, because parsing can go on
exctype, excvalue = sys.exc_info()[:2] exctype, excvalue = sys.exc_info()[:2]
self._raise_defect(DEFECT_INCORRECT, excvalue) msg = 'Error while parsing property id %d in stream %s: %s' % (
id, repr(streampath), excvalue)
self._raise_defect(DEFECT_INCORRECT, msg, exctype)
return data return data
@ -1911,5 +1930,13 @@ Options:
print "size :", ole.get_size('worddocument') print "size :", ole.get_size('worddocument')
if ole.exists('macros/vba'): if ole.exists('macros/vba'):
print "This document may contain VBA macros." print "This document may contain VBA macros."
# print parsing issues:
print '\nNon-fatal issues raised during parsing:'
if ole.parsing_issues:
for exctype, msg in ole.parsing_issues:
print '- %s: %s' % (exctype.__name__, msg)
else:
print 'None'
## except IOError, v: ## except IOError, v:
## print "***", "cannot read", file, "-", v ## print "***", "cannot read", file, "-", v