version 0.19 2009-12-10

This commit is contained in:
decalage 2011-10-20 05:34:48 +02:00 committed by Martin Panter
parent 70a99619bc
commit 61a3ceb812

View File

@ -6,9 +6,9 @@ OleFileIO_PL:
Microsoft Compound Document File Format), such as Microsoft Office Microsoft Compound Document File Format), such as Microsoft Office
documents, Image Composer and FlashPix files, Outlook messages, ... documents, Image Composer and FlashPix files, Outlook messages, ...
version 0.18 2007-12-05 Philippe Lagadec - http://lagasoft.free.fr version 0.19 2009-12-10 Philippe Lagadec - http://www.decalage.info
Project website: http://lagasoft.free.fr/python/olefileio Project website: http://www.decalage.info/python/olefileio
Improved version of the OleFileIO module from PIL library v1.1.6 Improved version of the OleFileIO module from PIL library v1.1.6
See: http://www.pythonware.com/products/pil/index.htm See: http://www.pythonware.com/products/pil/index.htm
@ -16,7 +16,7 @@ See: http://www.pythonware.com/products/pil/index.htm
The Python Imaging Library (PIL) is The Python Imaging Library (PIL) is
Copyright (c) 1997-2005 by Secret Labs AB Copyright (c) 1997-2005 by Secret Labs AB
Copyright (c) 1995-2005 by Fredrik Lundh Copyright (c) 1995-2005 by Fredrik Lundh
OleFileIO_PL changes are Copyright (c) 2005-2007 by Philippe Lagadec OleFileIO_PL changes are Copyright (c) 2005-2009 by Philippe Lagadec
See source code and LICENSE.txt for information on usage and redistribution. See source code and LICENSE.txt for information on usage and redistribution.
@ -24,15 +24,15 @@ WARNING: THIS IS (STILL) WORK IN PROGRESS.
""" """
__author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec" __author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec"
__date__ = "2007-12-08" __date__ = "2009-12-10"
__version__ = '0.18' __version__ = '0.19'
#--- LICENSE ------------------------------------------------------------------ #--- LICENSE ------------------------------------------------------------------
# OleFileIO_PL is an improved version of the OleFileIO module from the # OleFileIO_PL is an improved version of the OleFileIO module from the
# Python Imaging Library (PIL). # Python Imaging Library (PIL).
# OleFileIO_PL changes are Copyright (c) 2005-2007 by Philippe Lagadec # OleFileIO_PL changes are Copyright (c) 2005-2009 by Philippe Lagadec
# #
# The Python Imaging Library (PIL) is # The Python Imaging Library (PIL) is
# Copyright (c) 1997-2005 by Secret Labs AB # Copyright (c) 1997-2005 by Secret Labs AB
@ -102,6 +102,8 @@ __version__ = '0.18'
# 2007-12-05 v0.18 PL: - fixed several bugs in handling of FAT, MiniFAT and # 2007-12-05 v0.18 PL: - fixed several bugs in handling of FAT, MiniFAT and
# streams # streams
# - added option '-c' in main to check all streams # - added option '-c' in main to check all streams
# 2009-12-10 v0.19 PL: - bugfix for 32 bit arrays on 64 bits platforms
# (thanks to Ben G. and Martijn for reporting the bug)
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# TODO (for version 1.0): # TODO (for version 1.0):
@ -193,6 +195,16 @@ import string, StringIO, struct, array, os.path
#[PL] Define explicitly the public API to avoid private objects in pydoc: #[PL] Define explicitly the public API to avoid private objects in pydoc:
__all__ = ['OleFileIO', 'isOleFile'] __all__ = ['OleFileIO', 'isOleFile']
#[PL] workaround to fix an issue with array item size on 64 bits systems:
# The FAT, DIFAT and MiniFAT sectors are parsed with array.array() as tables
# of 32 bits unsigned integers. The size of an array item depends on the
# platform's C types, so the typecode is selected once, at import time, by
# checking which one really gives 4-byte items.
if array.array('L').itemsize == 4:
    # unsigned long is 4 bytes wide here (typically on 32 bits platforms):
    UINT32 = 'L'
elif array.array('I').itemsize == 4:
    # unsigned long is wider than 4 bytes but unsigned int is 4 bytes
    # (typically on 64 bits platforms):
    UINT32 = 'I'
else:
    # Neither typecode gives 4-byte items: fail at import time rather than
    # parse sectors with the wrong item size.
    # The call form of raise is used because it is valid on both
    # Python 2 and Python 3, unlike "raise ValueError, msg".
    raise ValueError('Need to fix a bug with 32 bit arrays, please contact author...')
#[PL] These workarounds were inspired from the Path module #[PL] These workarounds were inspired from the Path module
# (see http://www.jorendorff.com/articles/python/path/) # (see http://www.jorendorff.com/articles/python/path/)
@ -1044,7 +1056,7 @@ class OleFileIO:
if not DEBUG_MODE: if not DEBUG_MODE:
return return
VPL=8 # number of values per line (8+1 * 8+1 = 81) VPL=8 # number of values per line (8+1 * 8+1 = 81)
tab = array.array('L', sector) tab = array.array(UINT32, sector)
nbsect = len(tab) nbsect = len(tab)
nlines = (nbsect+VPL-1)/VPL nlines = (nbsect+VPL-1)/VPL
print "index", print "index",
@ -1076,7 +1088,7 @@ class OleFileIO:
fat1 = sect fat1 = sect
else: else:
# if it's a raw sector, it is parsed in an array # if it's a raw sector, it is parsed in an array
fat1 = array.array('L', sect) fat1 = array.array(UINT32, sect)
self.dumpsect(sect) self.dumpsect(sect)
# The FAT is a sector chain starting at the first index of itself. # The FAT is a sector chain starting at the first index of itself.
for isect in fat1: for isect in fat1:
@ -1088,7 +1100,7 @@ class OleFileIO:
s = self.getsect(isect) s = self.getsect(isect)
# parse it as an array of 32 bits integers, and add it to the # parse it as an array of 32 bits integers, and add it to the
# global FAT array # global FAT array
self.fat = self.fat + array.array('L', s) self.fat = self.fat + array.array(UINT32, s)
return isect return isect
@ -1106,7 +1118,7 @@ class OleFileIO:
# [PL] FAT is an array of 32 bits unsigned ints, it's more effective # [PL] FAT is an array of 32 bits unsigned ints, it's more effective
# to use an array than a list in Python. # to use an array than a list in Python.
# It's initialized as empty first: # It's initialized as empty first:
self.fat = array.array('L') self.fat = array.array(UINT32)
self.loadfat_sect(sect) self.loadfat_sect(sect)
#self.dumpfat(self.fat) #self.dumpfat(self.fat)
## for i in range(0, len(sect), 4): ## for i in range(0, len(sect), 4):
@ -1116,7 +1128,7 @@ class OleFileIO:
## break ## break
## s = self.getsect(ix) ## s = self.getsect(ix)
## #fat = fat + map(lambda i, s=s: i32(s, i), range(0, len(s), 4)) ## #fat = fat + map(lambda i, s=s: i32(s, i), range(0, len(s), 4))
## fat = fat + array.array('L', s) ## fat = fat + array.array(UINT32, s)
if self.csectDif != 0: if self.csectDif != 0:
# [PL] There's a DIFAT because file is larger than 6.8MB # [PL] There's a DIFAT because file is larger than 6.8MB
# some checks just in case: # some checks just in case:
@ -1139,7 +1151,7 @@ class OleFileIO:
debug( "DIFAT block %d, sector %X" % (i, isect_difat) ) debug( "DIFAT block %d, sector %X" % (i, isect_difat) )
#TODO: check if corresponding FAT SID = DIFSECT #TODO: check if corresponding FAT SID = DIFSECT
sector_difat = self.getsect(isect_difat) sector_difat = self.getsect(isect_difat)
difat = array.array('L', sector_difat) difat = array.array(UINT32, sector_difat)
self.dumpsect(sector_difat) self.dumpsect(sector_difat)
self.loadfat_sect(difat[:127]) self.loadfat_sect(difat[:127])
# last DIFAT pointer is next DIFAT sector: # last DIFAT pointer is next DIFAT sector:
@ -1189,7 +1201,7 @@ class OleFileIO:
s = self._open(self.minifatsect, stream_size, force_FAT=True).read() s = self._open(self.minifatsect, stream_size, force_FAT=True).read()
#[PL] Old code replaced by an array: #[PL] Old code replaced by an array:
#self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4)) #self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4))
self.minifat = array.array('L', s) self.minifat = array.array(UINT32, s)
# Then shrink the array to used size, to avoid indexes out of MiniStream: # Then shrink the array to used size, to avoid indexes out of MiniStream:
debug('MiniFAT shrunk from %d to %d sectors' % (len(self.minifat), nb_minisectors)) debug('MiniFAT shrunk from %d to %d sectors' % (len(self.minifat), nb_minisectors))
self.minifat = self.minifat[:nb_minisectors] self.minifat = self.minifat[:nb_minisectors]
@ -1544,11 +1556,11 @@ if __name__ == "__main__":
print """ print """
Launched from command line, this script parses OLE files and prints info. Launched from command line, this script parses OLE files and prints info.
Usage: OleFileIO_PL.py [-d] [-s] <file> [file2 ...] Usage: OleFileIO_PL.py [-d] [-c] <file> [file2 ...]
Options: Options:
-d : debug mode (display a lot of debug information, for developers only) -d : debug mode (display a lot of debug information, for developers only)
-s : check all streams (for debugging purposes) -c : check all streams (for debugging purposes)
""" """
sys.exit() sys.exit()