version 0.19 2009-12-10

This commit is contained in:
decalage 2011-10-20 05:34:48 +02:00 committed by Martin Panter
parent 70a99619bc
commit 61a3ceb812

View File

@ -6,9 +6,9 @@ OleFileIO_PL:
Microsoft Compound Document File Format), such as Microsoft Office Microsoft Compound Document File Format), such as Microsoft Office
documents, Image Composer and FlashPix files, Outlook messages, ... documents, Image Composer and FlashPix files, Outlook messages, ...
version 0.18 2007-12-05 Philippe Lagadec - http://lagasoft.free.fr version 0.19 2009-12-10 Philippe Lagadec - http://www.decalage.info
Project website: http://lagasoft.free.fr/python/olefileio Project website: http://www.decalage.info/python/olefileio
Improved version of the OleFileIO module from PIL library v1.1.6 Improved version of the OleFileIO module from PIL library v1.1.6
See: http://www.pythonware.com/products/pil/index.htm See: http://www.pythonware.com/products/pil/index.htm
@ -16,7 +16,7 @@ See: http://www.pythonware.com/products/pil/index.htm
The Python Imaging Library (PIL) is The Python Imaging Library (PIL) is
Copyright (c) 1997-2005 by Secret Labs AB Copyright (c) 1997-2005 by Secret Labs AB
Copyright (c) 1995-2005 by Fredrik Lundh Copyright (c) 1995-2005 by Fredrik Lundh
OleFileIO_PL changes are Copyright (c) 2005-2007 by Philippe Lagadec OleFileIO_PL changes are Copyright (c) 2005-2009 by Philippe Lagadec
See source code and LICENSE.txt for information on usage and redistribution. See source code and LICENSE.txt for information on usage and redistribution.
@ -24,15 +24,15 @@ WARNING: THIS IS (STILL) WORK IN PROGRESS.
""" """
__author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec" __author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec"
__date__ = "2007-12-08" __date__ = "2009-12-10"
__version__ = '0.18' __version__ = '0.19'
#--- LICENSE ------------------------------------------------------------------ #--- LICENSE ------------------------------------------------------------------
# OleFileIO_PL is an improved version of the OleFileIO module from the # OleFileIO_PL is an improved version of the OleFileIO module from the
# Python Imaging Library (PIL). # Python Imaging Library (PIL).
# OleFileIO_PL changes are Copyright (c) 2005-2007 by Philippe Lagadec # OleFileIO_PL changes are Copyright (c) 2005-2009 by Philippe Lagadec
# #
# The Python Imaging Library (PIL) is # The Python Imaging Library (PIL) is
# Copyright (c) 1997-2005 by Secret Labs AB # Copyright (c) 1997-2005 by Secret Labs AB
@ -102,6 +102,8 @@ __version__ = '0.18'
# 2007-12-05 v0.18 PL: - fixed several bugs in handling of FAT, MiniFAT and # 2007-12-05 v0.18 PL: - fixed several bugs in handling of FAT, MiniFAT and
# streams # streams
# - added option '-c' in main to check all streams # - added option '-c' in main to check all streams
# 2009-12-10 v0.19 PL: - bugfix for 32 bit arrays on 64 bits platforms
# (thanks to Ben G. and Martijn for reporting the bug)
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# TODO (for version 1.0): # TODO (for version 1.0):
@ -193,6 +195,16 @@ import string, StringIO, struct, array, os.path
#[PL] Define explicitly the public API to avoid private objects in pydoc: #[PL] Define explicitly the public API to avoid private objects in pydoc:
__all__ = ['OleFileIO', 'isOleFile'] __all__ = ['OleFileIO', 'isOleFile']
#[PL] workaround to fix an issue with array item size on 64 bits systems:
# The FAT, DIFAT and MiniFAT sectors are parsed with array.array() as
# sequences of 32 bits unsigned integers, but the byte width of an array
# typecode depends on the platform's C types. UINT32 is set to the first
# unsigned typecode whose items are exactly 4 bytes wide.
if array.array('L').itemsize == 4:
    # 'L' (unsigned long) items are 4 bytes wide on this platform:
    UINT32 = 'L'
elif array.array('I').itemsize == 4:
    # 'L' is not 4 bytes wide here, but 'I' (unsigned int) is:
    UINT32 = 'I'
else:
    # No suitable typecode found: fail at import time rather than parse
    # sectors with a wrong item size. (Call-style raise is valid on both
    # Python 2 and Python 3.)
    raise ValueError('Need to fix a bug with 32 bit arrays, please contact author...')
#[PL] These workarounds were inspired from the Path module #[PL] These workarounds were inspired from the Path module
# (see http://www.jorendorff.com/articles/python/path/) # (see http://www.jorendorff.com/articles/python/path/)
@ -362,11 +374,11 @@ def _clsid(clsid):
try: try:
# is Unicode supported ? # is Unicode supported ?
unicode unicode
def _unicode(s, errors='replace'): def _unicode(s, errors='replace'):
""" """
Map unicode string to Latin 1. (Python with Unicode support) Map unicode string to Latin 1. (Python with Unicode support)
s: UTF-16LE unicode string to convert to Latin-1 s: UTF-16LE unicode string to convert to Latin-1
errors: 'replace', 'ignore' or 'strict'. See Python doc for unicode() errors: 'replace', 'ignore' or 'strict'. See Python doc for unicode()
""" """
@ -381,7 +393,7 @@ try:
else: else:
# Second the unicode string is converted to Latin-1 # Second the unicode string is converted to Latin-1
return u.encode('latin_1', errors) return u.encode('latin_1', errors)
except: except:
# there was an error during Unicode to Latin-1 conversion: # there was an error during Unicode to Latin-1 conversion:
raise IOError, 'incorrect Unicode name' raise IOError, 'incorrect Unicode name'
@ -575,7 +587,7 @@ class _OleDirectoryEntry:
""" """
Constructor for an _OleDirectoryEntry object. Constructor for an _OleDirectoryEntry object.
Parses a 128-bytes entry from the OLE Directory stream. Parses a 128-bytes entry from the OLE Directory stream.
entry : string (must be 128 bytes long) entry : string (must be 128 bytes long)
sid : index of this directory entry in the OLE file directory sid : index of this directory entry in the OLE file directory
olefile: OleFileIO containing this directory entry olefile: OleFileIO containing this directory entry
@ -662,7 +674,7 @@ class _OleDirectoryEntry:
else: else:
minifat = False minifat = False
olefile._check_duplicate_stream(self.isectStart, minifat) olefile._check_duplicate_stream(self.isectStart, minifat)
def build_storage_tree(self): def build_storage_tree(self):
@ -789,7 +801,7 @@ class OleFileIO:
def __init__(self, filename = None, raise_defects=DEFECT_FATAL): def __init__(self, filename = None, raise_defects=DEFECT_FATAL):
""" """
Constructor for OleFileIO class. Constructor for OleFileIO class.
filename: file to open. filename: file to open.
raise_defects: minimal level for defects to be raised as exceptions. raise_defects: minimal level for defects to be raised as exceptions.
(use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a
@ -816,7 +828,7 @@ class OleFileIO:
# added by [PL] # added by [PL]
if defect_level >= self._raise_defects_level: if defect_level >= self._raise_defects_level:
raise IOError, message raise IOError, message
def open(self, filename): def open(self, filename):
""" """
@ -1044,7 +1056,7 @@ class OleFileIO:
if not DEBUG_MODE: if not DEBUG_MODE:
return return
VPL=8 # number of values per line (8+1 * 8+1 = 81) VPL=8 # number of values per line (8+1 * 8+1 = 81)
tab = array.array('L', sector) tab = array.array(UINT32, sector)
nbsect = len(tab) nbsect = len(tab)
nlines = (nbsect+VPL-1)/VPL nlines = (nbsect+VPL-1)/VPL
print "index", print "index",
@ -1076,7 +1088,7 @@ class OleFileIO:
fat1 = sect fat1 = sect
else: else:
# if it's a raw sector, it is parsed in an array # if it's a raw sector, it is parsed in an array
fat1 = array.array('L', sect) fat1 = array.array(UINT32, sect)
self.dumpsect(sect) self.dumpsect(sect)
# The FAT is a sector chain starting at the first index of itself. # The FAT is a sector chain starting at the first index of itself.
for isect in fat1: for isect in fat1:
@ -1088,7 +1100,7 @@ class OleFileIO:
s = self.getsect(isect) s = self.getsect(isect)
# parse it as an array of 32 bits integers, and add it to the # parse it as an array of 32 bits integers, and add it to the
# global FAT array # global FAT array
self.fat = self.fat + array.array('L', s) self.fat = self.fat + array.array(UINT32, s)
return isect return isect
@ -1098,7 +1110,7 @@ class OleFileIO:
""" """
# The header contains the sector numbers # The header contains the sector numbers
# for the first 109 FAT sectors. Additional sectors are # for the first 109 FAT sectors. Additional sectors are
# described by DIF blocks # described by DIF blocks
sect = header[76:512] sect = header[76:512]
debug( "len(sect)=%d, so %d integers" % (len(sect), len(sect)/4) ) debug( "len(sect)=%d, so %d integers" % (len(sect), len(sect)/4) )
@ -1106,7 +1118,7 @@ class OleFileIO:
# [PL] FAT is an array of 32 bits unsigned ints, it's more effective # [PL] FAT is an array of 32 bits unsigned ints, it's more effective
# to use an array than a list in Python. # to use an array than a list in Python.
# It's initialized as empty first: # It's initialized as empty first:
self.fat = array.array('L') self.fat = array.array(UINT32)
self.loadfat_sect(sect) self.loadfat_sect(sect)
#self.dumpfat(self.fat) #self.dumpfat(self.fat)
## for i in range(0, len(sect), 4): ## for i in range(0, len(sect), 4):
@ -1116,7 +1128,7 @@ class OleFileIO:
## break ## break
## s = self.getsect(ix) ## s = self.getsect(ix)
## #fat = fat + map(lambda i, s=s: i32(s, i), range(0, len(s), 4)) ## #fat = fat + map(lambda i, s=s: i32(s, i), range(0, len(s), 4))
## fat = fat + array.array('L', s) ## fat = fat + array.array(UINT32, s)
if self.csectDif != 0: if self.csectDif != 0:
# [PL] There's a DIFAT because file is larger than 6.8MB # [PL] There's a DIFAT because file is larger than 6.8MB
# some checks just in case: # some checks just in case:
@ -1139,7 +1151,7 @@ class OleFileIO:
debug( "DIFAT block %d, sector %X" % (i, isect_difat) ) debug( "DIFAT block %d, sector %X" % (i, isect_difat) )
#TODO: check if corresponding FAT SID = DIFSECT #TODO: check if corresponding FAT SID = DIFSECT
sector_difat = self.getsect(isect_difat) sector_difat = self.getsect(isect_difat)
difat = array.array('L', sector_difat) difat = array.array(UINT32, sector_difat)
self.dumpsect(sector_difat) self.dumpsect(sector_difat)
self.loadfat_sect(difat[:127]) self.loadfat_sect(difat[:127])
# last DIFAT pointer is next DIFAT sector: # last DIFAT pointer is next DIFAT sector:
@ -1189,7 +1201,7 @@ class OleFileIO:
s = self._open(self.minifatsect, stream_size, force_FAT=True).read() s = self._open(self.minifatsect, stream_size, force_FAT=True).read()
#[PL] Old code replaced by an array: #[PL] Old code replaced by an array:
#self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4)) #self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4))
self.minifat = array.array('L', s) self.minifat = array.array(UINT32, s)
# Then shrink the array to used size, to avoid indexes out of MiniStream: # Then shrink the array to used size, to avoid indexes out of MiniStream:
debug('MiniFAT shrunk from %d to %d sectors' % (len(self.minifat), nb_minisectors)) debug('MiniFAT shrunk from %d to %d sectors' % (len(self.minifat), nb_minisectors))
self.minifat = self.minifat[:nb_minisectors] self.minifat = self.minifat[:nb_minisectors]
@ -1255,8 +1267,8 @@ class OleFileIO:
self.root = self.direntries[0] self.root = self.direntries[0]
# read and build all storage trees, starting from the root: # read and build all storage trees, starting from the root:
self.root.build_storage_tree() self.root.build_storage_tree()
def _load_direntry (self, sid): def _load_direntry (self, sid):
""" """
Load a directory entry from the directory. Load a directory entry from the directory.
@ -1292,7 +1304,7 @@ class OleFileIO:
""" """
Open a stream, either in FAT or MiniFAT according to its size. Open a stream, either in FAT or MiniFAT according to its size.
(openstream helper) (openstream helper)
start: index of first sector start: index of first sector
size: size of stream (or nothing if size is unknown) size: size of stream (or nothing if size is unknown)
force_FAT: if False (default), stream will be opened in FAT or MiniFAT force_FAT: if False (default), stream will be opened in FAT or MiniFAT
@ -1378,7 +1390,7 @@ class OleFileIO:
def openstream(self, filename): def openstream(self, filename):
""" """
Open a stream as a read-only file object (StringIO). Open a stream as a read-only file object (StringIO).
filename: path of stream in storage tree (except root entry), either: filename: path of stream in storage tree (except root entry), either:
- a string using Unix path syntax, for example: - a string using Unix path syntax, for example:
'storage_1/storage_1.2/stream' 'storage_1/storage_1.2/stream'
@ -1480,7 +1492,7 @@ class OleFileIO:
id = i32(s, 8+i*8) id = i32(s, 8+i*8)
offset = i32(s, 12+i*8) offset = i32(s, 12+i*8)
type = i32(s, offset) type = i32(s, offset)
debug ('property id=%d: type=%d offset=%X' % (id, type, offset)) debug ('property id=%d: type=%d offset=%X' % (id, type, offset))
# test for common types first (should perhaps use # test for common types first (should perhaps use
@ -1544,11 +1556,11 @@ if __name__ == "__main__":
print """ print """
Launched from command line, this script parses OLE files and prints info. Launched from command line, this script parses OLE files and prints info.
Usage: OleFileIO_PL.py [-d] [-s] <file> [file2 ...] Usage: OleFileIO_PL.py [-d] [-c] <file> [file2 ...]
Options: Options:
-d : debug mode (display a lot of debug information, for developers only) -d : debug mode (display a lot of debug information, for developers only)
-s : check all streams (for debugging purposes) -c : check all streams (for debugging purposes)
""" """
sys.exit() sys.exit()
@ -1588,7 +1600,7 @@ Options:
v = '(binary data)' v = '(binary data)'
break break
print " ", k, v print " ", k, v
if check_streams: if check_streams:
# Read all streams to check if there are errors: # Read all streams to check if there are errors:
print '\nChecking streams...' print '\nChecking streams...'