version 0.18 2007-12-05

This commit is contained in:
decalage 2011-10-20 05:29:49 +02:00 committed by Martin Panter
parent 18333d8edf
commit 70a99619bc

View File

@ -6,7 +6,7 @@ OleFileIO_PL:
Microsoft Compound Document File Format), such as Microsoft Office Microsoft Compound Document File Format), such as Microsoft Office
documents, Image Composer and FlashPix files, Outlook messages, ... documents, Image Composer and FlashPix files, Outlook messages, ...
version 0.17 2007-12-04 Philippe Lagadec - http://lagasoft.free.fr version 0.18 2007-12-05 Philippe Lagadec - http://lagasoft.free.fr
Project website: http://lagasoft.free.fr/python/olefileio Project website: http://lagasoft.free.fr/python/olefileio
@ -24,8 +24,8 @@ WARNING: THIS IS (STILL) WORK IN PROGRESS.
""" """
__author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec" __author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec"
__date__ = "2007-12-04" __date__ = "2007-12-08"
__version__ = '0.17' __version__ = '0.18'
#--- LICENSE ------------------------------------------------------------------ #--- LICENSE ------------------------------------------------------------------
@ -99,6 +99,9 @@ __version__ = '0.17'
# - added safety check in main for large or binary # - added safety check in main for large or binary
# properties # properties
# - allow size>0 for storages for some implementations # - allow size>0 for storages for some implementations
# 2007-12-05 v0.18 PL: - fixed several bugs in handling of FAT, MiniFAT and
# streams
# - added option '-c' in main to check all streams
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# TODO (for version 1.0): # TODO (for version 1.0):
@ -106,6 +109,8 @@ __version__ = '0.17'
# - add underscore to each private method, to avoid their display in # - add underscore to each private method, to avoid their display in
# pydoc/epydoc documentation # pydoc/epydoc documentation
# - replace all raised exceptions with _raise_defect (at least in OleFileIO) # - replace all raised exceptions with _raise_defect (at least in OleFileIO)
# - merge code from _OleStream and OleFileIO.getsect to read sectors
# (maybe add a class for FAT and MiniFAT ?)
# - add method to check all streams (follow sectors chains without storing all # - add method to check all streams (follow sectors chains without storing all
# stream in memory, and report anomalies) # stream in memory, and report anomalies)
# - use _OleDirectoryEntry.kids_dict to improve _find and _list ? # - use _OleDirectoryEntry.kids_dict to improve _find and _list ?
@ -434,8 +439,13 @@ class _OleStream(StringIO.StringIO):
return : a StringIO instance containing the OLE stream return : a StringIO instance containing the OLE stream
""" """
debug('_OleStream.__init__:') debug('_OleStream.__init__:')
debug(' size=%d, offset=%d, sectorsize=%d, len(fat)=%d' debug(' sect=%d (%X), size=%d, offset=%d, sectorsize=%d, len(fat)=%d, fp=%s'
%(size,offset,sectorsize,len(fat))) %(sect,sect,size,offset,sectorsize,len(fat), repr(fp)))
# for debugging messages, size of file where stream is read:
if isinstance(fp, StringIO.StringIO):
filesize = len(fp.getvalue()) # file in MiniFAT
else:
filesize = os.path.getsize(fp.name) # file on disk
#[PL] To detect malformed documents with FAT loops, we compute the #[PL] To detect malformed documents with FAT loops, we compute the
# expected number of sectors in the stream: # expected number of sectors in the stream:
unknown_size = False unknown_size = False
@ -448,6 +458,7 @@ class _OleStream(StringIO.StringIO):
unknown_size = True unknown_size = True
debug(' stream with UNKNOWN SIZE') debug(' stream with UNKNOWN SIZE')
nb_sectors = (size + (sectorsize-1)) / sectorsize nb_sectors = (size + (sectorsize-1)) / sectorsize
debug('nb_sectors = %d' % nb_sectors)
# This number should (at least) be less than the total number of # This number should (at least) be less than the total number of
# sectors in the given FAT: # sectors in the given FAT:
if nb_sectors > len(fat): if nb_sectors > len(fat):
@ -458,6 +469,7 @@ class _OleStream(StringIO.StringIO):
data = [] data = []
# if size is zero, then first sector index should be ENDOFCHAIN: # if size is zero, then first sector index should be ENDOFCHAIN:
if size == 0 and sect != ENDOFCHAIN: if size == 0 and sect != ENDOFCHAIN:
debug('size == 0 and sect != ENDOFCHAIN:')
raise IOError, 'incorrect OLE sector index for empty stream' raise IOError, 'incorrect OLE sector index for empty stream'
#[PL] A fixed-length for loop is used instead of an undefined while #[PL] A fixed-length for loop is used instead of an undefined while
# loop to avoid DoS attacks: # loop to avoid DoS attacks:
@ -468,19 +480,35 @@ class _OleStream(StringIO.StringIO):
break break
else: else:
# else this means that the stream is smaller than declared: # else this means that the stream is smaller than declared:
debug('sect=ENDOFCHAIN before expected size')
raise IOError, 'incomplete OLE stream' raise IOError, 'incomplete OLE stream'
# sector index should be within FAT: # sector index should be within FAT:
if sect<0 or sect>=len(fat): if sect<0 or sect>=len(fat):
debug('fp = '+ repr(fp)) debug('sect=%d (%X) / len(fat)=%d' % (sect, sect, len(fat)))
debug('file size: %d' % os.path.getsize(fp.name)) debug('i=%d / nb_sectors=%d' %(i, nb_sectors))
debug('offset=%d, sectorsize=%d, sect=%d, seek=%d, len read=%d, len(fat)=%d' % ## tmp_data = string.join(data, "")
(offset, sectorsize, sect, offset + sectorsize * sect, len(sector_data), len(fat))) ## f = open('test_debug.bin', 'wb')
## f.write(tmp_data)
## f.close()
## debug('data read so far: %d bytes' % len(tmp_data))
raise IOError, 'incorrect OLE FAT, sector index out of range' raise IOError, 'incorrect OLE FAT, sector index out of range'
#TODO: merge this code with OleFileIO.getsect() ?
#TODO: check if this works with 4K sectors: #TODO: check if this works with 4K sectors:
fp.seek(offset + sectorsize * sect) try:
fp.seek(offset + sectorsize * sect)
except:
debug('sect=%d, seek=%d, filesize=%d' %
(sect, offset+sectorsize*sect, filesize))
raise IOError, 'OLE sector index out of range'
sector_data = fp.read(sectorsize) sector_data = fp.read(sectorsize)
# [PL] check if there was enough data: # [PL] check if there was enough data:
if len(sector_data) != sectorsize: # Note: if sector is the last of the file, sometimes it is not a
# complete sector (of 512 or 4K), so we may read less than
# sectorsize.
if len(sector_data)!=sectorsize and sect!=(len(fat)-1):
debug('sect=%d / len(fat)=%d, seek=%d / filesize=%d, len read=%d' %
(sect, len(fat), offset+sectorsize*sect, filesize, len(sector_data)))
debug('seek+len(read)=%d' % (offset+sectorsize*sect+len(sector_data)))
raise IOError, 'incomplete OLE sector' raise IOError, 'incomplete OLE sector'
data.append(sector_data) data.append(sector_data)
# jump to next sector in the FAT: # jump to next sector in the FAT:
@ -494,14 +522,18 @@ class _OleStream(StringIO.StringIO):
raise IOError, 'incorrect last sector index in OLE stream' raise IOError, 'incorrect last sector index in OLE stream'
data = string.join(data, "") data = string.join(data, "")
# Data is truncated to the actual stream size: # Data is truncated to the actual stream size:
if len(data) > size: if len(data) >= size:
data = data[:size] data = data[:size]
# actual stream size is stored for future use: # actual stream size is stored for future use:
self.size = size self.size = size
else: elif unknown_size:
# actual stream size was not known, now we know the size of read # actual stream size was not known, now we know the size of read
# data: # data:
self.size = len(data) self.size = len(data)
else:
# read data is less than expected:
debug('len(data)=%d, size=%d' % (len(data), size))
raise IOError, 'OLE stream size is less than declared'
# when all data is read in memory, StringIO constructor is called # when all data is read in memory, StringIO constructor is called
StringIO.StringIO.__init__(self, data) StringIO.StringIO.__init__(self, data)
# Then the _OleStream object can be used as a read-only file object. # Then the _OleStream object can be used as a read-only file object.
@ -597,7 +629,7 @@ class _OleDirectoryEntry:
# name is converted from unicode to Latin-1: # name is converted from unicode to Latin-1:
self.name = _unicode(name) self.name = _unicode(name)
debug('DirEntry SID=%d: %s' % (self.sid, self.name)) debug('DirEntry SID=%d: %s' % (self.sid, repr(self.name)))
debug(' - type: %d' % self.entry_type) debug(' - type: %d' % self.entry_type)
debug(' - sect: %d' % self.isectStart) debug(' - sect: %d' % self.isectStart)
debug(' - SID left: %d, right: %d, child: %d' % (self.sid_left, debug(' - SID left: %d, right: %d, child: %d' % (self.sid_left,
@ -641,7 +673,7 @@ class _OleDirectoryEntry:
only be called for the root object once. only be called for the root object once.
""" """
debug('build_storage_tree: SID=%d - %s - sid_child=%d' debug('build_storage_tree: SID=%d - %s - sid_child=%d'
% (self.sid, self.name, self.sid_child)) % (self.sid, repr(self.name), self.sid_child))
if self.sid_child != NOSTREAM: if self.sid_child != NOSTREAM:
# if child SID is not NOSTREAM, then this entry is a storage. # if child SID is not NOSTREAM, then this entry is a storage.
# Let's walk through the tree of children to fill the kids list: # Let's walk through the tree of children to fill the kids list:
@ -676,7 +708,7 @@ class _OleDirectoryEntry:
# get child direntry: # get child direntry:
child = self.olefile._load_direntry(child_sid) #direntries[child_sid] child = self.olefile._load_direntry(child_sid) #direntries[child_sid]
debug('append_kids: child_sid=%d - %s - sid_left=%d, sid_right=%d, sid_child=%d' debug('append_kids: child_sid=%d - %s - sid_left=%d, sid_right=%d, sid_child=%d'
% (child.sid, child.name, child.sid_left, child.sid_right, child.sid_child)) % (child.sid, repr(child.name), child.sid_left, child.sid_right, child.sid_child))
# the directory entries are organized as a red-black tree. # the directory entries are organized as a red-black tree.
# (cf. Wikipedia for details) # (cf. Wikipedia for details)
# First walk through left side of the tree: # First walk through left side of the tree:
@ -847,74 +879,91 @@ class OleFileIO:
header_size = struct.calcsize(fmt_header) header_size = struct.calcsize(fmt_header)
debug( "fmt_header size = %d, +FAT = %d" % (header_size, header_size + 109*4) ) debug( "fmt_header size = %d, +FAT = %d" % (header_size, header_size + 109*4) )
header1 = header[:header_size] header1 = header[:header_size]
(Sig, clsid, MinorVersion, DllVersion, ByteOrder, SectorShift, (
MiniSectorShift, Reserved, Reserved1, csectDir, self.csectFat, sectDirStart, self.Sig,
signature, MiniSectorCutoff, MiniFatStart, csectMiniFat, self.sectDifStart, self.clsid,
self.csectDif) = struct.unpack(fmt_header, header1) self.MinorVersion,
self.DllVersion,
self.ByteOrder,
self.SectorShift,
self.MiniSectorShift,
self.Reserved, self.Reserved1,
self.csectDir,
self.csectFat,
self.sectDirStart,
self.signature,
self.MiniSectorCutoff,
self.MiniFatStart,
self.csectMiniFat,
self.sectDifStart,
self.csectDif
) = struct.unpack(fmt_header, header1)
debug( struct.unpack(fmt_header, header1)) debug( struct.unpack(fmt_header, header1))
if Sig != '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1': if self.Sig != '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1':
# OLE signature should always be present # OLE signature should always be present
self._raise_defect(DEFECT_FATAL, "incorrect OLE signature") self._raise_defect(DEFECT_FATAL, "incorrect OLE signature")
if clsid != '\x00'*16: if self.clsid != '\x00'*16:
# according to AAF specs, CLSID should always be zero # according to AAF specs, CLSID should always be zero
self._raise_defect(DEFECT_INCORRECT, "incorrect CLSID in OLE header") self._raise_defect(DEFECT_INCORRECT, "incorrect CLSID in OLE header")
debug( "MinorVersion = %d" % MinorVersion ) debug( "MinorVersion = %d" % self.MinorVersion )
debug( "DllVersion = %d" % DllVersion ) debug( "DllVersion = %d" % self.DllVersion )
if DllVersion not in [3, 4]: if self.DllVersion not in [3, 4]:
# version 3: usual format, 512 bytes per sector # version 3: usual format, 512 bytes per sector
# version 4: large format, 4K per sector # version 4: large format, 4K per sector
self._raise_defect(DEFECT_INCORRECT, "incorrect DllVersion in OLE header") self._raise_defect(DEFECT_INCORRECT, "incorrect DllVersion in OLE header")
debug( "ByteOrder = %X" % ByteOrder ) debug( "ByteOrder = %X" % self.ByteOrder )
if ByteOrder != 0xFFFE: if self.ByteOrder != 0xFFFE:
# For now only common little-endian documents are handled correctly # For now only common little-endian documents are handled correctly
self._raise_defect(DEFECT_FATAL, "incorrect ByteOrder in OLE header") self._raise_defect(DEFECT_FATAL, "incorrect ByteOrder in OLE header")
# TODO: add big-endian support for documents created on Mac ? # TODO: add big-endian support for documents created on Mac ?
SectorSize = 2**SectorShift self.SectorSize = 2**self.SectorShift
debug( "SectorSize = %d" % SectorSize ) debug( "SectorSize = %d" % self.SectorSize )
if SectorSize not in [512, 4096]: if self.SectorSize not in [512, 4096]:
self._raise_defect(DEFECT_INCORRECT, "incorrect SectorSize in OLE header") self._raise_defect(DEFECT_INCORRECT, "incorrect SectorSize in OLE header")
if (DllVersion==3 and SectorSize!=512) or (DllVersion==4 and SectorSize!=4096): if (self.DllVersion==3 and self.SectorSize!=512) \
or (self.DllVersion==4 and self.SectorSize!=4096):
self._raise_defect(DEFECT_INCORRECT, "SectorSize does not match DllVersion in OLE header") self._raise_defect(DEFECT_INCORRECT, "SectorSize does not match DllVersion in OLE header")
MiniSectorSize = 2**MiniSectorShift self.MiniSectorSize = 2**self.MiniSectorShift
debug( "MiniSectorSize = %d" % MiniSectorSize ) debug( "MiniSectorSize = %d" % self.MiniSectorSize )
if MiniSectorSize not in [64]: if self.MiniSectorSize not in [64]:
self._raise_defect(DEFECT_INCORRECT, "incorrect MiniSectorSize in OLE header") self._raise_defect(DEFECT_INCORRECT, "incorrect MiniSectorSize in OLE header")
if Reserved != 0 or Reserved1 != 0: if self.Reserved != 0 or self.Reserved1 != 0:
self._raise_defect(DEFECT_INCORRECT, "incorrect OLE header (non-null reserved bytes)") self._raise_defect(DEFECT_INCORRECT, "incorrect OLE header (non-null reserved bytes)")
debug( "csectDir = %d" % csectDir ) debug( "csectDir = %d" % self.csectDir )
if SectorSize==512 and csectDir!=0: if self.SectorSize==512 and self.csectDir!=0:
self._raise_defect(DEFECT_INCORRECT, "incorrect csectDir in OLE header") self._raise_defect(DEFECT_INCORRECT, "incorrect csectDir in OLE header")
debug( "csectFat = %d" % self.csectFat ) debug( "csectFat = %d" % self.csectFat )
debug( "sectDirStart = %X" % sectDirStart ) debug( "sectDirStart = %X" % self.sectDirStart )
debug( "signature = %d" % signature ) debug( "signature = %d" % self.signature )
# Signature should be zero, BUT some implementations do not follow this # Signature should be zero, BUT some implementations do not follow this
# rule => only a potential defect: # rule => only a potential defect:
if signature != 0: if self.signature != 0:
self._raise_defect(DEFECT_POTENTIAL, "incorrect OLE header (signature>0)") self._raise_defect(DEFECT_POTENTIAL, "incorrect OLE header (signature>0)")
debug( "MiniSectorCutoff = %d" % MiniSectorCutoff ) debug( "MiniSectorCutoff = %d" % self.MiniSectorCutoff )
debug( "MiniFatStart = %X" % MiniFatStart ) debug( "MiniFatStart = %X" % self.MiniFatStart )
debug( "csectMiniFat = %d" % csectMiniFat ) debug( "csectMiniFat = %d" % self.csectMiniFat )
debug( "sectDifStart = %X" % self.sectDifStart ) debug( "sectDifStart = %X" % self.sectDifStart )
debug( "csectDif = %d" % self.csectDif ) debug( "csectDif = %d" % self.csectDif )
# calculate the number of sectors in the file # calculate the number of sectors in the file
# (-1 because header doesn't count) # (-1 because header doesn't count)
self.nb_sect = (os.path.getsize(filename) / SectorSize) - 1 filesize = os.path.getsize(filename)
self.nb_sect = ( (filesize + self.SectorSize-1) / self.SectorSize) - 1
debug( "Number of sectors in the file: %d" % self.nb_sect ) debug( "Number of sectors in the file: %d" % self.nb_sect )
# file clsid (probably never used, so we don't store it) # file clsid (probably never used, so we don't store it)
clsid = _clsid(header[8:24]) clsid = _clsid(header[8:24])
self.sectorsize = 1 << i16(header, 30) self.sectorsize = self.SectorSize #1 << i16(header, 30)
self.minisectorsize = 1 << i16(header, 32) self.minisectorsize = self.MiniSectorSize #1 << i16(header, 32)
self.minisectorcutoff = i32(header, 56) self.minisectorcutoff = self.MiniSectorCutoff # i32(header, 56)
# check known streams for duplicate references (these are always in FAT, # check known streams for duplicate references (these are always in FAT,
# never in MiniFAT): # never in MiniFAT):
self._check_duplicate_stream(sectDirStart) self._check_duplicate_stream(self.sectDirStart)
# check MiniFAT only if it is not empty: # check MiniFAT only if it is not empty:
if csectMiniFat: if self.csectMiniFat:
self._check_duplicate_stream(MiniFatStart) self._check_duplicate_stream(self.MiniFatStart)
# check DIFAT only if it is not empty: # check DIFAT only if it is not empty:
if self.csectDif: if self.csectDif:
self._check_duplicate_stream(self.sectDifStart) self._check_duplicate_stream(self.sectDifStart)
@ -923,9 +972,9 @@ class OleFileIO:
self.loadfat(header) self.loadfat(header)
# Load direcory. This sets both the direntries list (ordered by sid) # Load direcory. This sets both the direntries list (ordered by sid)
# and the root (ordered by hierarchy) members. # and the root (ordered by hierarchy) members.
self.loaddirectory(i32(header, 48)) self.loaddirectory(self.sectDirStart)#i32(header, 48))
self.ministream = None self.ministream = None
self.minifatsect = i32(header, 60) self.minifatsect = self.MiniFatStart #i32(header, 60)
def _check_duplicate_stream(self, first_sect, minifat=False): def _check_duplicate_stream(self, first_sect, minifat=False):
@ -1104,6 +1153,13 @@ class OleFileIO:
## # FAT should contain csectFat blocks ## # FAT should contain csectFat blocks
## print "FAT length: %d instead of %d" % (len(self.fat), self.csectFat) ## print "FAT length: %d instead of %d" % (len(self.fat), self.csectFat)
## raise IOError, 'incorrect DIFAT' ## raise IOError, 'incorrect DIFAT'
# since FAT is read from fixed-size sectors, it may contain more values
# than the actual number of sectors in the file.
# Keep only the relevant sector indexes:
if len(self.fat) > self.nb_sect:
debug('len(fat)=%d, shrunk to nb_sect=%d' % (len(self.fat), self.nb_sect))
self.fat = self.fat[:self.nb_sect]
debug('\nFAT:')
self.dumpfat(self.fat) self.dumpfat(self.fat)
@ -1111,13 +1167,35 @@ class OleFileIO:
""" """
Load the MiniFAT table. Load the MiniFAT table.
""" """
# This is stored in a standard sub-stream, pointed to by a header # MiniFAT is stored in a standard sub-stream, pointed to by a header
# field. # field.
s = self._open(self.minifatsect).read() # NOTE: there are two sizes to take into account for this stream:
# 1) Stream size is calculated according to the number of sectors
# declared in the OLE header. This allocated stream may be more than
# needed to store the actual sector indexes.
# (self.csectMiniFat is the number of sectors of size self.SectorSize)
stream_size = self.csectMiniFat * self.SectorSize
# 2) Actually used size is calculated by dividing the MiniStream size
# (given by root entry size) by the size of mini sectors, *4 for
# 32 bits indexes:
nb_minisectors = (self.root.size + self.MiniSectorSize-1) / self.MiniSectorSize
used_size = nb_minisectors * 4
debug('loadminifat(): minifatsect=%d, nb FAT sectors=%d, used_size=%d, stream_size=%d, nb MiniSectors=%d' %
(self.minifatsect, self.csectMiniFat, used_size, stream_size, nb_minisectors))
if used_size > stream_size:
# This is not really a problem, but may indicate a wrong implementation:
self._raise_defect(DEFECT_INCORRECT, 'OLE MiniStream is larger than MiniFAT')
# In any case, first read stream_size:
s = self._open(self.minifatsect, stream_size, force_FAT=True).read()
#[PL] Old code replaced by an array: #[PL] Old code replaced by an array:
#self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4)) #self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4))
self.minifat = array.array('L', s) self.minifat = array.array('L', s)
# Then shrink the array to used size, to avoid indexes out of MiniStream:
debug('MiniFAT shrunk from %d to %d sectors' % (len(self.minifat), nb_minisectors))
self.minifat = self.minifat[:nb_minisectors]
debug('loadminifat(): len=%d' % len(self.minifat))
debug('\nMiniFAT:')
self.dumpfat(self.minifat)
def getsect(self, sect): def getsect(self, sect):
""" """
@ -1133,9 +1211,13 @@ class OleFileIO:
try: try:
self.fp.seek(self.sectorsize * (sect+1)) self.fp.seek(self.sectorsize * (sect+1))
except: except:
debug('getsect(): sect=%X, seek=%d, filesize=%d' %
(sect, self.sectorsize*(sect+1), os.path.getsize(self.fp.name)))
self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range')
sector = self.fp.read(self.sectorsize) sector = self.fp.read(self.sectorsize)
if len(sector) != self.sectorsize: if len(sector) != self.sectorsize:
debug('getsect(): sect=%X, read=%d, sectorsize=%d' %
(sect, len(sector), self.sectorsize))
self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector') self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector')
return sector return sector
@ -1206,23 +1288,31 @@ class OleFileIO:
self.root.dump() self.root.dump()
def _open(self, start, size = 0x7FFFFFFF): def _open(self, start, size = 0x7FFFFFFF, force_FAT=False):
""" """
Open a stream, either in FAT or MiniFAT according to its size. Open a stream, either in FAT or MiniFAT according to its size.
(openstream helper) (openstream helper)
start: index of first sector start: index of first sector
size: size of stream (or nothing if size is unknown) size: size of stream (or nothing if size is unknown)
force_FAT: if False (default), stream will be opened in FAT or MiniFAT
according to size. If True, it will always be opened in FAT.
""" """
debug('OleFileIO.open(): sect=%d, size=%d, force_FAT=%s' %
(start, size, str(force_FAT)))
# stream size is compared to the MiniSectorCutoff threshold: # stream size is compared to the MiniSectorCutoff threshold:
if size < self.minisectorcutoff: if size < self.minisectorcutoff and not force_FAT:
# ministream object # ministream object
if not self.ministream: if not self.ministream:
# load MiniFAT if it wasn't already done: # load MiniFAT if it wasn't already done:
self.loadminifat() self.loadminifat()
# The first sector index of the miniFAT stream is stored in the # The first sector index of the miniFAT stream is stored in the
# root directory entry: # root directory entry:
self.ministream = self._open(self.root.isectStart) size_ministream = self.root.size
debug('Opening MiniStream: sect=%d, size=%d' %
(self.root.isectStart, size_ministream))
self.ministream = self._open(self.root.isectStart,
size_ministream, force_FAT=True)
return _OleStream(self.ministream, start, size, 0, return _OleStream(self.ministream, start, size, 0,
self.minisectorsize, self.minifat) self.minisectorsize, self.minifat)
else: else:
@ -1454,19 +1544,27 @@ if __name__ == "__main__":
print """ print """
Launched from command line, this script parses OLE files and prints info. Launched from command line, this script parses OLE files and prints info.
Usage: OleFileIO_PL.py [-d] <file> [file2 ...] Usage: OleFileIO_PL.py [-d] [-s] <file> [file2 ...]
Options: Options:
-d : debug mode (display a lot of messages, for developers only) -d : debug mode (display a lot of debug information, for developers only)
-s : check all streams (for debugging purposes)
""" """
sys.exit() sys.exit()
check_streams = False
for filename in sys.argv[1:]: for filename in sys.argv[1:]:
## try: ## try:
# OPTIONS:
if filename == '-d': if filename == '-d':
# option to switch debug mode on: # option to switch debug mode on:
set_debug_mode(True) set_debug_mode(True)
continue continue
if filename == '-c':
# option to switch check streams mode on:
check_streams = True
continue
ole = OleFileIO(filename, raise_defects=DEFECT_INCORRECT) ole = OleFileIO(filename, raise_defects=DEFECT_INCORRECT)
print "-" * 68 print "-" * 68
print filename print filename
@ -1490,6 +1588,21 @@ Options:
v = '(binary data)' v = '(binary data)'
break break
print " ", k, v print " ", k, v
if check_streams:
# Read all streams to check if there are errors:
print '\nChecking streams...'
for streamname in ole.listdir():
# print name using repr() to convert binary chars to \xNN:
print '-', repr('/'.join(streamname)),'-',
st_type = ole.get_type(streamname)
if st_type == STGTY_STREAM:
print 'size %d' % ole.get_size(streamname)
# just try to read stream in memory:
ole.openstream(streamname)
else:
print 'NOT a stream : type=%d' % st_type
print ''
#[PL] Test a few new methods: #[PL] Test a few new methods:
root = ole.get_rootentry_name() root = ole.get_rootentry_name()