mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-02-10 00:20:57 +03:00
version 0.14 2007-11-19
This commit is contained in:
parent
fad61ba20c
commit
d6d3f50205
276
PIL/OleFileIO.py
276
PIL/OleFileIO.py
|
@ -6,7 +6,7 @@ OleFileIO_PL:
|
||||||
Microsoft Office documents, Image Composer and FlashPix files,
|
Microsoft Office documents, Image Composer and FlashPix files,
|
||||||
Outlook messages, ...
|
Outlook messages, ...
|
||||||
|
|
||||||
version 0.13 2007-09-04 Philippe Lagadec - http://lagasoft.free.fr
|
version 0.14 2007-11-19 Philippe Lagadec - http://lagasoft.free.fr
|
||||||
|
|
||||||
Improved version of OleFileIO module from PIL library v1.1.6
|
Improved version of OleFileIO module from PIL library v1.1.6
|
||||||
See: http://www.pythonware.com/products/pil/index.htm
|
See: http://www.pythonware.com/products/pil/index.htm
|
||||||
|
@ -22,8 +22,8 @@ WARNING: THIS IS (STILL) WORK IN PROGRESS.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec"
|
__author__ = "Fredrik Lundh (Secret Labs AB), Philippe Lagadec"
|
||||||
__date__ = "2007-09-04"
|
__date__ = "2007-11-19"
|
||||||
__version__ = '0.13'
|
__version__ = '0.14'
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
#-----------------------------------------------------------------------------
|
||||||
# CHANGELOG: (OleFileIO_PL changes only)
|
# CHANGELOG: (OleFileIO_PL changes only)
|
||||||
|
@ -41,17 +41,22 @@ __version__ = '0.13'
|
||||||
# 2007-09-04 v0.13 PL: - improved/translated (lots of) comments
|
# 2007-09-04 v0.13 PL: - improved/translated (lots of) comments
|
||||||
# - updated license
|
# - updated license
|
||||||
# - converted tabs to 4 spaces
|
# - converted tabs to 4 spaces
|
||||||
|
# 2007-11-19 v0.14 PL: - added OleFileIO.raise_defect() to adapt sensitivity
|
||||||
|
# - improved _unicode() to use Python 2.x unicode support
|
||||||
|
# - fixed bug in _OleDirectoryEntry
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
#-----------------------------------------------------------------------------
|
||||||
# TODO:
|
# TODO:
|
||||||
# - fix Unicode names handling
|
# - replace all raised exceptions with raise_defect (at least in OleFileIO)
|
||||||
|
# - fix Unicode names handling (find some way to stay compatible with Py1.5.2)
|
||||||
|
# => if possible avoid converting names to Latin-1
|
||||||
# - fix handling of DIFSECT blocks in FAT (not stop)
|
# - fix handling of DIFSECT blocks in FAT (not stop)
|
||||||
# - add stricter checks in decoding
|
# - add stricter checks in decoding
|
||||||
# - add (optional) checks on FAT block chains integrity to detect crossed
|
# - add (optional) checks on FAT block chains integrity to detect crossed
|
||||||
# sectors, loops, ...
|
# sectors, loops, ...
|
||||||
# - in __main__ display the whole object tree (not only 1st level), and allow
|
# - improve docstrings to show more sample uses
|
||||||
# to extract objects, or provide a sample script to do it.
|
|
||||||
# - see also original notes and FIXME below
|
# - see also original notes and FIXME below
|
||||||
|
# - remove all obsolete FIXMEs
|
||||||
#-----------------------------------------------------------------------------
|
#-----------------------------------------------------------------------------
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -149,12 +154,21 @@ MAGIC = '\320\317\021\340\241\261\032\341'
|
||||||
|
|
||||||
# [PL]: added constants (from AAF specifications)
|
# [PL]: added constants (from AAF specifications)
|
||||||
MAXREGSECT = 0xFFFFFFFAL; # maximum SECT
|
MAXREGSECT = 0xFFFFFFFAL; # maximum SECT
|
||||||
DIFSECT = 0xFFFFFFFCL; # denotes a DIFAT sector in a FAT
|
DIFSECT = 0xFFFFFFFCL; # (-4) denotes a DIFAT sector in a FAT
|
||||||
FATSECT = 0xFFFFFFFDL; # denotes a FAT sector in a FAT
|
FATSECT = 0xFFFFFFFDL; # (-3) denotes a FAT sector in a FAT
|
||||||
ENDOFCHAIN = 0xFFFFFFFEL; # end of a virtual stream chain
|
ENDOFCHAIN = 0xFFFFFFFEL; # (-2) end of a virtual stream chain
|
||||||
FREESECT = 0xFFFFFFFFL; # unallocated sector
|
FREESECT = 0xFFFFFFFFL; # (-1) unallocated sector
|
||||||
MAXREGSID = 0xFFFFFFFAL; # maximum directory entry ID
|
MAXREGSID = 0xFFFFFFFAL; # maximum directory entry ID
|
||||||
NOSTREAM = 0xFFFFFFFFL; # unallocated directory entry
|
NOSTREAM = 0xFFFFFFFFL; # (-1) unallocated directory entry
|
||||||
|
|
||||||
|
#[PL] object types in storage (from AAF specifications)
|
||||||
|
STGTY_INVALID = 0 # unknown storage type
|
||||||
|
STGTY_STORAGE = 1 # element is a storage object
|
||||||
|
STGTY_STREAM = 2 # element is a stream object
|
||||||
|
STGTY_LOCKBYTES = 3 # element is an ILockBytes object
|
||||||
|
STGTY_PROPERTY = 4 # element is an IPropertyStorage object
|
||||||
|
STGTY_ROOT = 5 # element is a root storage
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
|
@ -183,16 +197,24 @@ for k, v in vars().items():
|
||||||
|
|
||||||
WORD_CLSID = "00020900-0000-0000-C000-000000000046"
|
WORD_CLSID = "00020900-0000-0000-C000-000000000046"
|
||||||
|
|
||||||
|
#[PL]: Defect levels to classify parsing errors - see OleFileIO.raise_defect()
|
||||||
|
DEFECT_UNSURE = 10 # a case which looks weird, but not sure it's a defect
|
||||||
|
DEFECT_POTENTIAL = 20 # a potential defect
|
||||||
|
DEFECT_INCORRECT = 30 # an error according to specifications, but parsing
|
||||||
|
# can go on
|
||||||
|
DEFECT_FATAL = 40 # an error which cannot be ignored, parsing is
|
||||||
|
# impossible
|
||||||
|
|
||||||
#
|
#
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
|
|
||||||
class _OleStream(StringIO.StringIO):
|
class _OleStream(StringIO.StringIO):
|
||||||
"""OLE2 Stream
|
"""
|
||||||
|
OLE2 Stream
|
||||||
|
|
||||||
Returns a read-only file object which can be used to read
|
Returns a read-only file object which can be used to read
|
||||||
the contents of an OLE stream. To open a stream, use the
|
the contents of an OLE stream (instance of the StringIO class).
|
||||||
openstream method in the OleFile class.
|
To open a stream, use the openstream method in the OleFile class.
|
||||||
|
|
||||||
This function can be used with either ordinary streams,
|
This function can be used with either ordinary streams,
|
||||||
or ministreams, depending on the offset, sectorsize, and
|
or ministreams, depending on the offset, sectorsize, and
|
||||||
|
@ -204,8 +226,13 @@ class _OleStream(StringIO.StringIO):
|
||||||
# loading it all in one go.
|
# loading it all in one go.
|
||||||
|
|
||||||
def __init__(self, fp, sect, size, offset, sectorsize, fat):
|
def __init__(self, fp, sect, size, offset, sectorsize, fat):
|
||||||
|
"""
|
||||||
|
Constructor for _OleStream class
|
||||||
|
"""
|
||||||
|
# optimization(?): data is first a list of strings, and join() is called
|
||||||
|
# at the end to concatenate all in one string.
|
||||||
|
# (this may not be really useful with recent Python versions)
|
||||||
data = []
|
data = []
|
||||||
|
|
||||||
# [PL] while sect != -2: # 0xFFFFFFFEL:
|
# [PL] while sect != -2: # 0xFFFFFFFEL:
|
||||||
while sect != ENDOFCHAIN:
|
while sect != ENDOFCHAIN:
|
||||||
fp.seek(offset + sectorsize * sect)
|
fp.seek(offset + sectorsize * sect)
|
||||||
|
@ -213,13 +240,12 @@ class _OleStream(StringIO.StringIO):
|
||||||
# [PL] if pointer is out of the FAT an exception is raised
|
# [PL] if pointer is out of the FAT an exception is raised
|
||||||
if sect >= len(fat) :
|
if sect >= len(fat) :
|
||||||
raise IOError, 'incorrect FAT'
|
raise IOError, 'incorrect FAT'
|
||||||
|
# jump to next sector in the FAT:
|
||||||
sect = fat[sect]
|
sect = fat[sect]
|
||||||
|
|
||||||
data = string.join(data, "")
|
data = string.join(data, "")
|
||||||
|
# when all data is read in memory, StringIO constructor is called
|
||||||
# print len(data), size
|
|
||||||
|
|
||||||
StringIO.StringIO.__init__(self, data[:size])
|
StringIO.StringIO.__init__(self, data[:size])
|
||||||
|
# Then the _OleStream object can be used as a read-only file object.
|
||||||
|
|
||||||
#
|
#
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
|
@ -264,14 +290,16 @@ class _OleDirectoryEntry:
|
||||||
# [PL]: original code from PIL 1.1.5
|
# [PL]: original code from PIL 1.1.5
|
||||||
#if sid != -1
|
#if sid != -1
|
||||||
# [PL]: necessary fix for Python 2.4
|
# [PL]: necessary fix for Python 2.4
|
||||||
#if sid != -1 and sid != 0xFFFFFFFFL:
|
|
||||||
# [PL]: new fix 22/02/2006
|
|
||||||
if sid != NOSTREAM:
|
if sid != NOSTREAM:
|
||||||
|
|
||||||
# the directory entries are organized as a red-black tree.
|
# the directory entries are organized as a red-black tree.
|
||||||
# the following piece of code does an ordered traversal of
|
# the following piece of code does an ordered traversal of
|
||||||
# such a tree (at least that's what I hope ;-)
|
# such a tree (at least that's what I hope ;-)
|
||||||
|
|
||||||
|
#[PL] Note from OpenOffice documentation: the safest way is to
|
||||||
|
# recreate the tree because some implementations may store broken
|
||||||
|
# red-black trees...
|
||||||
|
|
||||||
stack = [self.sid]
|
stack = [self.sid]
|
||||||
|
|
||||||
# start at leftmost position
|
# start at leftmost position
|
||||||
|
@ -279,7 +307,7 @@ class _OleDirectoryEntry:
|
||||||
left, right, child = sidlist[sid][4]
|
left, right, child = sidlist[sid][4]
|
||||||
|
|
||||||
#[PL] while left != -1 and left != 0xFFFFFFFFL:
|
#[PL] while left != -1 and left != 0xFFFFFFFFL:
|
||||||
if left != NOSTREAM:
|
while left != NOSTREAM:
|
||||||
stack.append(sid)
|
stack.append(sid)
|
||||||
sid = left
|
sid = left
|
||||||
left, right, child = sidlist[sid][4]
|
left, right, child = sidlist[sid][4]
|
||||||
|
@ -384,15 +412,42 @@ class OleFileIO:
|
||||||
TIFF files).
|
TIFF files).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, filename = None):
|
def __init__(self, filename = None, raise_defects=DEFECT_FATAL):
|
||||||
|
"""
|
||||||
|
Constructor for OleFileIO class.
|
||||||
|
filename: file to open.
|
||||||
|
raise_defects: minimal level for defects to be raised as exceptions.
|
||||||
|
(use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a
|
||||||
|
security-oriented application, see source code for details)
|
||||||
|
"""
|
||||||
|
self.raise_defects_level = raise_defects
|
||||||
if filename:
|
if filename:
|
||||||
self.open(filename)
|
self.open(filename)
|
||||||
|
|
||||||
##
|
|
||||||
# Open an OLE2 file.
|
def raise_defect(self, defect_level, message):
|
||||||
|
"""
|
||||||
|
This method should be called for any defect found during file parsing.
|
||||||
|
It may raise an IOError exception according to the minimal level chosen
|
||||||
|
for the OleFileIO object.
|
||||||
|
|
||||||
|
defect_level: defect level, possible values are:
|
||||||
|
DEFECT_UNSURE : a case which looks weird, but not sure it's a defect
|
||||||
|
DEFECT_POTENTIAL : a potential defect
|
||||||
|
DEFECT_INCORRECT : an error according to specifications, but parsing can go on
|
||||||
|
DEFECT_FATAL : an error which cannot be ignored, parsing is impossible
|
||||||
|
message: string describing the defect, used with raised exception.
|
||||||
|
"""
|
||||||
|
# added by [PL]
|
||||||
|
if defect_level >= self.raise_defects_level:
|
||||||
|
raise IOError, message
|
||||||
|
|
||||||
|
|
||||||
def open(self, filename):
|
def open(self, filename):
|
||||||
"""Open an OLE2 file"""
|
"""
|
||||||
|
Open an OLE2 file.
|
||||||
|
Reads the header, FAT and directory.
|
||||||
|
"""
|
||||||
if type(filename) == type(""):
|
if type(filename) == type(""):
|
||||||
self.fp = open(filename, "rb")
|
self.fp = open(filename, "rb")
|
||||||
else:
|
else:
|
||||||
|
@ -401,7 +456,7 @@ class OleFileIO:
|
||||||
header = self.fp.read(512)
|
header = self.fp.read(512)
|
||||||
|
|
||||||
if len(header) != 512 or header[:8] != MAGIC:
|
if len(header) != 512 or header[:8] != MAGIC:
|
||||||
raise IOError, "not an OLE2 structured storage file"
|
self.raise_defect(DEFECT_FATAL, "not an OLE2 structured storage file")
|
||||||
|
|
||||||
# [PL] header structure according to AAF specifications:
|
# [PL] header structure according to AAF specifications:
|
||||||
##Header
|
##Header
|
||||||
|
@ -451,34 +506,42 @@ class OleFileIO:
|
||||||
debug( struct.unpack(fmt_header, header1))
|
debug( struct.unpack(fmt_header, header1))
|
||||||
|
|
||||||
if Sig != '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1':
|
if Sig != '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1':
|
||||||
raise IOError, "incorrect OLE signature"
|
# OLE signature should always be present
|
||||||
|
self.raise_defect(DEFECT_FATAL, "incorrect OLE signature")
|
||||||
if clsid != '\x00'*16:
|
if clsid != '\x00'*16:
|
||||||
raise IOError, "incorrect CLSID in OLE header"
|
# according to AAF specs, CLSID should always be zero
|
||||||
|
self.raise_defect(DEFECT_INCORRECT, "incorrect CLSID in OLE header")
|
||||||
debug( "MinorVersion = %d" % MinorVersion )
|
debug( "MinorVersion = %d" % MinorVersion )
|
||||||
debug( "DllVersion = %d" % DllVersion )
|
debug( "DllVersion = %d" % DllVersion )
|
||||||
if DllVersion not in [3, 4]:
|
if DllVersion not in [3, 4]:
|
||||||
raise IOError, "incorrect DllVersion in OLE header"
|
# version 3: usual format, 512 bytes per sector
|
||||||
|
# version 4: large format, 4K per sector
|
||||||
|
self.raise_defect(DEFECT_INCORRECT, "incorrect DllVersion in OLE header")
|
||||||
debug( "ByteOrder = %X" % ByteOrder )
|
debug( "ByteOrder = %X" % ByteOrder )
|
||||||
if ByteOrder != 0xFFFE:
|
if ByteOrder != 0xFFFE:
|
||||||
raise IOError, "incorrect ByteOrder in OLE header"
|
# For now only common little-endian documents are handled correctly
|
||||||
|
self.raise_defect(DEFECT_FATAL, "incorrect ByteOrder in OLE header")
|
||||||
|
# TODO: add big-endian support for documents created on Mac ?
|
||||||
SectorSize = 2**SectorShift
|
SectorSize = 2**SectorShift
|
||||||
debug( "SectorSize = %d" % SectorSize )
|
debug( "SectorSize = %d" % SectorSize )
|
||||||
if SectorSize not in [512, 4096]:
|
if SectorSize not in [512, 4096]:
|
||||||
raise IOError, "incorrect SectorSize in OLE header"
|
self.raise_defect(DEFECT_INCORRECT, "incorrect SectorSize in OLE header")
|
||||||
|
if (DllVersion==3 and SectorSize!=512) or (DllVersion==4 and SectorSize!=4096):
|
||||||
|
self.raise_defect(DEFECT_INCORRECT, "SectorSize does not match DllVersion in OLE header")
|
||||||
MiniSectorSize = 2**MiniSectorShift
|
MiniSectorSize = 2**MiniSectorShift
|
||||||
debug( "MiniSectorSize = %d" % MiniSectorSize )
|
debug( "MiniSectorSize = %d" % MiniSectorSize )
|
||||||
if MiniSectorSize not in [64]:
|
if MiniSectorSize not in [64]:
|
||||||
raise IOError, "incorrect MiniSectorSize in OLE header"
|
self.raise_defect(DEFECT_INCORRECT, "incorrect MiniSectorSize in OLE header")
|
||||||
if Reserved != 0 or Reserved1 != 0:
|
if Reserved != 0 or Reserved1 != 0:
|
||||||
raise IOError, "incorrect OLE header"
|
self.raise_defect(DEFECT_INCORRECT, "incorrect OLE header")
|
||||||
debug( "csectDir = %d" % csectDir )
|
debug( "csectDir = %d" % csectDir )
|
||||||
if SectorSize==512 and csectDir!=0:
|
if SectorSize==512 and csectDir!=0:
|
||||||
raise IOError, "incorrect csectDir in OLE header"
|
self.raise_defect(DEFECT_INCORRECT, "incorrect csectDir in OLE header")
|
||||||
debug( "csectFat = %d" % self.csectFat )
|
debug( "csectFat = %d" % self.csectFat )
|
||||||
debug( "sectDirStart = %X" % sectDirStart )
|
debug( "sectDirStart = %X" % sectDirStart )
|
||||||
debug( "signature = %d" % signature )
|
debug( "signature = %d" % signature )
|
||||||
if signature != 0:
|
if signature != 0:
|
||||||
raise IOError, "incorrect OLE header"
|
self.raise_defect(DEFECT_INCORRECT, "incorrect OLE header")
|
||||||
debug( "MiniSectorCutoff = %d" % MiniSectorCutoff )
|
debug( "MiniSectorCutoff = %d" % MiniSectorCutoff )
|
||||||
debug( "MiniFatStart = %X" % MiniFatStart )
|
debug( "MiniFatStart = %X" % MiniFatStart )
|
||||||
debug( "csectMiniFat = %d" % csectMiniFat )
|
debug( "csectMiniFat = %d" % csectMiniFat )
|
||||||
|
@ -573,13 +636,13 @@ class OleFileIO:
|
||||||
|
|
||||||
def loadfat_sect(self, sect):
|
def loadfat_sect(self, sect):
|
||||||
"Adds the indexes of the given sector to the FAT"
|
"Adds the indexes of the given sector to the FAT"
|
||||||
# un secteur de FAT est un tableau d'ulong
|
# a FAT sector is an array of ulong integers.
|
||||||
if isinstance(sect, array.array):
|
if isinstance(sect, array.array):
|
||||||
fat1 = sect
|
fat1 = sect
|
||||||
else:
|
else:
|
||||||
fat1 = array.array('L', sect)
|
fat1 = array.array('L', sect)
|
||||||
self.dumpsect(sect)
|
self.dumpsect(sect)
|
||||||
# la FAT est une chaîne de secteurs débutant au 1er index d'elle-même
|
# The FAT is a sector chain starting at the first index of itself.
|
||||||
for isect in fat1:
|
for isect in fat1:
|
||||||
#print "isect = %X" % isect
|
#print "isect = %X" % isect
|
||||||
if isect == ENDOFCHAIN or isect == FREESECT:
|
if isect == ENDOFCHAIN or isect == FREESECT:
|
||||||
|
@ -620,10 +683,10 @@ class OleFileIO:
|
||||||
if self.csectFat <= 109:
|
if self.csectFat <= 109:
|
||||||
# there must be at least 109 blocks in header and the rest in
|
# there must be at least 109 blocks in header and the rest in
|
||||||
# DIFAT, so number of sectors must be >109.
|
# DIFAT, so number of sectors must be >109.
|
||||||
raise IOError, 'incorrect DIFAT, not enough sectors'
|
self.raise_defect(DEFECT_INCORRECT, 'incorrect DIFAT, not enough sectors')
|
||||||
if self.sectDifStart >= self.nb_sect:
|
if self.sectDifStart >= self.nb_sect:
|
||||||
# initial DIFAT block index must be valid
|
# initial DIFAT block index must be valid
|
||||||
raise IOError, 'incorrect DIFAT, first index out of range'
|
self.raise_defect(DEFECT_FATAL, 'incorrect DIFAT, first index out of range')
|
||||||
debug( "DIFAT analysis..." )
|
debug( "DIFAT analysis..." )
|
||||||
# We compute the necessary number of DIFAT sectors :
|
# We compute the necessary number of DIFAT sectors :
|
||||||
# (each DIFAT sector = 127 pointers + 1 towards next DIFAT sector)
|
# (each DIFAT sector = 127 pointers + 1 towards next DIFAT sector)
|
||||||
|
@ -634,6 +697,7 @@ class OleFileIO:
|
||||||
isect_difat = self.sectDifStart
|
isect_difat = self.sectDifStart
|
||||||
for i in xrange(nb_difat):
|
for i in xrange(nb_difat):
|
||||||
debug( "DIFAT block %d, sector %X" % (i, isect_difat) )
|
debug( "DIFAT block %d, sector %X" % (i, isect_difat) )
|
||||||
|
#TODO: check if corresponding FAT SID = DIFSECT
|
||||||
sector_difat = self.getsect(isect_difat)
|
sector_difat = self.getsect(isect_difat)
|
||||||
difat = array.array('L', sector_difat)
|
difat = array.array('L', sector_difat)
|
||||||
self.dumpsect(sector_difat)
|
self.dumpsect(sector_difat)
|
||||||
|
@ -652,14 +716,23 @@ class OleFileIO:
|
||||||
self.dumpfat(self.fat)
|
self.dumpfat(self.fat)
|
||||||
|
|
||||||
def loadminifat(self):
|
def loadminifat(self):
|
||||||
"Load the MINIFAT table."
|
"""
|
||||||
# This is stored in a standard sub-
|
Load the MiniFAT table.
|
||||||
# stream, pointed to by a header field.
|
"""
|
||||||
|
# This is stored in a standard sub-stream, pointed to by a header
|
||||||
|
# field.
|
||||||
s = self._open(self.minifatsect).read()
|
s = self._open(self.minifatsect).read()
|
||||||
self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4))
|
#[PL] Old code replaced by an array:
|
||||||
|
#self.minifat = map(lambda i, s=s: i32(s, i), range(0, len(s), 4))
|
||||||
|
self.minifat = array.array('L', s)
|
||||||
|
|
||||||
|
|
||||||
def getsect(self, sect):
|
def getsect(self, sect):
|
||||||
"Read given sector"
|
"""
|
||||||
|
Read given sector from file on disk.
|
||||||
|
sect: sector index
|
||||||
|
returns a string containing the sector data.
|
||||||
|
"""
|
||||||
# [PL] this original code was wrong when sectors are 4KB instead of
|
# [PL] this original code was wrong when sectors are 4KB instead of
|
||||||
# 512 bytes:
|
# 512 bytes:
|
||||||
#self.fp.seek(512 + self.sectorsize * sect)
|
#self.fp.seek(512 + self.sectorsize * sect)
|
||||||
|
@ -668,27 +741,51 @@ class OleFileIO:
|
||||||
try:
|
try:
|
||||||
self.fp.seek(self.sectorsize * (sect+1))
|
self.fp.seek(self.sectorsize * (sect+1))
|
||||||
except:
|
except:
|
||||||
raise IOError, 'wrong index for OLE sector'
|
self.raise_defect(DEFECT_FATAL, 'wrong index for OLE sector')
|
||||||
sector = self.fp.read(self.sectorsize)
|
sector = self.fp.read(self.sectorsize)
|
||||||
if len(sector) != self.sectorsize:
|
if len(sector) != self.sectorsize:
|
||||||
raise IOError, 'incomplete OLE sector'
|
self.raise_defect(DEFECT_FATAL, 'incomplete OLE sector')
|
||||||
return sector
|
return sector
|
||||||
|
|
||||||
def _unicode(self, s):
|
def _unicode(self, s):
|
||||||
# Map unicode string to Latin 1
|
"""
|
||||||
|
Map unicode string to Latin 1.
|
||||||
|
"""
|
||||||
# FIXME: some day, Python will provide an official way to handle
|
# FIXME: some day, Python will provide an official way to handle
|
||||||
# Unicode strings, but until then, this will have to do...
|
# Unicode strings, but until then, this will have to do...
|
||||||
|
|
||||||
|
#[PL]: use Python Unicode when available (Python>=2.0):
|
||||||
|
try:
|
||||||
|
# First the string is converted to plain Unicode:
|
||||||
|
# (assuming it is encoded as UTF-16 little-endian)
|
||||||
|
u = unicode(s, 'UTF-16LE')
|
||||||
|
# Second the string is converted to Latin-1
|
||||||
|
return u.encode('latin_1')
|
||||||
|
except ValueError:
|
||||||
|
# there was an error during UTF-16 to Unicode decoding:
|
||||||
|
self.raise_defect(DEFECT_INCORRECT, 'incorrect Unicode name')
|
||||||
|
# if no exception raised, fallback to foolproof version:
|
||||||
return filter(ord, s)
|
return filter(ord, s)
|
||||||
|
except UnicodeError:
|
||||||
|
# there was an error during Unicode to Latin-1 encoding:
|
||||||
|
self.raise_defect(DEFECT_INCORRECT, 'incorrect Unicode name')
|
||||||
|
# if no exception raised, fallback to foolproof version:
|
||||||
|
return filter(ord, s)
|
||||||
|
except:
|
||||||
|
# we assume this is an old Python version without Unicode support.
|
||||||
|
# Null bytes are simply removed:
|
||||||
|
return filter(ord, s)
|
||||||
|
|
||||||
|
|
||||||
def loaddirectory(self, sect):
|
def loaddirectory(self, sect):
|
||||||
"""
|
"""
|
||||||
Load the directory.
|
Load the directory.
|
||||||
|
sect: sector index of directory stream.
|
||||||
"""
|
"""
|
||||||
# The directory is stored in a standard
|
# The directory is stored in a standard
|
||||||
# substream, independent of its size.
|
# substream, independent of its size.
|
||||||
|
|
||||||
# read directory stream
|
# open directory stream as a read-only file:
|
||||||
fp = self._open(sect)
|
fp = self._open(sect)
|
||||||
|
|
||||||
# create list of sid entries
|
# create list of sid entries
|
||||||
|
@ -697,6 +794,36 @@ class OleFileIO:
|
||||||
entry = fp.read(128)
|
entry = fp.read(128)
|
||||||
if not entry:
|
if not entry:
|
||||||
break
|
break
|
||||||
|
#[PL] décodage DirEntry
|
||||||
|
fmt_entry = "<64sHBBLLL16sLQQLLH"
|
||||||
|
len_entry = struct.calcsize(fmt_entry)
|
||||||
|
#debug("taille DirEntry: %d" % len_entry)
|
||||||
|
(name, namelength, type, color, sid_left, sid_right, sid_child,
|
||||||
|
clsid, dwUserFlags, createTime, modifyTime, isectStart, sizeLow,
|
||||||
|
sizeHigh) = struct.unpack(fmt_entry, entry[:len_entry])
|
||||||
|
#debug("namelength = %d" % namelength)
|
||||||
|
if type == STGTY_INVALID:
|
||||||
|
break
|
||||||
|
if type not in [STGTY_ROOT, STGTY_STORAGE, STGTY_STREAM]:
|
||||||
|
raise IOError, 'unhandled storage type'
|
||||||
|
#debug (struct.unpack(fmt_entry, entry[:len_entry]))
|
||||||
|
# vérification et conversion du nom Unicode
|
||||||
|
# on a au maximum 31 caractères + le zéro terminal
|
||||||
|
if namelength>64:
|
||||||
|
raise IOError, 'incorrect DirEntry name length'
|
||||||
|
# on ne garde que les caractères sans le zéro terminal
|
||||||
|
name = name[:(namelength-2)]
|
||||||
|
# on convertit la chaîne d'octets en véritable chaîne Unicode
|
||||||
|
name = unicode(name, 'utf_16_le')
|
||||||
|
debug("DirEntry: '%s'" % name)
|
||||||
|
# Si cette chaîne contient un caractère nul c'est que le champ
|
||||||
|
# namelength est incorrect:
|
||||||
|
if unichr(0) in name:
|
||||||
|
debug(len(name))
|
||||||
|
debug(binascii.hexlify(name))
|
||||||
|
raise IOError, 'incorrect DirEntry name length'
|
||||||
|
debug("type:%d" % type)
|
||||||
|
|
||||||
type = ord(entry[66])
|
type = ord(entry[66])
|
||||||
name = self._unicode(entry[0:0+i16(entry, 64)])
|
name = self._unicode(entry[0:0+i16(entry, 64)])
|
||||||
ptrs = i32(entry, 68), i32(entry, 72), i32(entry, 76)
|
ptrs = i32(entry, 68), i32(entry, 72), i32(entry, 76)
|
||||||
|
@ -712,6 +839,7 @@ class OleFileIO:
|
||||||
self.root.dump()
|
self.root.dump()
|
||||||
|
|
||||||
def _clsid(self, clsid):
|
def _clsid(self, clsid):
|
||||||
|
"Converts a CLSID to a human-readable string"
|
||||||
if clsid == "\0" * len(clsid):
|
if clsid == "\0" * len(clsid):
|
||||||
return ""
|
return ""
|
||||||
return (("%08X-%04X-%04X-%02X%02X-" + "%02X" * 6) %
|
return (("%08X-%04X-%04X-%02X%02X-" + "%02X" * 6) %
|
||||||
|
@ -719,7 +847,7 @@ class OleFileIO:
|
||||||
tuple(map(ord, clsid[8:16]))))
|
tuple(map(ord, clsid[8:16]))))
|
||||||
|
|
||||||
def _list(self, files, prefix, node):
|
def _list(self, files, prefix, node):
|
||||||
# listdir helper
|
"listdir helper"
|
||||||
|
|
||||||
prefix = prefix + [node.name]
|
prefix = prefix + [node.name]
|
||||||
for entry in node.kids:
|
for entry in node.kids:
|
||||||
|
@ -729,8 +857,14 @@ class OleFileIO:
|
||||||
files.append(prefix[1:] + [entry.name])
|
files.append(prefix[1:] + [entry.name])
|
||||||
|
|
||||||
def _find(self, filename):
|
def _find(self, filename):
|
||||||
# openstream helper
|
"""
|
||||||
|
Returns directory entry of given filename.
|
||||||
|
filename: list of storage filenames, path to the desired stream/storage.
|
||||||
|
Example: ['Root Entry', 'storage_1', 'storage_1.2', 'stream']
|
||||||
|
(openstream helper)
|
||||||
|
"""
|
||||||
|
#TODO: if filename is a string instead of a list, split it on slashes
|
||||||
|
# to allow a more common way of expressing paths ?
|
||||||
node = self.root
|
node = self.root
|
||||||
for name in filename:
|
for name in filename:
|
||||||
for kid in node.kids:
|
for kid in node.kids:
|
||||||
|
@ -742,16 +876,22 @@ class OleFileIO:
|
||||||
return node.sid
|
return node.sid
|
||||||
|
|
||||||
def _open(self, start, size = 0x7FFFFFFF):
|
def _open(self, start, size = 0x7FFFFFFF):
|
||||||
# openstream helper.
|
"""
|
||||||
|
Opens a stream, either in FAT or MiniFAT according to its size.
|
||||||
|
(openstream helper)
|
||||||
|
start: index of first sector
|
||||||
|
size: size of stream
|
||||||
|
"""
|
||||||
|
# stream size is compared to the MiniSectorCutoff threshold:
|
||||||
if size < self.minisectorcutoff:
|
if size < self.minisectorcutoff:
|
||||||
# ministream object
|
# ministream object
|
||||||
if not self.ministream:
|
if not self.ministream:
|
||||||
|
# load MiniFAT if it wasn't already done:
|
||||||
self.loadminifat()
|
self.loadminifat()
|
||||||
self.ministream = self._open(self.sidlist[0][2])
|
self.ministream = self._open(self.sidlist[0][2])
|
||||||
return _OleStream(self.ministream, start, size, 0,
|
return _OleStream(self.ministream, start, size, 0,
|
||||||
self.minisectorsize, self.minifat)
|
self.minisectorsize, self.minifat)
|
||||||
|
else:
|
||||||
# standard stream
|
# standard stream
|
||||||
return _OleStream(self.fp, start, size, 512,
|
return _OleStream(self.fp, start, size, 512,
|
||||||
self.sectorsize, self.fat)
|
self.sectorsize, self.fat)
|
||||||
|
@ -760,8 +900,9 @@ class OleFileIO:
|
||||||
# Returns a list of streams stored in this file.
|
# Returns a list of streams stored in this file.
|
||||||
|
|
||||||
def listdir(self):
|
def listdir(self):
|
||||||
"""Return a list of streams stored in this file"""
|
"""
|
||||||
|
Return a list of streams stored in this file
|
||||||
|
"""
|
||||||
files = []
|
files = []
|
||||||
self._list(files, [], self.root)
|
self._list(files, [], self.root)
|
||||||
return files
|
return files
|
||||||
|
@ -770,8 +911,9 @@ class OleFileIO:
|
||||||
# Opens a stream as a read-only file object.
|
# Opens a stream as a read-only file object.
|
||||||
|
|
||||||
def openstream(self, filename):
|
def openstream(self, filename):
|
||||||
"""Open a stream as a read-only file object"""
|
"""
|
||||||
|
Open a stream as a read-only file object
|
||||||
|
"""
|
||||||
slot = self._find(filename)
|
slot = self._find(filename)
|
||||||
name, type, sect, size, sids, clsid = self.sidlist[slot]
|
name, type, sect, size, sids, clsid = self.sidlist[slot]
|
||||||
if type != 2:
|
if type != 2:
|
||||||
|
@ -868,17 +1010,17 @@ if __name__ == "__main__":
|
||||||
print ""
|
print ""
|
||||||
sys.exit("usage: OleFileIO_PL.py <file> [file2 ...]")
|
sys.exit("usage: OleFileIO_PL.py <file> [file2 ...]")
|
||||||
|
|
||||||
for file in sys.argv[1:]:
|
for filename in sys.argv[1:]:
|
||||||
## try:
|
## try:
|
||||||
ole = OleFileIO(file)
|
ole = OleFileIO(filename)
|
||||||
print "-" * 68
|
print "-" * 68
|
||||||
print file
|
print filename
|
||||||
print "-" * 68
|
print "-" * 68
|
||||||
ole.dumpdirectory()
|
ole.dumpdirectory()
|
||||||
for file in ole.listdir():
|
for streamname in ole.listdir():
|
||||||
if file[-1][0] == "\005":
|
if streamname[-1][0] == "\005":
|
||||||
print file
|
print streamname, ": properties"
|
||||||
props = ole.getproperties(file)
|
props = ole.getproperties(streamname)
|
||||||
props = props.items()
|
props = props.items()
|
||||||
props.sort()
|
props.sort()
|
||||||
for k, v in props:
|
for k, v in props:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user