Health fixes

This commit is contained in:
Andrew Murray 2015-12-08 22:10:31 +11:00
parent 430a87997b
commit 63ea351a9b

View File

@ -264,7 +264,11 @@ __version__ = '0.43'
import io import io
import sys import sys
import struct, array, os.path, datetime, logging import struct
import array
import os.path
import datetime
import logging
#=== COMPATIBILITY WORKAROUNDS ================================================ #=== COMPATIBILITY WORKAROUNDS ================================================
@ -346,6 +350,7 @@ class NullHandler(logging.Handler):
def emit(self, record): def emit(self, record):
pass pass
def get_logger(name, level=logging.NOTSET): def get_logger(name, level=logging.NOTSET):
""" """
Create a suitable logger object for this module. Create a suitable logger object for this module.
@ -404,15 +409,15 @@ STGTY_ROOT = 5 # element is a root storage
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# property types # property types
VT_EMPTY=0; VT_NULL=1; VT_I2=2; VT_I4=3; VT_R4=4; VT_R8=5; VT_CY=6; VT_EMPTY = 0; VT_NULL = 1; VT_I2 = 2; VT_I4 = 3; VT_R4 = 4; VT_R8 = 5; VT_CY = 6
VT_DATE=7; VT_BSTR=8; VT_DISPATCH=9; VT_ERROR=10; VT_BOOL=11; VT_DATE = 7; VT_BSTR = 8; VT_DISPATCH = 9; VT_ERROR = 10; VT_BOOL = 11
VT_VARIANT=12; VT_UNKNOWN=13; VT_DECIMAL=14; VT_I1=16; VT_UI1=17; VT_VARIANT = 12; VT_UNKNOWN = 13; VT_DECIMAL = 14; VT_I1 = 16; VT_UI1 = 17
VT_UI2=18; VT_UI4=19; VT_I8=20; VT_UI8=21; VT_INT=22; VT_UINT=23; VT_UI2 = 18; VT_UI4 = 19; VT_I8 = 20; VT_UI8 = 21; VT_INT = 22; VT_UINT = 23
VT_VOID=24; VT_HRESULT=25; VT_PTR=26; VT_SAFEARRAY=27; VT_CARRAY=28; VT_VOID = 24; VT_HRESULT = 25; VT_PTR = 26; VT_SAFEARRAY = 27; VT_CARRAY = 28
VT_USERDEFINED=29; VT_LPSTR=30; VT_LPWSTR=31; VT_FILETIME=64; VT_USERDEFINED = 29; VT_LPSTR = 30; VT_LPWSTR = 31; VT_FILETIME = 64
VT_BLOB=65; VT_STREAM=66; VT_STORAGE=67; VT_STREAMED_OBJECT=68; VT_BLOB = 65; VT_STREAM = 66; VT_STORAGE = 67; VT_STREAMED_OBJECT = 68
VT_STORED_OBJECT=69; VT_BLOB_OBJECT=70; VT_CF=71; VT_CLSID=72; VT_STORED_OBJECT = 69; VT_BLOB_OBJECT = 70; VT_CF = 71; VT_CLSID = 72
VT_VECTOR=0x1000; VT_VECTOR = 0x1000
# map property id to name (for debugging purposes) # map property id to name (for debugging purposes)
@ -480,7 +485,6 @@ def isOleFile (filename):
else: else:
return False return False
if bytes is str: if bytes is str:
# version for Python 2.x # version for Python 2.x
def i8(c): def i8(c):
@ -490,9 +494,9 @@ else:
def i8(c): def i8(c):
return c if c.__class__ is int else c[0] return c if c.__class__ is int else c[0]
# TODO: replace i16 and i32 with more readable struct.unpack equivalent? # TODO: replace i16 and i32 with more readable struct.unpack equivalent?
def i16(c, o = 0): def i16(c, o = 0):
""" """
Converts a 2-bytes (16 bits) string to an integer. Converts a 2-bytes (16 bits) string to an integer.
@ -532,7 +536,6 @@ def _clsid(clsid):
tuple(map(i8, clsid[8:16])))) tuple(map(i8, clsid[8:16]))))
def filetime2datetime(filetime): def filetime2datetime(filetime):
""" """
convert FILETIME (64 bits int) to Python datetime.datetime convert FILETIME (64 bits int) to Python datetime.datetime
@ -544,10 +547,9 @@ def filetime2datetime(filetime):
return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10) return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10)
#=== CLASSES ================================================================== #=== CLASSES ==================================================================
class OleMetadata: class OleMetadata(object):
""" """
class to parse and store metadata from standard properties of OLE files. class to parse and store metadata from standard properties of OLE files.
@ -652,7 +654,6 @@ class OleMetadata:
self.language = None self.language = None
self.doc_version = None self.doc_version = None
def parse_properties(self, olefile): def parse_properties(self, olefile):
""" """
Parse standard properties of an OLE file, from the streams Parse standard properties of an OLE file, from the streams
@ -779,11 +780,11 @@ class _OleStream(io.BytesIO):
if sect < 0 or sect >= len(fat): if sect < 0 or sect >= len(fat):
log.debug('sect=%d (%X) / len(fat)=%d' % (sect, sect, len(fat))) log.debug('sect=%d (%X) / len(fat)=%d' % (sect, sect, len(fat)))
log.debug('i=%d / nb_sectors=%d' % (i, nb_sectors)) log.debug('i=%d / nb_sectors=%d' % (i, nb_sectors))
## tmp_data = b"".join(data) # tmp_data = b"".join(data)
## f = open('test_debug.bin', 'wb') # f = open('test_debug.bin', 'wb')
## f.write(tmp_data) # f.write(tmp_data)
## f.close() # f.close()
## log.debug('data read so far: %d bytes' % len(tmp_data)) # log.debug('data read so far: %d bytes' % len(tmp_data))
raise IOError('incorrect OLE FAT, sector index out of range') raise IOError('incorrect OLE FAT, sector index out of range')
# TODO: merge this code with OleFileIO.getsect() ? # TODO: merge this code with OleFileIO.getsect() ?
# TODO: check if this works with 4K sectors: # TODO: check if this works with 4K sectors:
@ -835,7 +836,7 @@ class _OleStream(io.BytesIO):
#--- _OleDirectoryEntry ------------------------------------------------------- #--- _OleDirectoryEntry -------------------------------------------------------
class _OleDirectoryEntry: class _OleDirectoryEntry(object):
""" """
OLE2 Directory Entry OLE2 Directory Entry
@ -865,7 +866,6 @@ class _OleDirectoryEntry:
DIRENTRY_SIZE = 128 DIRENTRY_SIZE = 128
assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE
def __init__(self, entry, sid, olefile): def __init__(self, entry, sid, olefile):
""" """
Constructor for an _OleDirectoryEntry object. Constructor for an _OleDirectoryEntry object.
@ -961,8 +961,6 @@ class _OleDirectoryEntry:
minifat = False minifat = False
olefile._check_duplicate_stream(self.isectStart, minifat) olefile._check_duplicate_stream(self.isectStart, minifat)
def build_storage_tree(self): def build_storage_tree(self):
""" """
Read and build the red-black tree attached to this _OleDirectoryEntry Read and build the red-black tree attached to this _OleDirectoryEntry
@ -986,7 +984,6 @@ class _OleDirectoryEntry:
# (see rich comparison methods in this class) # (see rich comparison methods in this class)
self.kids.sort() self.kids.sort()
def append_kids(self, child_sid): def append_kids(self, child_sid):
""" """
Walk through red-black tree of children of this directory entry to add Walk through red-black tree of children of this directory entry to add
@ -1030,7 +1027,6 @@ class _OleDirectoryEntry:
# Afterwards build kid's own tree if it's also a storage: # Afterwards build kid's own tree if it's also a storage:
child.build_storage_tree() child.build_storage_tree()
def __eq__(self, other): def __eq__(self, other):
"Compare entries by name" "Compare entries by name"
return self.name == other.name return self.name == other.name
@ -1050,7 +1046,6 @@ class _OleDirectoryEntry:
# TODO: replace by the same function as MS implementation ? # TODO: replace by the same function as MS implementation ?
# (order by name length first, then case-insensitive order) # (order by name length first, then case-insensitive order)
def dump(self, tab = 0): def dump(self, tab = 0):
"Dump this entry, and all its subentries (for debug purposes only)" "Dump this entry, and all its subentries (for debug purposes only)"
TYPES = ["(invalid)", "(storage)", "(stream)", "(lockbytes)", TYPES = ["(invalid)", "(storage)", "(stream)", "(lockbytes)",
@ -1065,7 +1060,6 @@ class _OleDirectoryEntry:
for kid in self.kids: for kid in self.kids:
kid.dump(tab + 2) kid.dump(tab + 2)
def getmtime(self): def getmtime(self):
""" """
Return modification time of a directory entry. Return modification time of a directory entry.
@ -1079,7 +1073,6 @@ class _OleDirectoryEntry:
return None return None
return filetime2datetime(self.modifyTime) return filetime2datetime(self.modifyTime)
def getctime(self): def getctime(self):
""" """
Return creation time of a directory entry. Return creation time of a directory entry.
@ -1096,7 +1089,7 @@ class _OleDirectoryEntry:
#--- OleFileIO ---------------------------------------------------------------- #--- OleFileIO ----------------------------------------------------------------
class OleFileIO: class OleFileIO(object):
""" """
OLE container object OLE container object
@ -1165,7 +1158,6 @@ class OleFileIO:
if filename: if filename:
self.open(filename, write_mode=write_mode) self.open(filename, write_mode=write_mode)
def _raise_defect(self, defect_level, message, exception_type=IOError): def _raise_defect(self, defect_level, message, exception_type=IOError):
""" """
This method should be called for any defect found during file parsing. This method should be called for any defect found during file parsing.
@ -1191,7 +1183,6 @@ class OleFileIO:
self.parsing_issues.append((exception_type, message)) self.parsing_issues.append((exception_type, message))
log.warning(message) log.warning(message)
def _decode_utf16_str(self, utf16_str, errors='replace'): def _decode_utf16_str(self, utf16_str, errors='replace'):
""" """
Decode a string encoded in UTF-16 LE format, as found in the OLE Decode a string encoded in UTF-16 LE format, as found in the OLE
@ -1210,7 +1201,6 @@ class OleFileIO:
# path_encoding=None, return the Unicode string as-is: # path_encoding=None, return the Unicode string as-is:
return unicode_str return unicode_str
def open(self, filename, write_mode=False): def open(self, filename, write_mode=False):
""" """
Open an OLE2 file in read-only or read/write mode. Open an OLE2 file in read-only or read/write mode.
@ -1433,14 +1423,12 @@ class OleFileIO:
self.ministream = None self.ministream = None
self.minifatsect = self.first_mini_fat_sector #i32(header, 60) self.minifatsect = self.first_mini_fat_sector #i32(header, 60)
def close(self): def close(self):
""" """
close the OLE file, to release the file object close the OLE file, to release the file object
""" """
self.fp.close() self.fp.close()
def _check_duplicate_stream(self, first_sect, minifat=False): def _check_duplicate_stream(self, first_sect, minifat=False):
""" """
Checks if a stream has not been already referenced elsewhere. Checks if a stream has not been already referenced elsewhere.
@ -1466,7 +1454,6 @@ class OleFileIO:
else: else:
used_streams.append(first_sect) used_streams.append(first_sect)
def dumpfat(self, fat, firstindex=0): def dumpfat(self, fat, firstindex=0):
""" """
Display a part of FAT in human-readable form for debugging purposes Display a part of FAT in human-readable form for debugging purposes
@ -1503,7 +1490,6 @@ class OleFileIO:
print(name, end=" ") print(name, end=" ")
print() print()
def dumpsect(self, sector, firstindex=0): def dumpsect(self, sector, firstindex=0):
""" """
Display a sector in a human-readable form, for debugging purposes Display a sector in a human-readable form, for debugging purposes
@ -1540,7 +1526,6 @@ class OleFileIO:
a.byteswap() a.byteswap()
return a return a
def loadfat_sect(self, sect): def loadfat_sect(self, sect):
""" """
Adds the indexes of the given sector to the FAT Adds the indexes of the given sector to the FAT
@ -1576,7 +1561,6 @@ class OleFileIO:
self.fat = self.fat + nextfat self.fat = self.fat + nextfat
return isect return isect
def loadfat(self, header): def loadfat(self, header):
""" """
Load the FAT table. Load the FAT table.
@ -1595,14 +1579,14 @@ class OleFileIO:
self.fat = array.array(UINT32) self.fat = array.array(UINT32)
self.loadfat_sect(sect) self.loadfat_sect(sect)
#self.dumpfat(self.fat) #self.dumpfat(self.fat)
## for i in range(0, len(sect), 4): # for i in range(0, len(sect), 4):
## ix = i32(sect, i) # ix = i32(sect, i)
## #[PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFE or ix == 0xFFFFFFFF: # # [PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFE or ix == 0xFFFFFFFF:
## if ix == 0xFFFFFFFE or ix == 0xFFFFFFFF: # if ix == 0xFFFFFFFE or ix == 0xFFFFFFFF:
## break # break
## s = self.getsect(ix) # s = self.getsect(ix)
## #fat = fat + [i32(s, i) for i in range(0, len(s), 4)] # #fat = fat + [i32(s, i) for i in range(0, len(s), 4)]
## fat = fat + array.array(UINT32, s) # fat = fat + array.array(UINT32, s)
if self.num_difat_sectors != 0: if self.num_difat_sectors != 0:
# [PL] There's a DIFAT because file is larger than 6.8MB # [PL] There's a DIFAT because file is larger than 6.8MB
# some checks just in case: # some checks just in case:
@ -1640,10 +1624,10 @@ class OleFileIO:
if isect_difat not in [ENDOFCHAIN, FREESECT]: if isect_difat not in [ENDOFCHAIN, FREESECT]:
# last DIFAT pointer value must be ENDOFCHAIN or FREESECT # last DIFAT pointer value must be ENDOFCHAIN or FREESECT
raise IOError('incorrect end of DIFAT') raise IOError('incorrect end of DIFAT')
## if len(self.fat) != self.num_fat_sectors: # if len(self.fat) != self.num_fat_sectors:
## # FAT should contain num_fat_sectors blocks # # FAT should contain num_fat_sectors blocks
## print("FAT length: %d instead of %d" % (len(self.fat), self.num_fat_sectors)) # print("FAT length: %d instead of %d" % (len(self.fat), self.num_fat_sectors))
## raise IOError('incorrect DIFAT') # raise IOError('incorrect DIFAT')
# since FAT is read from fixed-size sectors, it may contain more values # since FAT is read from fixed-size sectors, it may contain more values
# than the actual number of sectors in the file. # than the actual number of sectors in the file.
# Keep only the relevant sector indexes: # Keep only the relevant sector indexes:
@ -1655,7 +1639,6 @@ class OleFileIO:
log.debug('\nFAT:') log.debug('\nFAT:')
self.dumpfat(self.fat) self.dumpfat(self.fat)
def loadminifat(self): def loadminifat(self):
""" """
Load the MiniFAT table. Load the MiniFAT table.
@ -1723,7 +1706,6 @@ class OleFileIO:
self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector') self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector')
return sector return sector
def write_sect(self, sect, data, padding=b'\x00'): def write_sect(self, sect, data, padding=b'\x00'):
""" """
Write given sector to file on disk. Write given sector to file on disk.
@ -1750,7 +1732,6 @@ class OleFileIO:
raise ValueError("Data is larger than sector size") raise ValueError("Data is larger than sector size")
self.fp.write(data) self.fp.write(data)
def loaddirectory(self, sect): def loaddirectory(self, sect):
""" """
Load the directory. Load the directory.
@ -1780,13 +1761,12 @@ class OleFileIO:
## break ## break
## self.direntries.append(_OleDirectoryEntry(entry, sid, self)) ## self.direntries.append(_OleDirectoryEntry(entry, sid, self))
# load root entry: # load root entry:
root_entry = self._load_direntry(0) self._load_direntry(0)
# Root entry is the first entry: # Root entry is the first entry:
self.root = self.direntries[0] self.root = self.direntries[0]
# read and build all storage trees, starting from the root: # read and build all storage trees, starting from the root:
self.root.build_storage_tree() self.root.build_storage_tree()
def _load_direntry(self, sid): def _load_direntry(self, sid):
""" """
Load a directory entry from the directory. Load a directory entry from the directory.
@ -1812,14 +1792,12 @@ class OleFileIO:
self.direntries[sid] = _OleDirectoryEntry(entry, sid, self) self.direntries[sid] = _OleDirectoryEntry(entry, sid, self)
return self.direntries[sid] return self.direntries[sid]
def dumpdirectory(self): def dumpdirectory(self):
""" """
Dump directory (for debugging only) Dump directory (for debugging only)
""" """
self.root.dump() self.root.dump()
def _open(self, start, size = 0x7FFFFFFF, force_FAT=False): def _open(self, start, size = 0x7FFFFFFF, force_FAT=False):
""" """
Open a stream, either in FAT or MiniFAT according to its size. Open a stream, either in FAT or MiniFAT according to its size.
@ -1855,7 +1833,6 @@ class OleFileIO:
sectorsize=self.sectorsize, fat=self.fat, sectorsize=self.sectorsize, fat=self.fat,
filesize=self._filesize) filesize=self._filesize)
def _list(self, files, prefix, node, streams=True, storages=False): def _list(self, files, prefix, node, streams=True, storages=False):
""" """
listdir helper listdir helper
@ -1884,7 +1861,6 @@ class OleFileIO:
else: else:
self._raise_defect(DEFECT_INCORRECT, 'The directory tree contains an entry which is not a stream nor a storage.') self._raise_defect(DEFECT_INCORRECT, 'The directory tree contains an entry which is not a stream nor a storage.')
def listdir(self, streams=True, storages=False): def listdir(self, streams=True, storages=False):
""" """
Return a list of streams and/or storages stored in this file Return a list of streams and/or storages stored in this file
@ -1898,7 +1874,6 @@ class OleFileIO:
self._list(files, [], self.root, streams, storages) self._list(files, [], self.root, streams, storages)
return files return files
def _find(self, filename): def _find(self, filename):
""" """
Returns directory entry of given filename. (openstream helper) Returns directory entry of given filename. (openstream helper)
@ -1930,7 +1905,6 @@ class OleFileIO:
node = kid node = kid
return node.sid return node.sid
def openstream(self, filename): def openstream(self, filename):
""" """
Open a stream as a read-only file object (BytesIO). Open a stream as a read-only file object (BytesIO).
@ -1952,7 +1926,6 @@ class OleFileIO:
raise IOError("this file is not a stream") raise IOError("this file is not a stream")
return self._open(entry.isectStart, entry.size) return self._open(entry.isectStart, entry.size)
def write_stream(self, stream_name, data): def write_stream(self, stream_name, data):
""" """
Write a stream to disk. For now, it is only possible to replace an Write a stream to disk. For now, it is only possible to replace an
@ -1984,12 +1957,12 @@ class OleFileIO:
nb_sectors = (size + (self.sectorsize-1)) // self.sectorsize nb_sectors = (size + (self.sectorsize-1)) // self.sectorsize
log.debug('nb_sectors = %d' % nb_sectors) log.debug('nb_sectors = %d' % nb_sectors)
for i in range(nb_sectors): for i in range(nb_sectors):
## try: # try:
## self.fp.seek(offset + self.sectorsize * sect) # self.fp.seek(offset + self.sectorsize * sect)
## except: # except:
## log.debug('sect=%d, seek=%d' % # log.debug('sect=%d, seek=%d' %
## (sect, offset+self.sectorsize*sect)) # (sect, offset+self.sectorsize*sect))
## raise IOError('OLE sector index out of range') # raise IOError('OLE sector index out of range')
# extract one sector from data, the last one being smaller: # extract one sector from data, the last one being smaller:
if i < (nb_sectors-1): if i < (nb_sectors-1):
data_sector = data[i*self.sectorsize:(i+1)*self.sectorsize] data_sector = data[i*self.sectorsize:(i+1)*self.sectorsize]
@ -2002,7 +1975,7 @@ class OleFileIO:
% (size, self.sectorsize, len(data_sector), size % self.sectorsize)) % (size, self.sectorsize, len(data_sector), size % self.sectorsize))
assert(len(data_sector) % self.sectorsize == size % self.sectorsize) assert(len(data_sector) % self.sectorsize == size % self.sectorsize)
self.write_sect(sect, data_sector) self.write_sect(sect, data_sector)
## self.fp.write(data_sector) # self.fp.write(data_sector)
# jump to next sector in the FAT: # jump to next sector in the FAT:
try: try:
sect = self.fat[sect] sect = self.fat[sect]
@ -2013,7 +1986,6 @@ class OleFileIO:
if sect != ENDOFCHAIN: if sect != ENDOFCHAIN:
raise IOError('incorrect last sector index in OLE stream') raise IOError('incorrect last sector index in OLE stream')
def get_type(self, filename): def get_type(self, filename):
""" """
Test if given filename exists as a stream or a storage in the OLE Test if given filename exists as a stream or a storage in the OLE
@ -2033,7 +2005,6 @@ class OleFileIO:
except: except:
return False return False
def getmtime(self, filename): def getmtime(self, filename):
""" """
Return modification time of a stream/storage. Return modification time of a stream/storage.
@ -2049,7 +2020,6 @@ class OleFileIO:
entry = self.direntries[sid] entry = self.direntries[sid]
return entry.getmtime() return entry.getmtime()
def getctime(self, filename): def getctime(self, filename):
""" """
Return creation time of a stream/storage. Return creation time of a stream/storage.
@ -2065,7 +2035,6 @@ class OleFileIO:
entry = self.direntries[sid] entry = self.direntries[sid]
return entry.getctime() return entry.getctime()
def exists(self, filename): def exists(self, filename):
""" """
Test if given filename exists as a stream or a storage in the OLE Test if given filename exists as a stream or a storage in the OLE
@ -2076,12 +2045,11 @@ class OleFileIO:
:returns: True if object exist, else False. :returns: True if object exist, else False.
""" """
try: try:
sid = self._find(filename) self._find(filename)
return True return True
except: except:
return False return False
def get_size(self, filename): def get_size(self, filename):
""" """
Return size of a stream in the OLE container, in bytes. Return size of a stream in the OLE container, in bytes.
@ -2098,7 +2066,6 @@ class OleFileIO:
raise TypeError('object is not an OLE stream') raise TypeError('object is not an OLE stream')
return entry.size return entry.size
def get_rootentry_name(self): def get_rootentry_name(self):
""" """
Return root entry name. Should usually be 'Root Entry' or 'R' in most Return root entry name. Should usually be 'Root Entry' or 'R' in most
@ -2106,7 +2073,6 @@ class OleFileIO:
""" """
return self.root.name return self.root.name
def getproperties(self, filename, convert_time=False, no_conversion=None): def getproperties(self, filename, convert_time=False, no_conversion=None):
""" """
Return properties described in substream. Return properties described in substream.
@ -2138,7 +2104,7 @@ class OleFileIO:
# format id # format id
s = fp.read(20) s = fp.read(20)
fmtid = _clsid(s[:16]) # fmtid = _clsid(s[:16])
fp.seek(i32(s, 16)) fp.seek(i32(s, 16))
# get section # get section
@ -2275,7 +2241,7 @@ class OleFileIO:
if __name__ == "__main__": if __name__ == "__main__":
import sys, optparse import optparse
DEFAULT_LOG_LEVEL = "warning" # Default log level DEFAULT_LOG_LEVEL = "warning" # Default log level
LOG_LEVELS = { LOG_LEVELS = {
@ -2355,11 +2321,11 @@ if __name__ == "__main__":
print('NOT a stream : type=%d' % st_type) print('NOT a stream : type=%d' % st_type)
print() print()
## for streamname in ole.listdir(): # for streamname in ole.listdir():
## # print name using repr() to convert binary chars to \xNN: # # print name using repr() to convert binary chars to \xNN:
## print('-', repr('/'.join(streamname)),'-', end=' ') # print('-', repr('/'.join(streamname)),'-', end=' ')
## print(ole.getmtime(streamname)) # print(ole.getmtime(streamname))
## print() # print()
print('Modification/Creation times of all directory entries:') print('Modification/Creation times of all directory entries:')
for entry in ole.direntries: for entry in ole.direntries: