Health fixes

This commit is contained in:
Andrew Murray 2015-12-08 22:10:31 +11:00
parent 430a87997b
commit 63ea351a9b

View File

@ -264,7 +264,11 @@ __version__ = '0.43'
import io
import sys
import struct, array, os.path, datetime, logging
import struct
import array
import os.path
import datetime
import logging
#=== COMPATIBILITY WORKAROUNDS ================================================
@ -346,6 +350,7 @@ class NullHandler(logging.Handler):
def emit(self, record):
# Deliberately a no-op: the record is discarded and nothing is output.
pass
def get_logger(name, level=logging.NOTSET):
"""
Create a suitable logger object for this module.
@ -404,15 +409,15 @@ STGTY_ROOT = 5 # element is a root storage
# --------------------------------------------------------------------
# property types
VT_EMPTY=0; VT_NULL=1; VT_I2=2; VT_I4=3; VT_R4=4; VT_R8=5; VT_CY=6;
VT_DATE=7; VT_BSTR=8; VT_DISPATCH=9; VT_ERROR=10; VT_BOOL=11;
VT_VARIANT=12; VT_UNKNOWN=13; VT_DECIMAL=14; VT_I1=16; VT_UI1=17;
VT_UI2=18; VT_UI4=19; VT_I8=20; VT_UI8=21; VT_INT=22; VT_UINT=23;
VT_VOID=24; VT_HRESULT=25; VT_PTR=26; VT_SAFEARRAY=27; VT_CARRAY=28;
VT_USERDEFINED=29; VT_LPSTR=30; VT_LPWSTR=31; VT_FILETIME=64;
VT_BLOB=65; VT_STREAM=66; VT_STORAGE=67; VT_STREAMED_OBJECT=68;
VT_STORED_OBJECT=69; VT_BLOB_OBJECT=70; VT_CF=71; VT_CLSID=72;
VT_VECTOR=0x1000;
# Property type codes, one assignment per line.
# Note: the value 15 is not assigned to any name in this table, and the
# numbering jumps from 31 (VT_LPWSTR) to 64 (VT_FILETIME).
VT_EMPTY = 0
VT_NULL = 1
VT_I2 = 2
VT_I4 = 3
VT_R4 = 4
VT_R8 = 5
VT_CY = 6
VT_DATE = 7
VT_BSTR = 8
VT_DISPATCH = 9
VT_ERROR = 10
VT_BOOL = 11
VT_VARIANT = 12
VT_UNKNOWN = 13
VT_DECIMAL = 14
VT_I1 = 16
VT_UI1 = 17
VT_UI2 = 18
VT_UI4 = 19
VT_I8 = 20
VT_UI8 = 21
VT_INT = 22
VT_UINT = 23
VT_VOID = 24
VT_HRESULT = 25
VT_PTR = 26
VT_SAFEARRAY = 27
VT_CARRAY = 28
VT_USERDEFINED = 29
VT_LPSTR = 30
VT_LPWSTR = 31
VT_FILETIME = 64
VT_BLOB = 65
VT_STREAM = 66
VT_STORAGE = 67
VT_STREAMED_OBJECT = 68
VT_STORED_OBJECT = 69
VT_BLOB_OBJECT = 70
VT_CF = 71
VT_CLSID = 72
# Separate from the sequence above: a single high bit value.
VT_VECTOR = 0x1000
# map property id to name (for debugging purposes)
@ -480,7 +485,6 @@ def isOleFile (filename):
else:
return False
if bytes is str:
# version for Python 2.x
def i8(c):
@ -490,9 +494,9 @@ else:
def i8(c):
    """
    Return the integer value of a single byte.

    :param c: either an integer, which is returned unchanged, or an
        indexable object (e.g. bytes), whose first element is returned.
    :returns: ``c`` itself when it is an int, otherwise ``c[0]``.
    """
    # isinstance() rather than an exact class comparison, so that int
    # subclasses are returned as-is instead of failing on c[0].
    return c if isinstance(c, int) else c[0]
# TODO: replace i16 and i32 with more readable struct.unpack equivalent?
def i16(c, o = 0):
"""
Converts a 2-bytes (16 bits) string to an integer.
@ -532,7 +536,6 @@ def _clsid(clsid):
tuple(map(i8, clsid[8:16]))))
def filetime2datetime(filetime):
"""
convert FILETIME (64 bits int) to Python datetime.datetime
@ -544,10 +547,9 @@ def filetime2datetime(filetime):
return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10)
#=== CLASSES ==================================================================
class OleMetadata:
class OleMetadata(object):
"""
class to parse and store metadata from standard properties of OLE files.
@ -652,7 +654,6 @@ class OleMetadata:
self.language = None
self.doc_version = None
def parse_properties(self, olefile):
"""
Parse standard properties of an OLE file, from the streams
@ -779,11 +780,11 @@ class _OleStream(io.BytesIO):
if sect < 0 or sect >= len(fat):
log.debug('sect=%d (%X) / len(fat)=%d' % (sect, sect, len(fat)))
log.debug('i=%d / nb_sectors=%d' % (i, nb_sectors))
## tmp_data = b"".join(data)
## f = open('test_debug.bin', 'wb')
## f.write(tmp_data)
## f.close()
## log.debug('data read so far: %d bytes' % len(tmp_data))
# tmp_data = b"".join(data)
# f = open('test_debug.bin', 'wb')
# f.write(tmp_data)
# f.close()
# log.debug('data read so far: %d bytes' % len(tmp_data))
raise IOError('incorrect OLE FAT, sector index out of range')
# TODO: merge this code with OleFileIO.getsect() ?
# TODO: check if this works with 4K sectors:
@ -835,7 +836,7 @@ class _OleStream(io.BytesIO):
#--- _OleDirectoryEntry -------------------------------------------------------
class _OleDirectoryEntry:
class _OleDirectoryEntry(object):
"""
OLE2 Directory Entry
@ -865,7 +866,6 @@ class _OleDirectoryEntry:
DIRENTRY_SIZE = 128
assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE
def __init__(self, entry, sid, olefile):
"""
Constructor for an _OleDirectoryEntry object.
@ -961,8 +961,6 @@ class _OleDirectoryEntry:
minifat = False
olefile._check_duplicate_stream(self.isectStart, minifat)
def build_storage_tree(self):
"""
Read and build the red-black tree attached to this _OleDirectoryEntry
@ -986,7 +984,6 @@ class _OleDirectoryEntry:
# (see rich comparison methods in this class)
self.kids.sort()
def append_kids(self, child_sid):
"""
Walk through red-black tree of children of this directory entry to add
@ -1030,7 +1027,6 @@ class _OleDirectoryEntry:
# Afterwards build kid's own tree if it's also a storage:
child.build_storage_tree()
def __eq__(self, other):
"Compare entries by name: equal when self.name == other.name"
return self.name == other.name
@ -1050,7 +1046,6 @@ class _OleDirectoryEntry:
# TODO: replace by the same function as MS implementation ?
# (order by name length first, then case-insensitive order)
def dump(self, tab = 0):
"Dump this entry, and all its subentries (for debug purposes only)"
TYPES = ["(invalid)", "(storage)", "(stream)", "(lockbytes)",
@ -1065,7 +1060,6 @@ class _OleDirectoryEntry:
for kid in self.kids:
kid.dump(tab + 2)
def getmtime(self):
"""
Return modification time of a directory entry.
@ -1079,7 +1073,6 @@ class _OleDirectoryEntry:
return None
return filetime2datetime(self.modifyTime)
def getctime(self):
"""
Return creation time of a directory entry.
@ -1096,7 +1089,7 @@ class _OleDirectoryEntry:
#--- OleFileIO ----------------------------------------------------------------
class OleFileIO:
class OleFileIO(object):
"""
OLE container object
@ -1165,7 +1158,6 @@ class OleFileIO:
if filename:
self.open(filename, write_mode=write_mode)
def _raise_defect(self, defect_level, message, exception_type=IOError):
"""
This method should be called for any defect found during file parsing.
@ -1191,7 +1183,6 @@ class OleFileIO:
self.parsing_issues.append((exception_type, message))
log.warning(message)
def _decode_utf16_str(self, utf16_str, errors='replace'):
"""
Decode a string encoded in UTF-16 LE format, as found in the OLE
@ -1210,7 +1201,6 @@ class OleFileIO:
# path_encoding=None, return the Unicode string as-is:
return unicode_str
def open(self, filename, write_mode=False):
"""
Open an OLE2 file in read-only or read/write mode.
@ -1433,14 +1423,12 @@ class OleFileIO:
self.ministream = None
self.minifatsect = self.first_mini_fat_sector #i32(header, 60)
def close(self):
"""
Close the OLE file by closing the underlying file object (self.fp),
which releases it.
"""
self.fp.close()
def _check_duplicate_stream(self, first_sect, minifat=False):
"""
Checks that a stream has not already been referenced elsewhere.
@ -1466,7 +1454,6 @@ class OleFileIO:
else:
used_streams.append(first_sect)
def dumpfat(self, fat, firstindex=0):
"""
Display a part of FAT in human-readable form for debugging purposes
@ -1503,7 +1490,6 @@ class OleFileIO:
print(name, end=" ")
print()
def dumpsect(self, sector, firstindex=0):
"""
Display a sector in a human-readable form, for debugging purposes
@ -1540,7 +1526,6 @@ class OleFileIO:
a.byteswap()
return a
def loadfat_sect(self, sect):
"""
Adds the indexes of the given sector to the FAT
@ -1576,7 +1561,6 @@ class OleFileIO:
self.fat = self.fat + nextfat
return isect
def loadfat(self, header):
"""
Load the FAT table.
@ -1595,14 +1579,14 @@ class OleFileIO:
self.fat = array.array(UINT32)
self.loadfat_sect(sect)
#self.dumpfat(self.fat)
## for i in range(0, len(sect), 4):
## ix = i32(sect, i)
## #[PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFE or ix == 0xFFFFFFFF:
## if ix == 0xFFFFFFFE or ix == 0xFFFFFFFF:
## break
## s = self.getsect(ix)
## #fat = fat + [i32(s, i) for i in range(0, len(s), 4)]
## fat = fat + array.array(UINT32, s)
# for i in range(0, len(sect), 4):
# ix = i32(sect, i)
# # [PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFE or ix == 0xFFFFFFFF:
# if ix == 0xFFFFFFFE or ix == 0xFFFFFFFF:
# break
# s = self.getsect(ix)
# #fat = fat + [i32(s, i) for i in range(0, len(s), 4)]
# fat = fat + array.array(UINT32, s)
if self.num_difat_sectors != 0:
# [PL] There's a DIFAT because file is larger than 6.8MB
# some checks just in case:
@ -1640,10 +1624,10 @@ class OleFileIO:
if isect_difat not in [ENDOFCHAIN, FREESECT]:
# last DIFAT pointer value must be ENDOFCHAIN or FREESECT
raise IOError('incorrect end of DIFAT')
## if len(self.fat) != self.num_fat_sectors:
## # FAT should contain num_fat_sectors blocks
## print("FAT length: %d instead of %d" % (len(self.fat), self.num_fat_sectors))
## raise IOError('incorrect DIFAT')
# if len(self.fat) != self.num_fat_sectors:
# # FAT should contain num_fat_sectors blocks
# print("FAT length: %d instead of %d" % (len(self.fat), self.num_fat_sectors))
# raise IOError('incorrect DIFAT')
# since FAT is read from fixed-size sectors, it may contain more values
# than the actual number of sectors in the file.
# Keep only the relevant sector indexes:
@ -1655,7 +1639,6 @@ class OleFileIO:
log.debug('\nFAT:')
self.dumpfat(self.fat)
def loadminifat(self):
"""
Load the MiniFAT table.
@ -1723,7 +1706,6 @@ class OleFileIO:
self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector')
return sector
def write_sect(self, sect, data, padding=b'\x00'):
"""
Write given sector to file on disk.
@ -1750,7 +1732,6 @@ class OleFileIO:
raise ValueError("Data is larger than sector size")
self.fp.write(data)
def loaddirectory(self, sect):
"""
Load the directory.
@ -1780,13 +1761,12 @@ class OleFileIO:
## break
## self.direntries.append(_OleDirectoryEntry(entry, sid, self))
# load root entry:
root_entry = self._load_direntry(0)
self._load_direntry(0)
# Root entry is the first entry:
self.root = self.direntries[0]
# read and build all storage trees, starting from the root:
self.root.build_storage_tree()
def _load_direntry(self, sid):
"""
Load a directory entry from the directory.
@ -1812,14 +1792,12 @@ class OleFileIO:
self.direntries[sid] = _OleDirectoryEntry(entry, sid, self)
return self.direntries[sid]
def dumpdirectory(self):
"""
Dump the directory (for debugging only), starting from the root entry:
this simply delegates to self.root.dump().
"""
self.root.dump()
def _open(self, start, size = 0x7FFFFFFF, force_FAT=False):
"""
Open a stream, either in FAT or MiniFAT according to its size.
@ -1855,7 +1833,6 @@ class OleFileIO:
sectorsize=self.sectorsize, fat=self.fat,
filesize=self._filesize)
def _list(self, files, prefix, node, streams=True, storages=False):
"""
listdir helper
@ -1884,7 +1861,6 @@ class OleFileIO:
else:
self._raise_defect(DEFECT_INCORRECT, 'The directory tree contains an entry which is not a stream nor a storage.')
def listdir(self, streams=True, storages=False):
"""
Return a list of streams and/or storages stored in this file
@ -1898,7 +1874,6 @@ class OleFileIO:
self._list(files, [], self.root, streams, storages)
return files
def _find(self, filename):
"""
Returns directory entry of given filename. (openstream helper)
@ -1930,7 +1905,6 @@ class OleFileIO:
node = kid
return node.sid
def openstream(self, filename):
"""
Open a stream as a read-only file object (BytesIO).
@ -1952,7 +1926,6 @@ class OleFileIO:
raise IOError("this file is not a stream")
return self._open(entry.isectStart, entry.size)
def write_stream(self, stream_name, data):
"""
Write a stream to disk. For now, it is only possible to replace an
@ -1984,12 +1957,12 @@ class OleFileIO:
nb_sectors = (size + (self.sectorsize-1)) // self.sectorsize
log.debug('nb_sectors = %d' % nb_sectors)
for i in range(nb_sectors):
## try:
## self.fp.seek(offset + self.sectorsize * sect)
## except:
## log.debug('sect=%d, seek=%d' %
## (sect, offset+self.sectorsize*sect))
## raise IOError('OLE sector index out of range')
# try:
# self.fp.seek(offset + self.sectorsize * sect)
# except:
# log.debug('sect=%d, seek=%d' %
# (sect, offset+self.sectorsize*sect))
# raise IOError('OLE sector index out of range')
# extract one sector from data, the last one being smaller:
if i < (nb_sectors-1):
data_sector = data[i*self.sectorsize:(i+1)*self.sectorsize]
@ -2002,7 +1975,7 @@ class OleFileIO:
% (size, self.sectorsize, len(data_sector), size % self.sectorsize))
assert(len(data_sector) % self.sectorsize == size % self.sectorsize)
self.write_sect(sect, data_sector)
## self.fp.write(data_sector)
# self.fp.write(data_sector)
# jump to next sector in the FAT:
try:
sect = self.fat[sect]
@ -2013,7 +1986,6 @@ class OleFileIO:
if sect != ENDOFCHAIN:
raise IOError('incorrect last sector index in OLE stream')
def get_type(self, filename):
"""
Test if given filename exists as a stream or a storage in the OLE
@ -2033,7 +2005,6 @@ class OleFileIO:
except:
return False
def getmtime(self, filename):
"""
Return modification time of a stream/storage.
@ -2049,7 +2020,6 @@ class OleFileIO:
entry = self.direntries[sid]
return entry.getmtime()
def getctime(self, filename):
"""
Return creation time of a stream/storage.
@ -2065,7 +2035,6 @@ class OleFileIO:
entry = self.direntries[sid]
return entry.getctime()
def exists(self, filename):
"""
Test if given filename exists as a stream or a storage in the OLE
@ -2076,12 +2045,11 @@ class OleFileIO:
:returns: True if the object exists, else False.
"""
try:
sid = self._find(filename)
self._find(filename)
return True
except:
return False
def get_size(self, filename):
"""
Return size of a stream in the OLE container, in bytes.
@ -2098,7 +2066,6 @@ class OleFileIO:
raise TypeError('object is not an OLE stream')
return entry.size
def get_rootentry_name(self):
"""
Return root entry name. Should usually be 'Root Entry' or 'R' in most
@ -2106,7 +2073,6 @@ class OleFileIO:
"""
return self.root.name
def getproperties(self, filename, convert_time=False, no_conversion=None):
"""
Return properties described in substream.
@ -2138,7 +2104,7 @@ class OleFileIO:
# format id
s = fp.read(20)
fmtid = _clsid(s[:16])
# fmtid = _clsid(s[:16])
fp.seek(i32(s, 16))
# get section
@ -2275,7 +2241,7 @@ class OleFileIO:
if __name__ == "__main__":
import sys, optparse
import optparse
DEFAULT_LOG_LEVEL = "warning" # Default log level
LOG_LEVELS = {
@ -2355,11 +2321,11 @@ if __name__ == "__main__":
print('NOT a stream : type=%d' % st_type)
print()
## for streamname in ole.listdir():
## # print name using repr() to convert binary chars to \xNN:
## print('-', repr('/'.join(streamname)),'-', end=' ')
## print(ole.getmtime(streamname))
## print()
# for streamname in ole.listdir():
# # print name using repr() to convert binary chars to \xNN:
# print('-', repr('/'.join(streamname)),'-', end=' ')
# print(ole.getmtime(streamname))
# print()
print('Modification/Creation times of all directory entries:')
for entry in ole.direntries: