mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-02-23 23:30:34 +03:00
commit
793dff5a6b
193
PIL/OleFileIO.py
193
PIL/OleFileIO.py
|
@ -1,31 +1,29 @@
|
||||||
#!/usr/local/bin/python
|
#!/usr/local/bin/python
|
||||||
# -*- coding: latin-1 -*-
|
# -*- coding: latin-1 -*-
|
||||||
"""
|
## OleFileIO_PL:
|
||||||
OleFileIO_PL:
|
## Module to read Microsoft OLE2 files (also called Structured Storage or
|
||||||
Module to read Microsoft OLE2 files (also called Structured Storage or
|
## Microsoft Compound Document File Format), such as Microsoft Office
|
||||||
Microsoft Compound Document File Format), such as Microsoft Office
|
## documents, Image Composer and FlashPix files, Outlook messages, ...
|
||||||
documents, Image Composer and FlashPix files, Outlook messages, ...
|
## This version is compatible with Python 2.6+ and 3.x
|
||||||
This version is compatible with Python 2.6+ and 3.x
|
|
||||||
|
|
||||||
version 0.30 2014-02-04 Philippe Lagadec - http://www.decalage.info
|
## version 0.30 2014-02-04 Philippe Lagadec - http://www.decalage.info
|
||||||
|
|
||||||
Project website: http://www.decalage.info/python/olefileio
|
## Project website: http://www.decalage.info/python/olefileio
|
||||||
|
|
||||||
Improved version of the OleFileIO module from PIL library v1.1.6
|
## Improved version of the OleFileIO module from PIL library v1.1.6
|
||||||
See: http://www.pythonware.com/products/pil/index.htm
|
## See: http://www.pythonware.com/products/pil/index.htm
|
||||||
|
|
||||||
The Python Imaging Library (PIL) is
|
## The Python Imaging Library (PIL) is
|
||||||
|
|
||||||
Copyright (c) 1997-2005 by Secret Labs AB
|
## Copyright (c) 1997-2005 by Secret Labs AB
|
||||||
|
## Copyright (c) 1995-2005 by Fredrik Lundh
|
||||||
Copyright (c) 1995-2005 by Fredrik Lundh
|
|
||||||
|
|
||||||
OleFileIO_PL changes are Copyright (c) 2005-2014 by Philippe Lagadec
|
## OleFileIO_PL changes are Copyright (c) 2005-2014 by Philippe Lagadec
|
||||||
|
|
||||||
See source code and LICENSE.txt for information on usage and redistribution.
|
## See source code and LICENSE.txt for information on usage and redistribution.
|
||||||
|
|
||||||
|
## WARNING: THIS IS (STILL) WORK IN PROGRESS.
|
||||||
|
|
||||||
WARNING: THIS IS (STILL) WORK IN PROGRESS.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Starting with OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported
|
# Starting with OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported
|
||||||
# This import enables print() as a function rather than a keyword
|
# This import enables print() as a function rather than a keyword
|
||||||
|
@ -373,8 +371,9 @@ for key in list(vars().keys()):
|
||||||
def isOleFile (filename):
|
def isOleFile (filename):
|
||||||
"""
|
"""
|
||||||
Test if file is an OLE container (according to its header).
|
Test if file is an OLE container (according to its header).
|
||||||
filename: file name or path (str, unicode)
|
|
||||||
return: True if OLE, False otherwise.
|
:param filename: file name or path (str, unicode)
|
||||||
|
:returns: True if OLE, False otherwise.
|
||||||
"""
|
"""
|
||||||
f = open(filename, 'rb')
|
f = open(filename, 'rb')
|
||||||
header = f.read(len(MAGIC))
|
header = f.read(len(MAGIC))
|
||||||
|
@ -400,8 +399,8 @@ def i16(c, o = 0):
|
||||||
"""
|
"""
|
||||||
Converts a 2-bytes (16 bits) string to an integer.
|
Converts a 2-bytes (16 bits) string to an integer.
|
||||||
|
|
||||||
c: string containing bytes to convert
|
:param c: string containing bytes to convert
|
||||||
o: offset of bytes to convert in string
|
:param o: offset of bytes to convert in string
|
||||||
"""
|
"""
|
||||||
return i8(c[o]) | (i8(c[o+1])<<8)
|
return i8(c[o]) | (i8(c[o+1])<<8)
|
||||||
|
|
||||||
|
@ -410,8 +409,8 @@ def i32(c, o = 0):
|
||||||
"""
|
"""
|
||||||
Converts a 4-bytes (32 bits) string to an integer.
|
Converts a 4-bytes (32 bits) string to an integer.
|
||||||
|
|
||||||
c: string containing bytes to convert
|
:param c: string containing bytes to convert
|
||||||
o: offset of bytes to convert in string
|
:param o: offset of bytes to convert in string
|
||||||
"""
|
"""
|
||||||
## return int(ord(c[o])+(ord(c[o+1])<<8)+(ord(c[o+2])<<16)+(ord(c[o+3])<<24))
|
## return int(ord(c[o])+(ord(c[o+1])<<8)+(ord(c[o+2])<<16)+(ord(c[o+3])<<24))
|
||||||
## # [PL]: added int() because "<<" gives long int since Python 2.4
|
## # [PL]: added int() because "<<" gives long int since Python 2.4
|
||||||
|
@ -422,7 +421,8 @@ def i32(c, o = 0):
|
||||||
def _clsid(clsid):
|
def _clsid(clsid):
|
||||||
"""
|
"""
|
||||||
Converts a CLSID to a human-readable string.
|
Converts a CLSID to a human-readable string.
|
||||||
clsid: string of length 16.
|
|
||||||
|
:param clsid: string of length 16.
|
||||||
"""
|
"""
|
||||||
assert len(clsid) == 16
|
assert len(clsid) == 16
|
||||||
# if clsid is only made of null bytes, return an empty string:
|
# if clsid is only made of null bytes, return an empty string:
|
||||||
|
@ -442,8 +442,8 @@ def _unicode(s, errors='replace'):
|
||||||
"""
|
"""
|
||||||
Map unicode string to Latin 1. (Python with Unicode support)
|
Map unicode string to Latin 1. (Python with Unicode support)
|
||||||
|
|
||||||
s: UTF-16LE unicode string to convert to Latin-1
|
:param s: UTF-16LE unicode string to convert to Latin-1
|
||||||
errors: 'replace', 'ignore' or 'strict'.
|
:param errors: 'replace', 'ignore' or 'strict'.
|
||||||
"""
|
"""
|
||||||
#TODO: test if it OleFileIO works with Unicode strings, instead of
|
#TODO: test if it OleFileIO works with Unicode strings, instead of
|
||||||
# converting to Latin-1.
|
# converting to Latin-1.
|
||||||
|
@ -653,14 +653,14 @@ class _OleStream(io.BytesIO):
|
||||||
"""
|
"""
|
||||||
Constructor for _OleStream class.
|
Constructor for _OleStream class.
|
||||||
|
|
||||||
fp : file object, the OLE container or the MiniFAT stream
|
:param fp : file object, the OLE container or the MiniFAT stream
|
||||||
sect : sector index of first sector in the stream
|
:param sect : sector index of first sector in the stream
|
||||||
size : total size of the stream
|
:param size : total size of the stream
|
||||||
offset : offset in bytes for the first FAT or MiniFAT sector
|
:param offset : offset in bytes for the first FAT or MiniFAT sector
|
||||||
sectorsize: size of one sector
|
:param sectorsize: size of one sector
|
||||||
fat : array/list of sector indexes (FAT or MiniFAT)
|
:param fat : array/list of sector indexes (FAT or MiniFAT)
|
||||||
filesize : size of OLE file (for debugging)
|
:param filesize : size of OLE file (for debugging)
|
||||||
return : a BytesIO instance containing the OLE stream
|
:returns : a BytesIO instance containing the OLE stream
|
||||||
"""
|
"""
|
||||||
debug('_OleStream.__init__:')
|
debug('_OleStream.__init__:')
|
||||||
debug(' sect=%d (%X), size=%d, offset=%d, sectorsize=%d, len(fat)=%d, fp=%s'
|
debug(' sect=%d (%X), size=%d, offset=%d, sectorsize=%d, len(fat)=%d, fp=%s'
|
||||||
|
@ -796,9 +796,9 @@ class _OleDirectoryEntry:
|
||||||
Constructor for an _OleDirectoryEntry object.
|
Constructor for an _OleDirectoryEntry object.
|
||||||
Parses a 128-bytes entry from the OLE Directory stream.
|
Parses a 128-bytes entry from the OLE Directory stream.
|
||||||
|
|
||||||
entry : string (must be 128 bytes long)
|
:param entry : string (must be 128 bytes long)
|
||||||
sid : index of this directory entry in the OLE file directory
|
:param sid : index of this directory entry in the OLE file directory
|
||||||
olefile: OleFileIO containing this directory entry
|
:param olefile: OleFileIO containing this directory entry
|
||||||
"""
|
"""
|
||||||
self.sid = sid
|
self.sid = sid
|
||||||
# ref to olefile is stored for future use
|
# ref to olefile is stored for future use
|
||||||
|
@ -992,7 +992,7 @@ class _OleDirectoryEntry:
|
||||||
"""
|
"""
|
||||||
Return modification time of a directory entry.
|
Return modification time of a directory entry.
|
||||||
|
|
||||||
return: None if modification time is null, a python datetime object
|
:returns: None if modification time is null, a python datetime object
|
||||||
otherwise (UTC timezone)
|
otherwise (UTC timezone)
|
||||||
|
|
||||||
new in version 0.26
|
new in version 0.26
|
||||||
|
@ -1006,7 +1006,7 @@ class _OleDirectoryEntry:
|
||||||
"""
|
"""
|
||||||
Return creation time of a directory entry.
|
Return creation time of a directory entry.
|
||||||
|
|
||||||
return: None if modification time is null, a python datetime object
|
:returns: None if creation time is null, a python datetime object
|
||||||
otherwise (UTC timezone)
|
otherwise (UTC timezone)
|
||||||
|
|
||||||
new in version 0.26
|
new in version 0.26
|
||||||
|
@ -1023,7 +1023,8 @@ class OleFileIO:
|
||||||
OLE container object
|
OLE container object
|
||||||
|
|
||||||
This class encapsulates the interface to an OLE 2 structured
|
This class encapsulates the interface to an OLE 2 structured
|
||||||
storage file. Use the {@link listdir} and {@link openstream} methods to
|
storage file. Use the :py:meth:`~PIL.OleFileIO.OleFileIO.listdir` and
|
||||||
|
:py:meth:`~PIL.OleFileIO.OleFileIO.openstream` methods to
|
||||||
access the contents of this file.
|
access the contents of this file.
|
||||||
|
|
||||||
Object names are given as a list of strings, one for each subentry
|
Object names are given as a list of strings, one for each subentry
|
||||||
|
@ -1051,8 +1052,8 @@ class OleFileIO:
|
||||||
"""
|
"""
|
||||||
Constructor for OleFileIO class.
|
Constructor for OleFileIO class.
|
||||||
|
|
||||||
filename: file to open.
|
:param filename: file to open.
|
||||||
raise_defects: minimal level for defects to be raised as exceptions.
|
:param raise_defects: minimal level for defects to be raised as exceptions.
|
||||||
(use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a
|
(use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a
|
||||||
security-oriented application, see source code for details)
|
security-oriented application, see source code for details)
|
||||||
"""
|
"""
|
||||||
|
@ -1071,13 +1072,13 @@ class OleFileIO:
|
||||||
It may raise an IOError exception according to the minimal level chosen
|
It may raise an IOError exception according to the minimal level chosen
|
||||||
for the OleFileIO object.
|
for the OleFileIO object.
|
||||||
|
|
||||||
defect_level: defect level, possible values are:
|
:param defect_level: defect level, possible values are:
|
||||||
DEFECT_UNSURE : a case which looks weird, but not sure it's a defect
|
DEFECT_UNSURE : a case which looks weird, but not sure it's a defect
|
||||||
DEFECT_POTENTIAL : a potential defect
|
DEFECT_POTENTIAL : a potential defect
|
||||||
DEFECT_INCORRECT : an error according to specifications, but parsing can go on
|
DEFECT_INCORRECT : an error according to specifications, but parsing can go on
|
||||||
DEFECT_FATAL : an error which cannot be ignored, parsing is impossible
|
DEFECT_FATAL : an error which cannot be ignored, parsing is impossible
|
||||||
message: string describing the defect, used with raised exception.
|
:param message: string describing the defect, used with raised exception.
|
||||||
exception_type: exception class to be raised, IOError by default
|
:param exception_type: exception class to be raised, IOError by default
|
||||||
"""
|
"""
|
||||||
# added by [PL]
|
# added by [PL]
|
||||||
if defect_level >= self._raise_defects_level:
|
if defect_level >= self._raise_defects_level:
|
||||||
|
@ -1092,7 +1093,7 @@ class OleFileIO:
|
||||||
Open an OLE2 file.
|
Open an OLE2 file.
|
||||||
Reads the header, FAT and directory.
|
Reads the header, FAT and directory.
|
||||||
|
|
||||||
filename: string-like or file-like object
|
:param filename: string-like or file-like object
|
||||||
"""
|
"""
|
||||||
#[PL] check if filename is a string-like or file-like object:
|
#[PL] check if filename is a string-like or file-like object:
|
||||||
# (it is better to check for a read() method)
|
# (it is better to check for a read() method)
|
||||||
|
@ -1279,8 +1280,8 @@ class OleFileIO:
|
||||||
Checks if a stream has not been already referenced elsewhere.
|
Checks if a stream has not been already referenced elsewhere.
|
||||||
This method should only be called once for each known stream, and only
|
This method should only be called once for each known stream, and only
|
||||||
if stream size is not null.
|
if stream size is not null.
|
||||||
first_sect: index of first sector of the stream in FAT
|
:param first_sect: index of first sector of the stream in FAT
|
||||||
minifat: if True, stream is located in the MiniFAT, else in the FAT
|
:param minifat: if True, stream is located in the MiniFAT, else in the FAT
|
||||||
"""
|
"""
|
||||||
if minifat:
|
if minifat:
|
||||||
debug('_check_duplicate_stream: sect=%d in MiniFAT' % first_sect)
|
debug('_check_duplicate_stream: sect=%d in MiniFAT' % first_sect)
|
||||||
|
@ -1374,8 +1375,9 @@ class OleFileIO:
|
||||||
def loadfat_sect(self, sect):
|
def loadfat_sect(self, sect):
|
||||||
"""
|
"""
|
||||||
Adds the indexes of the given sector to the FAT
|
Adds the indexes of the given sector to the FAT
|
||||||
sect: string containing the first FAT sector, or array of long integers
|
|
||||||
return: index of last FAT sector.
|
:param sect: string containing the first FAT sector, or array of long integers
|
||||||
|
:returns: index of last FAT sector.
|
||||||
"""
|
"""
|
||||||
# a FAT sector is an array of ulong integers.
|
# a FAT sector is an array of ulong integers.
|
||||||
if isinstance(sect, array.array):
|
if isinstance(sect, array.array):
|
||||||
|
@ -1508,8 +1510,9 @@ class OleFileIO:
|
||||||
def getsect(self, sect):
|
def getsect(self, sect):
|
||||||
"""
|
"""
|
||||||
Read given sector from file on disk.
|
Read given sector from file on disk.
|
||||||
sect: sector index
|
|
||||||
returns a string containing the sector data.
|
:param sect: sector index
|
||||||
|
:returns: a string containing the sector data.
|
||||||
"""
|
"""
|
||||||
# [PL] this original code was wrong when sectors are 4KB instead of
|
# [PL] this original code was wrong when sectors are 4KB instead of
|
||||||
# 512 bytes:
|
# 512 bytes:
|
||||||
|
@ -1533,7 +1536,8 @@ class OleFileIO:
|
||||||
def loaddirectory(self, sect):
|
def loaddirectory(self, sect):
|
||||||
"""
|
"""
|
||||||
Load the directory.
|
Load the directory.
|
||||||
sect: sector index of directory stream.
|
|
||||||
|
:param sect: sector index of directory stream.
|
||||||
"""
|
"""
|
||||||
# The directory is stored in a standard
|
# The directory is stored in a standard
|
||||||
# substream, independent of its size.
|
# substream, independent of its size.
|
||||||
|
@ -1570,9 +1574,10 @@ class OleFileIO:
|
||||||
Load a directory entry from the directory.
|
Load a directory entry from the directory.
|
||||||
This method should only be called once for each storage/stream when
|
This method should only be called once for each storage/stream when
|
||||||
loading the directory.
|
loading the directory.
|
||||||
sid: index of storage/stream in the directory.
|
|
||||||
return: a _OleDirectoryEntry object
|
:param sid: index of storage/stream in the directory.
|
||||||
raise: IOError if the entry has always been referenced.
|
:returns: a _OleDirectoryEntry object
|
||||||
|
:exception IOError: if the entry has already been referenced.
|
||||||
"""
|
"""
|
||||||
# check if SID is OK:
|
# check if SID is OK:
|
||||||
if sid<0 or sid>=len(self.direntries):
|
if sid<0 or sid>=len(self.direntries):
|
||||||
|
@ -1601,9 +1606,9 @@ class OleFileIO:
|
||||||
Open a stream, either in FAT or MiniFAT according to its size.
|
Open a stream, either in FAT or MiniFAT according to its size.
|
||||||
(openstream helper)
|
(openstream helper)
|
||||||
|
|
||||||
start: index of first sector
|
:param start: index of first sector
|
||||||
size: size of stream (or nothing if size is unknown)
|
:param size: size of stream (or nothing if size is unknown)
|
||||||
force_FAT: if False (default), stream will be opened in FAT or MiniFAT
|
:param force_FAT: if False (default), stream will be opened in FAT or MiniFAT
|
||||||
according to size. If True, it will always be opened in FAT.
|
according to size. If True, it will always be opened in FAT.
|
||||||
"""
|
"""
|
||||||
debug('OleFileIO.open(): sect=%d, size=%d, force_FAT=%s' %
|
debug('OleFileIO.open(): sect=%d, size=%d, force_FAT=%s' %
|
||||||
|
@ -1633,11 +1638,11 @@ class OleFileIO:
|
||||||
def _list(self, files, prefix, node, streams=True, storages=False):
|
def _list(self, files, prefix, node, streams=True, storages=False):
|
||||||
"""
|
"""
|
||||||
(listdir helper)
|
(listdir helper)
|
||||||
files: list of files to fill in
|
:param files: list of files to fill in
|
||||||
prefix: current location in storage tree (list of names)
|
:param prefix: current location in storage tree (list of names)
|
||||||
node: current node (_OleDirectoryEntry object)
|
:param node: current node (_OleDirectoryEntry object)
|
||||||
streams: bool, include streams if True (True by default) - new in v0.26
|
:param streams: bool, include streams if True (True by default) - new in v0.26
|
||||||
storages: bool, include storages if True (False by default) - new in v0.26
|
:param storages: bool, include storages if True (False by default) - new in v0.26
|
||||||
(note: the root storage is never included)
|
(note: the root storage is never included)
|
||||||
"""
|
"""
|
||||||
prefix = prefix + [node.name]
|
prefix = prefix + [node.name]
|
||||||
|
@ -1660,9 +1665,9 @@ class OleFileIO:
|
||||||
"""
|
"""
|
||||||
Return a list of streams stored in this file
|
Return a list of streams stored in this file
|
||||||
|
|
||||||
streams: bool, include streams if True (True by default) - new in v0.26
|
:param streams: bool, include streams if True (True by default) - new in v0.26
|
||||||
storages: bool, include storages if True (False by default) - new in v0.26
|
:param storages: bool, include storages if True (False by default) - new in v0.26
|
||||||
(note: the root storage is never included)
|
(note: the root storage is never included)
|
||||||
"""
|
"""
|
||||||
files = []
|
files = []
|
||||||
self._list(files, [], self.root, streams, storages)
|
self._list(files, [], self.root, streams, storages)
|
||||||
|
@ -1674,12 +1679,13 @@ class OleFileIO:
|
||||||
Returns directory entry of given filename. (openstream helper)
|
Returns directory entry of given filename. (openstream helper)
|
||||||
Note: this method is case-insensitive.
|
Note: this method is case-insensitive.
|
||||||
|
|
||||||
filename: path of stream in storage tree (except root entry), either:
|
:param filename: path of stream in storage tree (except root entry), either:
|
||||||
|
|
||||||
- a string using Unix path syntax, for example:
|
- a string using Unix path syntax, for example:
|
||||||
'storage_1/storage_1.2/stream'
|
'storage_1/storage_1.2/stream'
|
||||||
- a list of storage filenames, path to the desired stream/storage.
|
- a list of storage filenames, path to the desired stream/storage.
|
||||||
Example: ['storage_1', 'storage_1.2', 'stream']
|
Example: ['storage_1', 'storage_1.2', 'stream']
|
||||||
return: sid of requested filename
|
:returns: sid of requested filename
|
||||||
raise IOError if file not found
|
raise IOError if file not found
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -1703,15 +1709,15 @@ class OleFileIO:
|
||||||
"""
|
"""
|
||||||
Open a stream as a read-only file object (BytesIO).
|
Open a stream as a read-only file object (BytesIO).
|
||||||
|
|
||||||
filename: path of stream in storage tree (except root entry), either:
|
:param filename: path of stream in storage tree (except root entry), either:
|
||||||
|
|
||||||
- a string using Unix path syntax, for example:
|
- a string using Unix path syntax, for example:
|
||||||
'storage_1/storage_1.2/stream'
|
'storage_1/storage_1.2/stream'
|
||||||
- a list of storage filenames, path to the desired stream/storage.
|
- a list of storage filenames, path to the desired stream/storage.
|
||||||
Example: ['storage_1', 'storage_1.2', 'stream']
|
Example: ['storage_1', 'storage_1.2', 'stream']
|
||||||
|
|
||||||
return: file object (read-only)
|
:returns: file object (read-only)
|
||||||
raise IOError if filename not found, or if this is not a stream.
|
:exception IOError: if filename not found, or if this is not a stream.
|
||||||
"""
|
"""
|
||||||
sid = self._find(filename)
|
sid = self._find(filename)
|
||||||
entry = self.direntries[sid]
|
entry = self.direntries[sid]
|
||||||
|
@ -1725,8 +1731,8 @@ class OleFileIO:
|
||||||
Test if given filename exists as a stream or a storage in the OLE
|
Test if given filename exists as a stream or a storage in the OLE
|
||||||
container, and return its type.
|
container, and return its type.
|
||||||
|
|
||||||
filename: path of stream in storage tree. (see openstream for syntax)
|
:param filename: path of stream in storage tree. (see openstream for syntax)
|
||||||
return: False if object does not exist, its entry type (>0) otherwise:
|
:returns: False if object does not exist, its entry type (>0) otherwise:
|
||||||
|
|
||||||
- STGTY_STREAM: a stream
|
- STGTY_STREAM: a stream
|
||||||
- STGTY_STORAGE: a storage
|
- STGTY_STORAGE: a storage
|
||||||
|
@ -1744,10 +1750,10 @@ class OleFileIO:
|
||||||
"""
|
"""
|
||||||
Return modification time of a stream/storage.
|
Return modification time of a stream/storage.
|
||||||
|
|
||||||
filename: path of stream/storage in storage tree. (see openstream for
|
:param filename: path of stream/storage in storage tree. (see openstream for
|
||||||
syntax)
|
syntax)
|
||||||
return: None if modification time is null, a python datetime object
|
:returns: None if modification time is null, a python datetime object
|
||||||
otherwise (UTC timezone)
|
otherwise (UTC timezone)
|
||||||
|
|
||||||
new in version 0.26
|
new in version 0.26
|
||||||
"""
|
"""
|
||||||
|
@ -1760,10 +1766,10 @@ class OleFileIO:
|
||||||
"""
|
"""
|
||||||
Return creation time of a stream/storage.
|
Return creation time of a stream/storage.
|
||||||
|
|
||||||
filename: path of stream/storage in storage tree. (see openstream for
|
:param filename: path of stream/storage in storage tree. (see openstream for
|
||||||
syntax)
|
syntax)
|
||||||
return: None if creation time is null, a python datetime object
|
:returns: None if creation time is null, a python datetime object
|
||||||
otherwise (UTC timezone)
|
otherwise (UTC timezone)
|
||||||
|
|
||||||
new in version 0.26
|
new in version 0.26
|
||||||
"""
|
"""
|
||||||
|
@ -1777,8 +1783,8 @@ class OleFileIO:
|
||||||
Test if given filename exists as a stream or a storage in the OLE
|
Test if given filename exists as a stream or a storage in the OLE
|
||||||
container.
|
container.
|
||||||
|
|
||||||
filename: path of stream in storage tree. (see openstream for syntax)
|
:param filename: path of stream in storage tree. (see openstream for syntax)
|
||||||
return: True if object exist, else False.
|
:returns: True if object exists, else False.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
sid = self._find(filename)
|
sid = self._find(filename)
|
||||||
|
@ -1791,9 +1797,10 @@ class OleFileIO:
|
||||||
"""
|
"""
|
||||||
Return size of a stream in the OLE container, in bytes.
|
Return size of a stream in the OLE container, in bytes.
|
||||||
|
|
||||||
filename: path of stream in storage tree (see openstream for syntax)
|
:param filename: path of stream in storage tree (see openstream for syntax)
|
||||||
return: size in bytes (long integer)
|
:returns: size in bytes (long integer)
|
||||||
raise: IOError if file not found, TypeError if this is not a stream.
|
:exception IOError: if file not found
|
||||||
|
:exception TypeError: if this is not a stream
|
||||||
"""
|
"""
|
||||||
sid = self._find(filename)
|
sid = self._find(filename)
|
||||||
entry = self.direntries[sid]
|
entry = self.direntries[sid]
|
||||||
|
@ -1815,11 +1822,11 @@ class OleFileIO:
|
||||||
"""
|
"""
|
||||||
Return properties described in substream.
|
Return properties described in substream.
|
||||||
|
|
||||||
filename: path of stream in storage tree (see openstream for syntax)
|
:param filename: path of stream in storage tree (see openstream for syntax)
|
||||||
convert_time: bool, if True timestamps will be converted to Python datetime
|
:param convert_time: bool, if True timestamps will be converted to Python datetime
|
||||||
no_conversion: None or list of int, timestamps not to be converted
|
:param no_conversion: None or list of int, timestamps not to be converted
|
||||||
(for example total editing time is not a real timestamp)
|
(for example total editing time is not a real timestamp)
|
||||||
return: a dictionary of values indexed by id (integer)
|
:returns: a dictionary of values indexed by id (integer)
|
||||||
"""
|
"""
|
||||||
# make sure no_conversion is a list, just to simplify code below:
|
# make sure no_conversion is a list, just to simplify code below:
|
||||||
if no_conversion == None:
|
if no_conversion == None:
|
||||||
|
|
15
docs/PIL.rst
15
docs/PIL.rst
|
@ -72,13 +72,6 @@ can be found here.
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
:mod:`ImageMorph` Module
|
|
||||||
------------------------
|
|
||||||
|
|
||||||
.. automodule:: PIL.ImageMorph
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
:mod:`ImageShow` Module
|
:mod:`ImageShow` Module
|
||||||
-----------------------
|
-----------------------
|
||||||
|
@ -104,14 +97,6 @@ can be found here.
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
:mod:`OleFileIO` Module
|
|
||||||
-----------------------
|
|
||||||
|
|
||||||
.. automodule:: PIL.OleFileIO
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
:mod:`PaletteFile` Module
|
:mod:`PaletteFile` Module
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
|
|
13
docs/reference/ImageMorph.rst
Normal file
13
docs/reference/ImageMorph.rst
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
.. py:module:: PIL.ImageMorph
|
||||||
|
.. py:currentmodule:: PIL.ImageMorph
|
||||||
|
|
||||||
|
:py:mod:`ImageMorph` Module
|
||||||
|
===========================
|
||||||
|
|
||||||
|
The :py:mod:`ImageMorph` module provides morphology operations on images.
|
||||||
|
|
||||||
|
.. automodule:: PIL.ImageMorph
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
:noindex:
|
364
docs/reference/OleFileIO.rst
Normal file
364
docs/reference/OleFileIO.rst
Normal file
|
@ -0,0 +1,364 @@
|
||||||
|
.. py:module:: PIL.OleFileIO
|
||||||
|
.. py:currentmodule:: PIL.OleFileIO
|
||||||
|
|
||||||
|
:py:mod:`OleFileIO` Module
|
||||||
|
===========================
|
||||||
|
|
||||||
|
The :py:mod:`OleFileIO` module reads Microsoft OLE2 files (also called
|
||||||
|
Structured Storage or Microsoft Compound Document File Format), such
|
||||||
|
as Microsoft Office documents, Image Composer and FlashPix files, and
|
||||||
|
Outlook messages.
|
||||||
|
|
||||||
|
This module is the `OleFileIO\_PL`_ project by Philippe Lagadec, v0.30,
|
||||||
|
merged back into Pillow.
|
||||||
|
|
||||||
|
.. _OleFileIO\_PL: http://www.decalage.info/python/olefileio
|
||||||
|
|
||||||
|
How to use this module
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
For more information, see also the file **PIL/OleFileIO.py**, sample
|
||||||
|
code at the end of the module itself, and docstrings within the code.
|
||||||
|
|
||||||
|
About the structure of OLE files
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
An OLE file can be seen as a mini file system or a Zip archive: It
|
||||||
|
contains **streams** of data that look like files embedded within the
|
||||||
|
OLE file. Each stream has a name. For example, the main stream of a MS
|
||||||
|
Word document containing its text is named "WordDocument".
|
||||||
|
|
||||||
|
An OLE file can also contain **storages**. A storage is a folder that
|
||||||
|
contains streams or other storages. For example, a MS Word document with
|
||||||
|
VBA macros has a storage called "Macros".
|
||||||
|
|
||||||
|
Special streams can contain **properties**. A property is a specific
|
||||||
|
value that can be used to store information such as the metadata of a
|
||||||
|
document (title, author, creation date, etc). Property stream names
|
||||||
|
usually start with the character ``\x05``.
|
||||||
|
|
||||||
|
For example, a typical MS Word document may look like this:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
\x05DocumentSummaryInformation (stream)
|
||||||
|
\x05SummaryInformation (stream)
|
||||||
|
WordDocument (stream)
|
||||||
|
Macros (storage)
|
||||||
|
PROJECT (stream)
|
||||||
|
PROJECTwm (stream)
|
||||||
|
VBA (storage)
|
||||||
|
Module1 (stream)
|
||||||
|
ThisDocument (stream)
|
||||||
|
_VBA_PROJECT (stream)
|
||||||
|
dir (stream)
|
||||||
|
ObjectPool (storage)
|
||||||
|
|
||||||
|
Test if a file is an OLE container
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Use isOleFile to check if the first bytes of the file contain the Magic
|
||||||
|
for OLE files, before opening it. isOleFile returns True if it is an OLE
|
||||||
|
file, False otherwise.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
assert OleFileIO.isOleFile('myfile.doc')
|
||||||
|
|
||||||
|
Open an OLE file from disk
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Create an OleFileIO object with the file path as parameter:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
ole = OleFileIO.OleFileIO('myfile.doc')
|
||||||
|
|
||||||
|
Open an OLE file from a file-like object
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
This is useful if the file is not on disk, e.g. already stored in a
|
||||||
|
string or as a file-like object.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
ole = OleFileIO.OleFileIO(f)
|
||||||
|
|
||||||
|
For example the code below reads a file into a string, then uses BytesIO
|
||||||
|
to turn it into a file-like object.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
data = open('myfile.doc', 'rb').read()
|
||||||
|
f = io.BytesIO(data) # or StringIO.StringIO for Python 2.x
|
||||||
|
ole = OleFileIO.OleFileIO(f)
|
||||||
|
|
||||||
|
How to handle malformed OLE files
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
By default, the parser is configured to be as robust and permissive as
|
||||||
|
possible, allowing it to parse most malformed OLE files. Only fatal errors
|
||||||
|
will raise an exception. It is possible to tell the parser to be more
|
||||||
|
strict in order to raise exceptions for files that do not fully conform
|
||||||
|
to the OLE specifications, using the raise\_defects option:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
ole = OleFileIO.OleFileIO('myfile.doc', raise_defects=DEFECT_INCORRECT)
|
||||||
|
|
||||||
|
When the parsing is done, the list of non-fatal issues detected is
|
||||||
|
available as a list in the parsing\_issues attribute of the OleFileIO
|
||||||
|
object:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
print('Non-fatal issues raised during parsing:')
|
||||||
|
if ole.parsing_issues:
|
||||||
|
for exctype, msg in ole.parsing_issues:
|
||||||
|
print('- %s: %s' % (exctype.__name__, msg))
|
||||||
|
else:
|
||||||
|
print('None')
|
||||||
|
|
||||||
|
Syntax for stream and storage path
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Two different syntaxes are allowed for methods that need or return the
|
||||||
|
path of streams and storages:
|
||||||
|
|
||||||
|
1) Either a **list of strings** including all the storages from the root
|
||||||
|
up to the stream/storage name. For example a stream called
|
||||||
|
"WordDocument" at the root will have ['WordDocument'] as full path. A
|
||||||
|
stream called "ThisDocument" located in the storage "Macros/VBA" will
|
||||||
|
be ['Macros', 'VBA', 'ThisDocument']. This is the original syntax
|
||||||
|
from PIL. While hard to read and not very convenient, this syntax
|
||||||
|
works in all cases.
|
||||||
|
|
||||||
|
2) Or a **single string with slashes** to separate storage and stream
|
||||||
|
names (similar to the Unix path syntax). The previous examples would
|
||||||
|
be 'WordDocument' and 'Macros/VBA/ThisDocument'. This syntax is
|
||||||
|
easier, but may fail if a stream or storage name contains a slash.
|
||||||
|
|
||||||
|
Both are case-insensitive.
|
||||||
|
|
||||||
|
Switching between the two is easy:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
slash_path = '/'.join(list_path)
|
||||||
|
list_path = slash_path.split('/')
|
||||||
|
|
||||||
|
Get the list of streams
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
listdir() returns a list of all the streams contained in the OLE file,
|
||||||
|
including those stored in storages. Each stream is itself represented as a
|
||||||
|
list, as described above.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
print(ole.listdir())
|
||||||
|
|
||||||
|
Sample result:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
[['\x01CompObj'], ['\x05DocumentSummaryInformation'], ['\x05SummaryInformation']
|
||||||
|
, ['1Table'], ['Macros', 'PROJECT'], ['Macros', 'PROJECTwm'], ['Macros', 'VBA',
|
||||||
|
'Module1'], ['Macros', 'VBA', 'ThisDocument'], ['Macros', 'VBA', '_VBA_PROJECT']
|
||||||
|
, ['Macros', 'VBA', 'dir'], ['ObjectPool'], ['WordDocument']]
|
||||||
|
|
||||||
|
As an option it is possible to choose if storages should also be listed,
|
||||||
|
with or without streams:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
ole.listdir (streams=False, storages=True)
|
||||||
|
|
||||||
|
Test if known streams/storages exist:
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
exists(path) checks if a given stream or storage exists in the OLE file.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
if ole.exists('worddocument'):
|
||||||
|
print("This is a Word document.")
|
||||||
|
if ole.exists('macros/vba'):
|
||||||
|
print("This document seems to contain VBA macros.")
|
||||||
|
|
||||||
|
Read data from a stream
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
openstream(path) opens a stream as a file-like object.
|
||||||
|
|
||||||
|
The following example extracts the "Pictures" stream from a PPT file:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
pics = ole.openstream('Pictures')
|
||||||
|
data = pics.read()
|
||||||
|
|
||||||
|
|
||||||
|
Get information about a stream/storage
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Several methods can provide the size, type and timestamps of a given
|
||||||
|
stream/storage:
|
||||||
|
|
||||||
|
get\_size(path) returns the size of a stream in bytes:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
s = ole.get_size('WordDocument')
|
||||||
|
|
||||||
|
get\_type(path) returns the type of a stream/storage, as one of the
|
||||||
|
following constants: STGTY\_STREAM for a stream, STGTY\_STORAGE for a
|
||||||
|
storage, STGTY\_ROOT for the root entry, and False for a non existing
|
||||||
|
path.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
t = ole.get_type('WordDocument')
|
||||||
|
|
||||||
|
get\_ctime(path) and get\_mtime(path) return the creation and
|
||||||
|
modification timestamps of a stream/storage, as a Python datetime object
|
||||||
|
with UTC timezone. Please note that these timestamps are only present if
|
||||||
|
the application that created the OLE file explicitly stored them, which
|
||||||
|
is rarely the case. When not present, these methods return None.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
c = ole.get_ctime('WordDocument')
|
||||||
|
m = ole.get_mtime('WordDocument')
|
||||||
|
|
||||||
|
The root storage is a special case: You can get its creation and
|
||||||
|
modification timestamps using the OleFileIO.root attribute:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
c = ole.root.getctime()
|
||||||
|
m = ole.root.getmtime()
|
||||||
|
|
||||||
|
Extract metadata
|
||||||
|
~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
get\_metadata() will check if standard property streams exist, parse all
|
||||||
|
the properties they contain, and return an OleMetadata object with the
|
||||||
|
found properties as attributes.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
meta = ole.get_metadata()
|
||||||
|
print('Author:', meta.author)
|
||||||
|
print('Title:', meta.title)
|
||||||
|
print('Creation date:', meta.create_time)
|
||||||
|
# print all metadata:
|
||||||
|
meta.dump()
|
||||||
|
|
||||||
|
Available attributes include:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
codepage, title, subject, author, keywords, comments, template,
|
||||||
|
last_saved_by, revision_number, total_edit_time, last_printed, create_time,
|
||||||
|
last_saved_time, num_pages, num_words, num_chars, thumbnail,
|
||||||
|
creating_application, security, codepage_doc, category, presentation_target,
|
||||||
|
bytes, lines, paragraphs, slides, notes, hidden_slides, mm_clips,
|
||||||
|
scale_crop, heading_pairs, titles_of_parts, manager, company, links_dirty,
|
||||||
|
chars_with_spaces, unused, shared_doc, link_base, hlinks, hlinks_changed,
|
||||||
|
version, dig_sig, content_type, content_status, language, doc_version
|
||||||
|
|
||||||
|
See the source code of the OleMetadata class for more information.
|
||||||
|
|
||||||
|
Parse a property stream
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
getproperties(path) can be used to parse any property stream that is
|
||||||
|
not handled by get\_metadata. It returns a dictionary indexed by
|
||||||
|
integers. Each integer is the index of the property, pointing to its
|
||||||
|
value. For example in the standard property stream
|
||||||
|
'\\x05SummaryInformation', the document title is property #2, and the
|
||||||
|
subject is #3.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
p = ole.getproperties('specialprops')
|
||||||
|
|
||||||
|
By default as in the original PIL version, timestamp properties are
|
||||||
|
converted into a number of seconds since Jan 1, 1601. With the option
|
||||||
|
convert\_time, you can obtain more convenient Python datetime objects
|
||||||
|
(UTC timezone). If some time properties should not be converted (such as
|
||||||
|
total editing time in '\\x05SummaryInformation'), the list of indexes can
|
||||||
|
be passed as no\_conversion:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
p = ole.getproperties('specialprops', convert_time=True, no_conversion=[10])
|
||||||
|
|
||||||
|
Close the OLE file
|
||||||
|
~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Unless your application is a simple script that terminates after
|
||||||
|
processing an OLE file, do not forget to close each OleFileIO object
|
||||||
|
after parsing to close the file on disk.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
ole.close()
|
||||||
|
|
||||||
|
Use OleFileIO as a script
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
OleFileIO can also be used as a script from the command-line to
|
||||||
|
display the structure of an OLE file and its metadata, for example:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
PIL/OleFileIO.py myfile.doc
|
||||||
|
|
||||||
|
You can use the option -c to check that all streams can be read fully,
|
||||||
|
and -d to generate very verbose debugging information.
|
||||||
|
|
||||||
|
How to contribute
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
The code is available in `a Mercurial repository on
|
||||||
|
bitbucket <https://bitbucket.org/decalage/olefileio_pl>`_. You may use
|
||||||
|
it to submit enhancements or to report any issue.
|
||||||
|
|
||||||
|
If you would like to help us improve this module, or simply provide
|
||||||
|
feedback, please `contact me <http://decalage.info/contact>`_. You can
|
||||||
|
help in many ways:
|
||||||
|
|
||||||
|
- test this module on different platforms / Python versions
|
||||||
|
- find and report bugs
|
||||||
|
- improve documentation, code samples, docstrings
|
||||||
|
- write unittest test cases
|
||||||
|
- provide tricky malformed files
|
||||||
|
|
||||||
|
How to report bugs
|
||||||
|
------------------
|
||||||
|
|
||||||
|
To report a bug, for example a normal file which is not parsed
|
||||||
|
correctly, please use the `issue reporting
|
||||||
|
page <https://bitbucket.org/decalage/olefileio_pl/issues?status=new&status=open>`_,
|
||||||
|
or if you prefer to do it privately, use this `contact
|
||||||
|
form <http://decalage.info/contact>`_. Please provide all the
|
||||||
|
information about the context and how to reproduce the bug.
|
||||||
|
|
||||||
|
If possible, please attach the debugging output of OleFileIO. For this,
|
||||||
|
run the following command:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
PIL/OleFileIO.py -d -c file >debug.txt
|
||||||
|
|
||||||
|
|
||||||
|
Classes and Methods
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
.. automodule:: PIL.OleFileIO
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
:noindex:
|
|
@ -16,6 +16,7 @@ Reference
|
||||||
ImageFont
|
ImageFont
|
||||||
ImageGrab
|
ImageGrab
|
||||||
ImageMath
|
ImageMath
|
||||||
|
ImageMorph
|
||||||
ImageOps
|
ImageOps
|
||||||
ImagePalette
|
ImagePalette
|
||||||
ImagePath
|
ImagePath
|
||||||
|
@ -25,5 +26,6 @@ Reference
|
||||||
ImageTk
|
ImageTk
|
||||||
ImageWin
|
ImageWin
|
||||||
ExifTags
|
ExifTags
|
||||||
|
OleFileIO
|
||||||
PSDraw
|
PSDraw
|
||||||
../PIL
|
../PIL
|
||||||
|
|
Loading…
Reference in New Issue
Block a user