mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-01-13 10:46:16 +03:00
Merge pull request #2199 from jdufresne/dep-olefile
Remove vendored version of olefile Python package in favor of upstream
This commit is contained in:
commit
80b78be21a
|
@ -46,6 +46,8 @@ install:
|
||||||
- pushd depends && ./install_extra_test_images.sh && popd
|
- pushd depends && ./install_extra_test_images.sh && popd
|
||||||
|
|
||||||
|
|
||||||
|
- travis_retry pip install -e .
|
||||||
|
|
||||||
before_script:
|
before_script:
|
||||||
# Qt needs a display for some of the tests, and it's only run on the system site packages install
|
# Qt needs a display for some of the tests, and it's only run on the system site packages install
|
||||||
- "export DISPLAY=:99.0"
|
- "export DISPLAY=:99.0"
|
||||||
|
|
|
@ -17,11 +17,14 @@
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
from PIL import Image, ImageFile
|
from PIL import Image, ImageFile, _binary
|
||||||
from PIL.OleFileIO import i8, i32, MAGIC, OleFileIO
|
|
||||||
|
import olefile
|
||||||
|
|
||||||
__version__ = "0.1"
|
__version__ = "0.1"
|
||||||
|
|
||||||
|
i32 = _binary.i32le
|
||||||
|
i8 = _binary.i8
|
||||||
|
|
||||||
# we map from colour field tuples to (mode, rawmode) descriptors
|
# we map from colour field tuples to (mode, rawmode) descriptors
|
||||||
MODES = {
|
MODES = {
|
||||||
|
@ -43,7 +46,7 @@ MODES = {
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
|
|
||||||
def _accept(prefix):
|
def _accept(prefix):
|
||||||
return prefix[:8] == MAGIC
|
return prefix[:8] == olefile.MAGIC
|
||||||
|
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -60,7 +63,7 @@ class FpxImageFile(ImageFile.ImageFile):
|
||||||
# to be a FlashPix file
|
# to be a FlashPix file
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.ole = OleFileIO(self.fp)
|
self.ole = olefile.OleFileIO(self.fp)
|
||||||
except IOError:
|
except IOError:
|
||||||
raise SyntaxError("not an FPX file; invalid OLE file")
|
raise SyntaxError("not an FPX file; invalid OLE file")
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,8 @@
|
||||||
|
|
||||||
|
|
||||||
from PIL import Image, TiffImagePlugin
|
from PIL import Image, TiffImagePlugin
|
||||||
from PIL.OleFileIO import MAGIC, OleFileIO
|
|
||||||
|
import olefile
|
||||||
|
|
||||||
__version__ = "0.1"
|
__version__ = "0.1"
|
||||||
|
|
||||||
|
@ -28,7 +29,7 @@ __version__ = "0.1"
|
||||||
|
|
||||||
|
|
||||||
def _accept(prefix):
|
def _accept(prefix):
|
||||||
return prefix[:8] == MAGIC
|
return prefix[:8] == olefile.MAGIC
|
||||||
|
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -45,7 +46,7 @@ class MicImageFile(TiffImagePlugin.TiffImageFile):
|
||||||
# to be a Microsoft Image Composer file
|
# to be a Microsoft Image Composer file
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.ole = OleFileIO(self.fp)
|
self.ole = olefile.OleFileIO(self.fp)
|
||||||
except IOError:
|
except IOError:
|
||||||
raise SyntaxError("not an MIC file; invalid OLE file")
|
raise SyntaxError("not an MIC file; invalid OLE file")
|
||||||
|
|
||||||
|
|
|
@ -1,180 +0,0 @@
|
||||||
olefile (formerly OleFileIO_PL)
|
|
||||||
===============================
|
|
||||||
|
|
||||||
[olefile](http://www.decalage.info/olefile) is a Python package to parse, read and write
|
|
||||||
[Microsoft OLE2 files](http://en.wikipedia.org/wiki/Compound_File_Binary_Format)
|
|
||||||
(also called Structured Storage, Compound File Binary Format or Compound Document File Format),
|
|
||||||
such as Microsoft Office 97-2003 documents, vbaProject.bin in MS Office 2007+ files, Image Composer
|
|
||||||
and FlashPix files, Outlook messages, StickyNotes, several Microscopy file formats, McAfee antivirus quarantine files,
|
|
||||||
etc.
|
|
||||||
|
|
||||||
|
|
||||||
**Quick links:** [Home page](http://www.decalage.info/olefile) -
|
|
||||||
[Download/Install](https://bitbucket.org/decalage/olefileio_pl/wiki/Install) -
|
|
||||||
[Documentation](https://bitbucket.org/decalage/olefileio_pl/wiki) -
|
|
||||||
[Report Issues/Suggestions/Questions](https://bitbucket.org/decalage/olefileio_pl/issues?status=new&status=open) -
|
|
||||||
[Contact the author](http://decalage.info/contact) -
|
|
||||||
[Repository](https://bitbucket.org/decalage/olefileio_pl) -
|
|
||||||
[Updates on Twitter](https://twitter.com/decalage2)
|
|
||||||
|
|
||||||
|
|
||||||
News
|
|
||||||
----
|
|
||||||
|
|
||||||
Follow all updates and news on Twitter: <https://twitter.com/decalage2>
|
|
||||||
|
|
||||||
- **2015-01-25 v0.42**: improved handling of special characters in stream/storage names on Python 2.x (using UTF-8
|
|
||||||
instead of Latin-1), fixed bug in listdir with empty storages.
|
|
||||||
- 2014-11-25 v0.41: OleFileIO.open and isOleFile now support OLE files stored in byte strings, fixed installer for
|
|
||||||
python 3, added support for Jython (Niko Ehrenfeuchter)
|
|
||||||
- 2014-10-01 v0.40: renamed OleFileIO_PL to olefile, added initial write support for streams >4K, updated doc and
|
|
||||||
license, improved the setup script.
|
|
||||||
- 2014-07-27 v0.31: fixed support for large files with 4K sectors, thanks to Niko Ehrenfeuchter, Martijn Berger and
|
|
||||||
Dave Jones. Added test scripts from Pillow (by hugovk). Fixed setup for Python 3 (Martin Panter)
|
|
||||||
- 2014-02-04 v0.30: now compatible with Python 3.x, thanks to Martin Panter who did most of the hard work.
|
|
||||||
- 2013-07-24 v0.26: added methods to parse stream/storage timestamps, improved listdir to include storages, fixed
|
|
||||||
parsing of direntry timestamps
|
|
||||||
- 2013-05-27 v0.25: improved metadata extraction, properties parsing and exception handling, fixed
|
|
||||||
[issue #12](https://bitbucket.org/decalage/olefileio_pl/issue/12/error-when-converting-timestamps-in-ole)
|
|
||||||
- 2013-05-07 v0.24: new features to extract metadata (get\_metadata method and OleMetadata class), improved
|
|
||||||
getproperties to convert timestamps to Python datetime
|
|
||||||
- 2012-10-09: published [python-oletools](http://www.decalage.info/python/oletools), a package of analysis tools based
|
|
||||||
on OleFileIO_PL
|
|
||||||
- 2012-09-11 v0.23: added support for file-like objects, fixed [issue #8](https://bitbucket.org/decalage/olefileio_pl/issue/8/bug-with-file-object)
|
|
||||||
- 2012-02-17 v0.22: fixed issues #7 (bug in getproperties) and #2 (added close method)
|
|
||||||
- 2011-10-20: code hosted on bitbucket to ease contributions and bug tracking
|
|
||||||
- 2010-01-24 v0.21: fixed support for big-endian CPUs, such as PowerPC Macs.
|
|
||||||
- 2009-12-11 v0.20: small bugfix in OleFileIO.open when filename is not plain str.
|
|
||||||
- 2009-12-10 v0.19: fixed support for 64 bits platforms (thanks to Ben G. and Martijn for reporting the bug)
|
|
||||||
- see changelog in source code for more info.
|
|
||||||
|
|
||||||
Download/Install
|
|
||||||
----------------
|
|
||||||
|
|
||||||
If you have pip or setuptools installed (pip is included in Python 2.7.9+), you may simply run **pip install olefile**
|
|
||||||
or **easy_install olefile** for the first installation.
|
|
||||||
|
|
||||||
To update olefile, run **pip install -U olefile**.
|
|
||||||
|
|
||||||
Otherwise, see https://bitbucket.org/decalage/olefileio_pl/wiki/Install
|
|
||||||
|
|
||||||
Features
|
|
||||||
--------
|
|
||||||
|
|
||||||
- Parse, read and write any OLE file such as Microsoft Office 97-2003 legacy document formats (Word .doc, Excel .xls,
|
|
||||||
PowerPoint .ppt, Visio .vsd, Project .mpp), Image Composer and FlashPix files, Outlook messages, StickyNotes,
|
|
||||||
Zeiss AxioVision ZVI files, Olympus FluoView OIB files, etc
|
|
||||||
- List all the streams and storages contained in an OLE file
|
|
||||||
- Open streams as files
|
|
||||||
- Parse and read property streams, containing metadata of the file
|
|
||||||
- Portable, pure Python module, no dependency
|
|
||||||
|
|
||||||
olefile can be used as an independent package or with PIL/Pillow.
|
|
||||||
|
|
||||||
olefile is mostly meant for developers. If you are looking for tools to analyze OLE files or to extract data (especially
|
|
||||||
for security purposes such as malware analysis and forensics), then please also check my
|
|
||||||
[python-oletools](http://www.decalage.info/python/oletools), which are built upon olefile and provide a higher-level interface.
|
|
||||||
|
|
||||||
|
|
||||||
History
|
|
||||||
-------
|
|
||||||
|
|
||||||
olefile is based on the OleFileIO module from [PIL](http://www.pythonware.com/products/pil/index.htm), the excellent
|
|
||||||
Python Imaging Library, created and maintained by Fredrik Lundh. The olefile API is still compatible with PIL, but
|
|
||||||
since 2005 I have improved the internal implementation significantly, with new features, bugfixes and a more robust
|
|
||||||
design. From 2005 to 2014 the project was called OleFileIO_PL, and in 2014 I changed its name to olefile to celebrate
|
|
||||||
its 9 years and its new write features.
|
|
||||||
|
|
||||||
As far as I know, olefile is the most complete and robust Python implementation to read MS OLE2 files, portable on
|
|
||||||
several operating systems. (please tell me if you know other similar Python modules)
|
|
||||||
|
|
||||||
Since 2014 olefile/OleFileIO_PL has been integrated into [Pillow](http://python-pillow.org), the friendly fork
|
|
||||||
of PIL. olefile will continue to be improved as a separate project, and new versions will be merged into Pillow
|
|
||||||
regularly.
|
|
||||||
|
|
||||||
|
|
||||||
Main improvements over the original version of OleFileIO in PIL:
|
|
||||||
----------------------------------------------------------------
|
|
||||||
|
|
||||||
- Compatible with Python 3.x and 2.6+
|
|
||||||
- Many bug fixes
|
|
||||||
- Support for files larger than 6.8MB
|
|
||||||
- Support for 64-bit platforms and big-endian CPUs
|
|
||||||
- Robust: many checks to detect malformed files
|
|
||||||
- Runtime option to choose if malformed files should be parsed or raise exceptions
|
|
||||||
- Improved API
|
|
||||||
- Metadata extraction, stream/storage timestamps (e.g. for document forensics)
|
|
||||||
- Can open file-like objects
|
|
||||||
- Added setup.py and install.bat to ease installation
|
|
||||||
- More convenient slash-based syntax for stream paths
|
|
||||||
- Write features
|
|
||||||
|
|
||||||
Documentation
|
|
||||||
-------------
|
|
||||||
|
|
||||||
Please see the [online documentation](https://bitbucket.org/decalage/olefileio_pl/wiki) for more information,
|
|
||||||
especially the [OLE overview](https://bitbucket.org/decalage/olefileio_pl/wiki/OLE_Overview) and the
|
|
||||||
[API page](https://bitbucket.org/decalage/olefileio_pl/wiki/API) which describe how to use olefile in Python applications.
|
|
||||||
A copy of the same documentation is also provided in the doc subfolder of the olefile package.
|
|
||||||
|
|
||||||
|
|
||||||
## Real-life examples ##
|
|
||||||
|
|
||||||
A real-life example: [using OleFileIO_PL for malware analysis and forensics](http://blog.gregback.net/2011/03/using-remnux-for-forensic-puzzle-6/).
|
|
||||||
|
|
||||||
See also [this paper](https://computer-forensics.sans.org/community/papers/gcfa/grow-forensic-tools-taxonomy-python-libraries-helpful-forensic-analysis_6879) about python tools for forensics, which features olefile.
|
|
||||||
|
|
||||||
|
|
||||||
License
|
|
||||||
-------
|
|
||||||
|
|
||||||
olefile (formerly OleFileIO_PL) is copyright (c) 2005-2015 Philippe Lagadec
|
|
||||||
([http://www.decalage.info](http://www.decalage.info))
|
|
||||||
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice, this
|
|
||||||
list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer in the documentation
|
|
||||||
and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
||||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
||||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
|
|
||||||
----------
|
|
||||||
|
|
||||||
olefile is based on source code from the OleFileIO module of the Python Imaging Library (PIL) published by Fredrik
|
|
||||||
Lundh under the following license:
|
|
||||||
|
|
||||||
The Python Imaging Library (PIL) is
|
|
||||||
|
|
||||||
Copyright © 1997-2011 by Secret Labs AB
|
|
||||||
Copyright © 1995-2011 by Fredrik Lundh
|
|
||||||
|
|
||||||
By obtaining, using, and/or copying this software and/or its associated documentation, you agree that you have read,
|
|
||||||
understood, and will comply with the following terms and conditions:
|
|
||||||
|
|
||||||
Permission to use, copy, modify, and distribute this software and its associated documentation for any purpose and
|
|
||||||
without fee is hereby granted, provided that the above copyright notice appears in all copies, and that both that
|
|
||||||
copyright notice and this permission notice appear in supporting documentation, and that the name of Secret Labs AB or
|
|
||||||
the author not be used in advertising or publicity pertaining to distribution of the software without specific, written
|
|
||||||
prior permission.
|
|
||||||
|
|
||||||
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
|
|
||||||
OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
|
||||||
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
|
|
||||||
CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
|
||||||
SOFTWARE.
|
|
2307
PIL/OleFileIO.py
2307
PIL/OleFileIO.py
File diff suppressed because it is too large
Load Diff
|
@ -1,147 +0,0 @@
|
||||||
from helper import unittest, PillowTestCase
|
|
||||||
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
import PIL.OleFileIO as OleFileIO
|
|
||||||
|
|
||||||
|
|
||||||
class TestOleFileIo(PillowTestCase):
|
|
||||||
|
|
||||||
def test_isOleFile(self):
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
|
|
||||||
self.assertTrue(OleFileIO.isOleFile(ole_file))
|
|
||||||
with open(ole_file, 'rb') as fp:
|
|
||||||
self.assertTrue(OleFileIO.isOleFile(fp))
|
|
||||||
self.assertTrue(OleFileIO.isOleFile(fp.read()))
|
|
||||||
|
|
||||||
non_ole_file = "Tests/images/flower.jpg"
|
|
||||||
|
|
||||||
self.assertFalse(OleFileIO.isOleFile(non_ole_file))
|
|
||||||
with open(non_ole_file, 'rb') as fp:
|
|
||||||
self.assertFalse(OleFileIO.isOleFile(fp))
|
|
||||||
self.assertFalse(OleFileIO.isOleFile(fp.read()))
|
|
||||||
|
|
||||||
def test_exists_worddocument(self):
|
|
||||||
# Arrange
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
ole = OleFileIO.OleFileIO(ole_file)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
exists = ole.exists('worddocument')
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
self.assertTrue(exists)
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
def test_exists_no_vba_macros(self):
|
|
||||||
# Arrange
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
ole = OleFileIO.OleFileIO(ole_file)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
exists = ole.exists('macros/vba')
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
self.assertFalse(exists)
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
def test_get_type(self):
|
|
||||||
# Arrange
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
ole = OleFileIO.OleFileIO(ole_file)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
entry_type = ole.get_type('worddocument')
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
self.assertEqual(entry_type, OleFileIO.STGTY_STREAM)
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
def test_get_size(self):
|
|
||||||
# Arrange
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
ole = OleFileIO.OleFileIO(ole_file)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
size = ole.get_size('worddocument')
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
self.assertGreater(size, 0)
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
def test_get_rootentry_name(self):
|
|
||||||
# Arrange
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
ole = OleFileIO.OleFileIO(ole_file)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
root = ole.get_rootentry_name()
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
self.assertEqual(root, "Root Entry")
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
def test_meta(self):
|
|
||||||
# Arrange
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
ole = OleFileIO.OleFileIO(ole_file)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
meta = ole.get_metadata()
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
self.assertEqual(meta.author, b"Laurence Ipsum")
|
|
||||||
self.assertEqual(meta.num_pages, 1)
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
def test_gettimes(self):
|
|
||||||
# Arrange
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
ole = OleFileIO.OleFileIO(ole_file)
|
|
||||||
root_entry = ole.direntries[0]
|
|
||||||
|
|
||||||
# Act
|
|
||||||
ctime = root_entry.getctime()
|
|
||||||
mtime = root_entry.getmtime()
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
self.assertIsNone(ctime)
|
|
||||||
self.assertIsInstance(mtime, datetime.datetime)
|
|
||||||
self.assertEqual(mtime.year, 2014)
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
def test_listdir(self):
|
|
||||||
# Arrange
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
ole = OleFileIO.OleFileIO(ole_file)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
dirlist = ole.listdir()
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
self.assertIn(['WordDocument'], dirlist)
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
def test_debug(self):
|
|
||||||
# Arrange
|
|
||||||
ole_file = "Tests/images/test-ole-file.doc"
|
|
||||||
ole = OleFileIO.OleFileIO(ole_file)
|
|
||||||
meta = ole.get_metadata()
|
|
||||||
|
|
||||||
# Act
|
|
||||||
OleFileIO.set_debug_mode(True)
|
|
||||||
ole.dumpdirectory()
|
|
||||||
meta.dump()
|
|
||||||
|
|
||||||
OleFileIO.set_debug_mode(False)
|
|
||||||
ole.dumpdirectory()
|
|
||||||
meta.dump()
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
# No assert, just check they run ok
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
|
@ -1,364 +0,0 @@
|
||||||
.. py:module:: PIL.OleFileIO
|
|
||||||
.. py:currentmodule:: PIL.OleFileIO
|
|
||||||
|
|
||||||
:py:mod:`OleFileIO` Module
|
|
||||||
===========================
|
|
||||||
|
|
||||||
The :py:mod:`OleFileIO` module reads Microsoft OLE2 files (also called
|
|
||||||
Structured Storage or Microsoft Compound Document File Format), such
|
|
||||||
as Microsoft Office documents, Image Composer and FlashPix files, and
|
|
||||||
Outlook messages.
|
|
||||||
|
|
||||||
This module is the `OleFileIO\_PL`_ project by Philippe Lagadec, v0.42,
|
|
||||||
merged back into Pillow.
|
|
||||||
|
|
||||||
.. _OleFileIO\_PL: http://www.decalage.info/python/olefileio
|
|
||||||
|
|
||||||
How to use this module
|
|
||||||
----------------------
|
|
||||||
|
|
||||||
For more information, see also the file **PIL/OleFileIO.py**, sample
|
|
||||||
code at the end of the module itself, and docstrings within the code.
|
|
||||||
|
|
||||||
About the structure of OLE files
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
An OLE file can be seen as a mini file system or a Zip archive: It
|
|
||||||
contains **streams** of data that look like files embedded within the
|
|
||||||
OLE file. Each stream has a name. For example, the main stream of a MS
|
|
||||||
Word document containing its text is named "WordDocument".
|
|
||||||
|
|
||||||
An OLE file can also contain **storages**. A storage is a folder that
|
|
||||||
contains streams or other storages. For example, a MS Word document with
|
|
||||||
VBA macros has a storage called "Macros".
|
|
||||||
|
|
||||||
Special streams can contain **properties**. A property is a specific
|
|
||||||
value that can be used to store information such as the metadata of a
|
|
||||||
document (title, author, creation date, etc). Property stream names
|
|
||||||
usually start with the character ``\x05``.
|
|
||||||
|
|
||||||
For example, a typical MS Word document may look like this:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
\x05DocumentSummaryInformation (stream)
|
|
||||||
\x05SummaryInformation (stream)
|
|
||||||
WordDocument (stream)
|
|
||||||
Macros (storage)
|
|
||||||
PROJECT (stream)
|
|
||||||
PROJECTwm (stream)
|
|
||||||
VBA (storage)
|
|
||||||
Module1 (stream)
|
|
||||||
ThisDocument (stream)
|
|
||||||
_VBA_PROJECT (stream)
|
|
||||||
dir (stream)
|
|
||||||
ObjectPool (storage)
|
|
||||||
|
|
||||||
Test if a file is an OLE container
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Use isOleFile to check if the first bytes of the file contain the Magic
|
|
||||||
for OLE files, before opening it. isOleFile returns True if it is an OLE
|
|
||||||
file, False otherwise.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
assert OleFileIO.isOleFile('myfile.doc')
|
|
||||||
|
|
||||||
Open an OLE file from disk
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Create an OleFileIO object with the file path as parameter:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
ole = OleFileIO.OleFileIO('myfile.doc')
|
|
||||||
|
|
||||||
Open an OLE file from a file-like object
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
This is useful if the file is not on disk, e.g. already stored in a
|
|
||||||
string or as a file-like object.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
ole = OleFileIO.OleFileIO(f)
|
|
||||||
|
|
||||||
For example the code below reads a file into a string, then uses BytesIO
|
|
||||||
to turn it into a file-like object.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
data = open('myfile.doc', 'rb').read()
|
|
||||||
f = io.BytesIO(data) # or StringIO.StringIO for Python 2.x
|
|
||||||
ole = OleFileIO.OleFileIO(f)
|
|
||||||
|
|
||||||
How to handle malformed OLE files
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
By default, the parser is configured to be as robust and permissive as
|
|
||||||
possible, allowing it to parse most malformed OLE files. Only fatal errors
|
|
||||||
will raise an exception. It is possible to tell the parser to be more
|
|
||||||
strict in order to raise exceptions for files that do not fully conform
|
|
||||||
to the OLE specifications, using the raise\_defects option:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
ole = OleFileIO.OleFileIO('myfile.doc', raise_defects=DEFECT_INCORRECT)
|
|
||||||
|
|
||||||
When the parsing is done, the list of non-fatal issues detected is
|
|
||||||
available as a list in the parsing\_issues attribute of the OleFileIO
|
|
||||||
object:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
print('Non-fatal issues raised during parsing:')
|
|
||||||
if ole.parsing_issues:
|
|
||||||
for exctype, msg in ole.parsing_issues:
|
|
||||||
print('- %s: %s' % (exctype.__name__, msg))
|
|
||||||
else:
|
|
||||||
print('None')
|
|
||||||
|
|
||||||
Syntax for stream and storage path
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Two different syntaxes are allowed for methods that need or return the
|
|
||||||
path of streams and storages:
|
|
||||||
|
|
||||||
1) Either a **list of strings** including all the storages from the root
|
|
||||||
up to the stream/storage name. For example a stream called
|
|
||||||
"WordDocument" at the root will have ['WordDocument'] as full path. A
|
|
||||||
stream called "ThisDocument" located in the storage "Macros/VBA" will
|
|
||||||
be ['Macros', 'VBA', 'ThisDocument']. This is the original syntax
|
|
||||||
from PIL. While hard to read and not very convenient, this syntax
|
|
||||||
works in all cases.
|
|
||||||
|
|
||||||
2) Or a **single string with slashes** to separate storage and stream
|
|
||||||
names (similar to the Unix path syntax). The previous examples would
|
|
||||||
be 'WordDocument' and 'Macros/VBA/ThisDocument'. This syntax is
|
|
||||||
easier, but may fail if a stream or storage name contains a slash.
|
|
||||||
|
|
||||||
Both are case-insensitive.
|
|
||||||
|
|
||||||
Switching between the two is easy:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
slash_path = '/'.join(list_path)
|
|
||||||
list_path = slash_path.split('/')
|
|
||||||
|
|
||||||
Get the list of streams
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
listdir() returns a list of all the streams contained in the OLE file,
|
|
||||||
including those stored in storages. Each stream is listed itself as a
|
|
||||||
list, as described above.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
print(ole.listdir())
|
|
||||||
|
|
||||||
Sample result:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
[['\x01CompObj'], ['\x05DocumentSummaryInformation'], ['\x05SummaryInformation']
|
|
||||||
, ['1Table'], ['Macros', 'PROJECT'], ['Macros', 'PROJECTwm'], ['Macros', 'VBA',
|
|
||||||
'Module1'], ['Macros', 'VBA', 'ThisDocument'], ['Macros', 'VBA', '_VBA_PROJECT']
|
|
||||||
, ['Macros', 'VBA', 'dir'], ['ObjectPool'], ['WordDocument']]
|
|
||||||
|
|
||||||
As an option it is possible to choose if storages should also be listed,
|
|
||||||
with or without streams:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
ole.listdir (streams=False, storages=True)
|
|
||||||
|
|
||||||
Test if known streams/storages exist:
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
exists(path) checks if a given stream or storage exists in the OLE file.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
if ole.exists('worddocument'):
|
|
||||||
print("This is a Word document.")
|
|
||||||
if ole.exists('macros/vba'):
|
|
||||||
print("This document seems to contain VBA macros.")
|
|
||||||
|
|
||||||
Read data from a stream
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
openstream(path) opens a stream as a file-like object.
|
|
||||||
|
|
||||||
The following example extracts the "Pictures" stream from a PPT file:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
pics = ole.openstream('Pictures')
|
|
||||||
data = pics.read()
|
|
||||||
|
|
||||||
|
|
||||||
Get information about a stream/storage
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Several methods can provide the size, type and timestamps of a given
|
|
||||||
stream/storage:
|
|
||||||
|
|
||||||
get\_size(path) returns the size of a stream in bytes:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
s = ole.get_size('WordDocument')
|
|
||||||
|
|
||||||
get\_type(path) returns the type of a stream/storage, as one of the
|
|
||||||
following constants: STGTY\_STREAM for a stream, STGTY\_STORAGE for a
|
|
||||||
storage, STGTY\_ROOT for the root entry, and False for a non-existent
|
|
||||||
path.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
t = ole.get_type('WordDocument')
|
|
||||||
|
|
||||||
get\_ctime(path) and get\_mtime(path) return the creation and
|
|
||||||
modification timestamps of a stream/storage, as a Python datetime object
|
|
||||||
with UTC timezone. Please note that these timestamps are only present if
|
|
||||||
the application that created the OLE file explicitly stored them, which
|
|
||||||
is rarely the case. When not present, these methods return None.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
c = ole.get_ctime('WordDocument')
|
|
||||||
m = ole.get_mtime('WordDocument')
|
|
||||||
|
|
||||||
The root storage is a special case: You can get its creation and
|
|
||||||
modification timestamps using the OleFileIO.root attribute:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
c = ole.root.getctime()
|
|
||||||
m = ole.root.getmtime()
|
|
||||||
|
|
||||||
Extract metadata
|
|
||||||
~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
get\_metadata() will check if standard property streams exist, parse all
|
|
||||||
the properties they contain, and return an OleMetadata object with the
|
|
||||||
found properties as attributes.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
meta = ole.get_metadata()
|
|
||||||
print('Author:', meta.author)
|
|
||||||
print('Title:', meta.title)
|
|
||||||
print('Creation date:', meta.create_time)
|
|
||||||
# print all metadata:
|
|
||||||
meta.dump()
|
|
||||||
|
|
||||||
Available attributes include:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
codepage, title, subject, author, keywords, comments, template,
|
|
||||||
last_saved_by, revision_number, total_edit_time, last_printed, create_time,
|
|
||||||
last_saved_time, num_pages, num_words, num_chars, thumbnail,
|
|
||||||
creating_application, security, codepage_doc, category, presentation_target,
|
|
||||||
bytes, lines, paragraphs, slides, notes, hidden_slides, mm_clips,
|
|
||||||
scale_crop, heading_pairs, titles_of_parts, manager, company, links_dirty,
|
|
||||||
chars_with_spaces, unused, shared_doc, link_base, hlinks, hlinks_changed,
|
|
||||||
version, dig_sig, content_type, content_status, language, doc_version
|
|
||||||
|
|
||||||
See the source code of the OleMetadata class for more information.
|
|
||||||
|
|
||||||
Parse a property stream
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
getproperties(path) can be used to parse any property stream that is
|
|
||||||
not handled by get\_metadata. It returns a dictionary indexed by
|
|
||||||
integers. Each integer is the index of the property, pointing to its
|
|
||||||
value. For example in the standard property stream
|
|
||||||
``\x05SummaryInformation``, the document title is property #2, and the
|
|
||||||
subject is #3.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
p = ole.getproperties('specialprops')
|
|
||||||
|
|
||||||
By default as in the original PIL version, timestamp properties are
|
|
||||||
converted into a number of seconds since Jan 1,1601. With the option
|
|
||||||
convert\_time, you can obtain more convenient Python datetime objects
|
|
||||||
(UTC timezone). If some time properties should not be converted (such as
|
|
||||||
total editing time in ``\x05SummaryInformation``), the list of indexes can
|
|
||||||
be passed as no\_conversion:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
p = ole.getproperties('specialprops', convert_time=True, no_conversion=[10])
|
|
||||||
|
|
||||||
Close the OLE file
|
|
||||||
~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Unless your application is a simple script that terminates after
|
|
||||||
processing an OLE file, do not forget to close each OleFileIO object
|
|
||||||
after parsing to close the file on disk.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
ole.close()
|
|
||||||
|
|
||||||
Use OleFileIO as a script
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
OleFileIO can also be used as a script from the command-line to
|
|
||||||
display the structure of an OLE file and its metadata, for example:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
PIL/OleFileIO.py myfile.doc
|
|
||||||
|
|
||||||
You can use the option -c to check that all streams can be read fully,
|
|
||||||
and -d to generate very verbose debugging information.
|
|
||||||
|
|
||||||
How to contribute
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
The code is available in `a Mercurial repository on
|
|
||||||
bitbucket <https://bitbucket.org/decalage/olefileio_pl>`_. You may use
|
|
||||||
it to submit enhancements or to report any issue.
|
|
||||||
|
|
||||||
If you would like to help us improve this module, or simply provide
|
|
||||||
feedback, please `contact me <http://decalage.info/contact>`_. You can
|
|
||||||
help in many ways:
|
|
||||||
|
|
||||||
- test this module on different platforms / Python versions
|
|
||||||
- find and report bugs
|
|
||||||
- improve documentation, code samples, docstrings
|
|
||||||
- write unittest test cases
|
|
||||||
- provide tricky malformed files
|
|
||||||
|
|
||||||
How to report bugs
|
|
||||||
------------------
|
|
||||||
|
|
||||||
To report a bug, for example a normal file which is not parsed
|
|
||||||
correctly, please use the `issue reporting
|
|
||||||
page <https://bitbucket.org/decalage/olefileio_pl/issues?status=new&status=open>`_,
|
|
||||||
or if you prefer to do it privately, use this `contact
|
|
||||||
form <http://decalage.info/contact>`_. Please provide all the
|
|
||||||
information about the context and how to reproduce the bug.
|
|
||||||
|
|
||||||
If possible, please attach the debugging output of OleFileIO. For this,
|
|
||||||
run the following command:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
PIL/OleFileIO.py -d -c file >debug.txt
|
|
||||||
|
|
||||||
|
|
||||||
Classes and Methods
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
.. automodule:: PIL.OleFileIO
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
:noindex:
|
|
|
@ -27,7 +27,6 @@ Reference
|
||||||
ImageWin
|
ImageWin
|
||||||
ExifTags
|
ExifTags
|
||||||
TiffTags
|
TiffTags
|
||||||
OleFileIO
|
|
||||||
PSDraw
|
PSDraw
|
||||||
PixelAccess
|
PixelAccess
|
||||||
PyAccess
|
PyAccess
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -771,6 +771,7 @@ try:
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
scripts=glob.glob("Scripts/*.py"),
|
scripts=glob.glob("Scripts/*.py"),
|
||||||
|
install_requires=['olefile'],
|
||||||
test_suite='nose.collector',
|
test_suite='nose.collector',
|
||||||
keywords=["Imaging", ],
|
keywords=["Imaging", ],
|
||||||
license='Standard PIL License',
|
license='Standard PIL License',
|
||||||
|
|
Loading…
Reference in New Issue
Block a user