diff --git a/.travis.yml b/.travis.yml index a75c80873..f94f8376a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,8 @@ install: - "travis_retry sudo apt-get -qq install libfreetype6-dev liblcms2-dev python-qt4 ghostscript libffi-dev libjpeg-turbo-progs cmake imagemagick" - "travis_retry pip install cffi" - "travis_retry pip install coverage nose" - - "travis_retry pip install pyroma" + # Pyroma tests sometimes hang on PyPy; skip for PyPy + - if [ $TRAVIS_PYTHON_VERSION != "pypy" ]; then travis_retry pip install pyroma; fi - if [ "$TRAVIS_PYTHON_VERSION" == "2.6" ]; then travis_retry pip install unittest2; fi diff --git a/CHANGES.rst b/CHANGES.rst index 2bd4ff0a0..518a4244f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,18 @@ Changelog (Pillow) 2.9.0 (Unreleased) ------------------ +- Provide n_frames attribute to multi-frame formats #1261 + [anntzer, radarhere] + +- Add duration and loop set to GifImagePlugin #1172 + [radarhere] + +- Ico files are little endian #1232 + [wiredfool] + +- Upgrade olefile from 0.30 to 0.42b #1226 + [radarhere, decalage2] + - Setting transparency value to 0 when the tRNS contains only null byte(s) #1239 [juztin] @@ -31,6 +43,12 @@ Changelog (Pillow) - Tiff: allow writing floating point tag values #1113 [bpedersen2] +2.8.2 (2015-06-06) +------------------ + +- Bug fix: Fixed Tiff handling of bad EXIF data + [radarhere] + 2.8.1 (2015-04-02) ------------------ diff --git a/Makefile b/Makefile index a729e4088..4d96c497d 100644 --- a/Makefile +++ b/Makefile @@ -1,28 +1,5 @@ -.PHONY: pre clean install test inplace coverage test-dep help docs livedocs - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " clean remove build products" - @echo " install make and install" - @echo " test run tests on installed pillow" - @echo " inplace make inplace extension" - @echo " coverage run coverage test (in progress)" - @echo " docs make html docs" - @echo " docserver run an http server on the docs directory" - @echo " test-dep install coveraget and test dependencies" - -pre: - virtualenv . - bin/pip install -r requirements.txt - bin/python setup.py develop - bin/python selftest.py - bin/nosetests Tests/test_*.py - bin/python setup.py install - bin/python test-installed.py - check-manifest - pyroma . - viewdoc +# https://www.gnu.org/software/make/manual/html_node/Phony-Targets.html +.PHONY: clean coverage doc docserve help inplace install install-req release-test sdist test upload upload-test clean: python setup.py clean @@ -30,46 +7,70 @@ clean: rm -r build || true find . -name __pycache__ | xargs rm -r || true -install: - python setup.py install - python selftest.py --installed - -test: - python test-installed.py - -inplace: clean - python setup.py build_ext --inplace - coverage: -# requires nose-cov coverage erase coverage run --parallel-mode --include=PIL/* selftest.py nosetests --with-cov --cov='PIL/' --cov-report=html Tests/test_*.py -# doesn't combine properly before report, -# writing report instead of displaying invalid report +# Doesn't combine properly before report, writing report instead of displaying invalid report. 
rm -r htmlcov || true coverage combine coverage report -test-dep: - pip install coveralls nose nose-cov pep8 pyflakes - -docs: +doc: $(MAKE) -C docs html -docserver: +docserve: cd docs/_build/html && python -mSimpleHTTPServer 2> /dev/null& -# Test sdist upload via test.pythonpackages.com, no creds required -# .pypirc: -# [test] -# username: -# password: -# repository = http://test.pythonpackages.com -sdisttest: - python setup.py sdist --format=zip upload -r test -sdistup: - python setup.py sdist --format=zip upload - python setup.py sdist upload +help: + @echo "Welcome to Pillow development. Please use \`make <target>' where <target> is one of" + @echo " clean remove build products" + @echo " coverage run coverage test (in progress)" + @echo " doc make html docs" + @echo " docserve run an http server on the docs directory" + @echo " html to make standalone HTML files" + @echo " inplace make inplace extension" + @echo " install make and install" + @echo " install-req install documentation and test dependencies" + @echo " release-test run code and package tests before release" + @echo " test run tests on installed pillow" + @echo " upload build and upload sdists to PyPI" + @echo " upload-test build and upload sdists to test.pythonpackages.com" + +inplace: clean + python setup.py build_ext --inplace + +install: + python setup.py install + python selftest.py --installed + +install-req: + pip install -r requirements.txt + +release-test: + $(MAKE) install-req + python setup.py develop + python selftest.py + nosetests Tests/test_*.py + python setup.py install + python test-installed.py + check-manifest + pyroma . + viewdoc + sdist: - python setup.py sdist --format=zip + python setup.py sdist --format=gztar,zip + +test: + python test-installed.py + +# https://docs.python.org/2/distutils/packageindex.html#the-pypirc-file +upload-test: +# [test] +# username: +# password: +# repository = http://test.pythonpackages.com + python setup.py sdist --format=gztar,zip upload -r test + +upload: + python setup.py sdist --format=gztar,zip upload diff --git a/PIL/DcxImagePlugin.py b/PIL/DcxImagePlugin.py index 0940b3935..978c90e80 100644 --- a/PIL/DcxImagePlugin.py +++ b/PIL/DcxImagePlugin.py @@ -62,6 +62,10 @@ class DcxImageFile(PcxImageFile): self.__fp = self.fp self.seek(0) + @property + def n_frames(self): + return len(self._offset) + def seek(self, frame): if frame >= len(self._offset): raise EOFError("attempt to seek outside DCX directory") diff --git a/PIL/FliImagePlugin.py b/PIL/FliImagePlugin.py index 4ecaccdc4..0660ddeb6 100644 --- a/PIL/FliImagePlugin.py +++ b/PIL/FliImagePlugin.py @@ -86,9 +86,10 @@ class FliImageFile(ImageFile.ImageFile): self.palette = ImagePalette.raw("RGB", b"".join(palette)) # set things up to decode first frame - self.frame = -1 + self.__frame = -1 self.__fp = self.fp - + self.__rewind = self.fp.tell() + self._n_frames = None self.seek(0) def _palette(self, palette, shift): @@ -109,11 +110,35 @@ class FliImageFile(ImageFile.ImageFile): palette[i] = (r, g, b) i += 1 - def seek(self, frame): + @property + def n_frames(self): + if self._n_frames is None: + current = self.tell() + try: + while True: + self.seek(self.tell() + 1) + except EOFError: + self._n_frames = self.tell() + 1 + self.seek(current) + return self._n_frames - if frame != self.frame + 1: + def seek(self, frame): + if frame == self.__frame: + return + if frame < self.__frame: + self._seek(0) + for f in range(self.__frame + 1, frame + 1): + self._seek(f) + + def _seek(self, frame): + if frame == 0: + self.__frame = -1 +
self.__fp.seek(self.__rewind) + self.__offset = 128 + + if frame != self.__frame + 1: raise ValueError("cannot seek to frame %d" % frame) - self.frame = frame + self.__frame = frame # move to next frame self.fp = self.__fp @@ -128,11 +153,10 @@ class FliImageFile(ImageFile.ImageFile): self.decodermaxblock = framesize self.tile = [("fli", (0, 0)+self.size, self.__offset, None)] - self.__offset = self.__offset + framesize + self.__offset += framesize def tell(self): - - return self.frame + return self.__frame # # registry diff --git a/PIL/GifImagePlugin.py b/PIL/GifImagePlugin.py index d2b2f7fee..150773b67 100644 --- a/PIL/GifImagePlugin.py +++ b/PIL/GifImagePlugin.py @@ -87,9 +87,30 @@ class GifImageFile(ImageFile.ImageFile): self.__fp = self.fp # FIXME: hack self.__rewind = self.fp.tell() - self.seek(0) # get ready to read first frame + self._n_frames = None + self._seek(0) # get ready to read first frame + + @property + def n_frames(self): + if self._n_frames is None: + current = self.tell() + try: + while True: + self.seek(self.tell() + 1) + except EOFError: + self._n_frames = self.tell() + 1 + self.seek(current) + return self._n_frames def seek(self, frame): + if frame == self.__frame: + return + if frame < self.__frame: + self._seek(0) + for f in range(self.__frame + 1, frame + 1): + self._seek(f) + + def _seek(self, frame): if frame == 0: # rewind @@ -292,57 +313,10 @@ def _save(im, fp, filename): for s in header: fp.write(s) - flags = 0 - - try: - interlace = im.encoderinfo["interlace"] - except KeyError: - interlace = 1 - - # workaround for @PIL153 - if min(im.size) < 16: - interlace = 0 - - if interlace: - flags = flags | 64 - - try: - transparency = im.encoderinfo["transparency"] - except KeyError: - pass - else: - transparency = int(transparency) - # optimize the block away if transparent color is not used - transparent_color_exists = True - # adjust the transparency index after optimize - if used_palette_colors is not None and len(used_palette_colors) < 256: - for i in range(len(used_palette_colors)): - if used_palette_colors[i] == transparency: - transparency = i - transparent_color_exists = True - break - else: - transparent_color_exists = False - - # transparency extension block - if transparent_color_exists: - fp.write(b"!" 
+ - o8(249) + # extension intro - o8(4) + # length - o8(1) + # transparency info present - o16(0) + # duration - o8(transparency) + # transparency index - o8(0)) - # local image header - fp.write(b"," + - o16(0) + o16(0) + # bounding box - o16(im.size[0]) + # size - o16(im.size[1]) + - o8(flags) + # flags - o8(8)) # bits + get_local_header(fp, im) - im_out.encoderconfig = (8, interlace) + im_out.encoderconfig = (8, get_interlace(im)) ImageFile._save(im_out, fp, [("gif", (0, 0)+im.size, 0, RAWMODE[im_out.mode])]) @@ -356,6 +330,85 @@ def _save(im, fp, filename): pass +def get_interlace(im): + try: + interlace = im.encoderinfo["interlace"] + except KeyError: + interlace = 1 + + # workaround for @PIL153 + if min(im.size) < 16: + interlace = 0 + + return interlace + + +def get_local_header(fp, im, offset=(0, 0)): + transparent_color_exists = False + try: + transparency = im.encoderinfo["transparency"] + except KeyError: + pass + else: + transparency = int(transparency) + # optimize the block away if transparent color is not used + transparent_color_exists = True + + if _get_optimize(im, im.encoderinfo): + used_palette_colors = _get_used_palette_colors(im) + + # adjust the transparency index after optimize + if len(used_palette_colors) < 256: + for i in range(len(used_palette_colors)): + if used_palette_colors[i] == transparency: + transparency = i + transparent_color_exists = True + break + else: + transparent_color_exists = False + + if "duration" in im.encoderinfo: + duration = int(im.encoderinfo["duration"] / 10) + else: + duration = 0 + if transparent_color_exists or duration != 0: + transparency_flag = 1 if transparent_color_exists else 0 + if not transparent_color_exists: + transparency = 0 + + fp.write(b"!" + + o8(249) + # extension intro + o8(4) + # length + o8(transparency_flag) + # transparency info present + o16(duration) + # duration + o8(transparency) + # transparency index + o8(0)) + + if "loop" in im.encoderinfo: + number_of_loops = im.encoderinfo["loop"] + fp.write(b"!" 
+ + o8(255) + # extension intro + o8(11) + + b"NETSCAPE2.0" + + o8(3) + + o8(1) + + o16(number_of_loops) + # number of loops + o8(0)) + + flags = 0 + + if get_interlace(im): + flags = flags | 64 + + fp.write(b"," + + o16(offset[0]) + # offset + o16(offset[1]) + + o16(im.size[0]) + # size + o16(im.size[1]) + + o8(flags) + # flags + o8(8)) # bits + + def _save_netpbm(im, fp, filename): # @@ -405,11 +458,26 @@ def _save_netpbm(im, fp, filename): # -------------------------------------------------------------------- # GIF utilities +def _get_optimize(im, info): + return im.mode in ("P", "L") and info and info.get("optimize", 0) + + +def _get_used_palette_colors(im): + used_palette_colors = [] + + # check which colors are used + i = 0 + for count in im.histogram(): + if count: + used_palette_colors.append(i) + i += 1 + + return used_palette_colors + + def getheader(im, palette=None, info=None): """Return a list of strings representing a GIF header""" - optimize = info and info.get("optimize", 0) - # Header Block # http://www.matthewflickinger.com/lab/whatsinagif/bits_and_bytes.asp header = [ @@ -431,15 +499,8 @@ def getheader(im, palette=None, info=None): used_palette_colors = palette_bytes = None - if im.mode in ("P", "L") and optimize: - used_palette_colors = [] - - # check which colors are used - i = 0 - for count in im.histogram(): - if count: - used_palette_colors.append(i) - i += 1 + if _get_optimize(im, info): + used_palette_colors = _get_used_palette_colors(im) # create the new palette if not every color is used if len(used_palette_colors) < 256: @@ -510,13 +571,7 @@ def getdata(im, offset=(0, 0), **params): im.encoderinfo = params # local image header - fp.write(b"," + - o16(offset[0]) + # offset - o16(offset[1]) + - o16(im.size[0]) + # size - o16(im.size[1]) + - o8(0) + # flags - o8(8)) # bits + get_local_header(fp, im, offset) ImageFile._save(im, fp, [("gif", (0, 0)+im.size, 0, RAWMODE[im.mode])]) diff --git a/PIL/IcoImagePlugin.py b/PIL/IcoImagePlugin.py index 8a2144463..778edaf9a 100644 --- a/PIL/IcoImagePlugin.py +++ b/PIL/IcoImagePlugin.py @@ -48,7 +48,7 @@ def _save(im, fp, filename): width, height = im.size filter(lambda x: False if (x[0] > width or x[1] > height or x[0] > 255 or x[1] > 255) else True, sizes) - fp.write(struct.pack("H", len(sizes))) # idCount(2) + fp.write(struct.pack("= self.info[FRAMES]: diff --git a/PIL/Image.py b/PIL/Image.py index 274e7ee0e..1c95bfca0 100644 --- a/PIL/Image.py +++ b/PIL/Image.py @@ -1810,7 +1810,7 @@ class Image(object): self.readonly = 0 self.pyaccess = None - # FIXME: the different tranform methods need further explanation + # FIXME: the different transform methods need further explanation # instead of bloating the method docs, add a separate chapter. def transform(self, size, method, data=None, resample=NEAREST, fill=1): """ diff --git a/PIL/ImageCms.py b/PIL/ImageCms.py index 6cd2f5d2d..ebf127df3 100644 --- a/PIL/ImageCms.py +++ b/PIL/ImageCms.py @@ -64,7 +64,7 @@ pyCMS 0.0.2 alpha Jan 6, 2002 - Added try/except statements arount type() checks of + Added try/except statements around type() checks of potential CObjects... Python won't let you use type() on them, and raises a TypeError (stupid, if you ask me!) 
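The GifImagePlugin changes above add an n_frames property and move the local image header into get_local_header(), which now also writes a graphic control extension when a duration is set and a NETSCAPE2.0 application extension when a loop count is set, both read from im.encoderinfo. A minimal usage sketch, assuming (as with other save options) that keyword arguments to Image.save() are forwarded into encoderinfo, and using hypothetical file names:

    from PIL import Image

    # Frame counting and seeking on a multi-frame file (DCX, FLI, GIF above).
    im = Image.open("animation.gif")      # hypothetical multi-frame input
    print(im.n_frames)                    # total frame count, new in this change
    im.seek(im.n_frames - 1)              # seek() now accepts jumps, not only frame + 1

    # Saving a GIF with timing/loop metadata; duration is in milliseconds and goes
    # into the graphic control extension, loop into the NETSCAPE2.0 extension.
    frame = Image.new("P", (64, 64))
    frame.save("out.gif", duration=500, loop=2)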
@@ -123,8 +123,8 @@ FLAGS = { "NOTCACHE": 64, # Inhibit 1-pixel cache "NOTPRECALC": 256, "NULLTRANSFORM": 512, # Don't transform anyway - "HIGHRESPRECALC": 1024, # Use more memory to give better accurancy - "LOWRESPRECALC": 2048, # Use less memory to minimize resouces + "HIGHRESPRECALC": 1024, # Use more memory to give better accuracy + "LOWRESPRECALC": 2048, # Use less memory to minimize resources "WHITEBLACKCOMPENSATION": 8192, "BLACKPOINTCOMPENSATION": 8192, "GAMUTCHECK": 4096, # Out of Gamut alarm @@ -573,7 +573,7 @@ def applyTransform(im, transform, inPlace=0): This function applies a pre-calculated transform (from ImageCms.buildTransform() or ImageCms.buildTransformFromOpenProfiles()) to an image. The transform can be used for multiple images, saving - considerable calcuation time if doing the same conversion multiple times. + considerable calculation time if doing the same conversion multiple times. If you want to modify im in-place instead of receiving a new image as the return value, set inPlace to TRUE. This can only be done if @@ -858,7 +858,7 @@ def getDefaultIntent(profile): If an error occurs while trying to obtain the default intent, a PyCMSError is raised. - Use this function to determine the default (and usually best optomized) + Use this function to determine the default (and usually best optimized) rendering intent for this profile. Most profiles support multiple rendering intents, but are intended mostly for one type of conversion. If you wish to use a different intent than returned, use @@ -914,7 +914,7 @@ def isIntentSupported(profile, intent, direction): see the pyCMS documentation for details on rendering intents and what they do. - :param direction: Integer specifing if the profile is to be used for input, + :param direction: Integer specifying if the profile is to be used for input, output, or proof INPUT = 0 (or use ImageCms.DIRECTION_INPUT) diff --git a/PIL/ImageEnhance.py b/PIL/ImageEnhance.py index fbacbee8f..56b5c0199 100644 --- a/PIL/ImageEnhance.py +++ b/PIL/ImageEnhance.py @@ -73,7 +73,7 @@ class Contrast(_Enhance): class Brightness(_Enhance): """Adjust image brightness. - This class can be used to control the brighntess of an image. An + This class can be used to control the brightness of an image. An enhancement factor of 0.0 gives a black image. A factor of 1.0 gives the original image. 
""" diff --git a/PIL/JpegImagePlugin.py b/PIL/JpegImagePlugin.py index e15042504..5cae90073 100644 --- a/PIL/JpegImagePlugin.py +++ b/PIL/JpegImagePlugin.py @@ -4,7 +4,7 @@ # # JPEG (JFIF) file handling # -# See "Digital Compression and Coding of Continous-Tone Still Images, +# See "Digital Compression and Coding of Continuous-Tone Still Images, # Part 1, Requirements and Guidelines" (CCITT T.81 / ISO 10918-1) # # History: diff --git a/PIL/MicImagePlugin.py b/PIL/MicImagePlugin.py index 5aed618ac..aa41bf359 100644 --- a/PIL/MicImagePlugin.py +++ b/PIL/MicImagePlugin.py @@ -71,6 +71,10 @@ class MicImageFile(TiffImagePlugin.TiffImageFile): self.seek(0) + @property + def n_frames(self): + return len(self.images) + def seek(self, frame): try: diff --git a/PIL/MpoImagePlugin.py b/PIL/MpoImagePlugin.py index c345fd327..9d21728b9 100644 --- a/PIL/MpoImagePlugin.py +++ b/PIL/MpoImagePlugin.py @@ -62,6 +62,10 @@ class MpoImageFile(JpegImagePlugin.JpegImageFile): def load_seek(self, pos): self.__fp.seek(pos) + @property + def n_frames(self): + return self.__framecount + def seek(self, frame): if frame < 0 or frame >= self.__framecount: raise EOFError("no more images in MPO file") diff --git a/PIL/OleFileIO.py b/PIL/OleFileIO.py index d3c1cd9a0..4cf106d97 100755 --- a/PIL/OleFileIO.py +++ b/PIL/OleFileIO.py @@ -1,47 +1,70 @@ #!/usr/bin/env python -## OleFileIO_PL: -## Module to read Microsoft OLE2 files (also called Structured Storage or -## Microsoft Compound Document File Format), such as Microsoft Office -## documents, Image Composer and FlashPix files, Outlook messages, ... -## This version is compatible with Python 2.6+ and 3.x -## version 0.30 2014-02-04 Philippe Lagadec - http://www.decalage.info - -## Project website: http://www.decalage.info/python/olefileio - -## Improved version of the OleFileIO module from PIL library v1.1.6 -## See: http://www.pythonware.com/products/pil/index.htm - -## The Python Imaging Library (PIL) is - -## Copyright (c) 1997-2005 by Secret Labs AB -## Copyright (c) 1995-2005 by Fredrik Lundh - -## OleFileIO_PL changes are Copyright (c) 2005-2014 by Philippe Lagadec - -## See source code and LICENSE.txt for information on usage and redistribution. - -## WARNING: THIS IS (STILL) WORK IN PROGRESS. +# olefile (formerly OleFileIO_PL) version 0.42 2015-01-25 +# +# Module to read/write Microsoft OLE2 files (also called Structured Storage or +# Microsoft Compound Document File Format), such as Microsoft Office 97-2003 +# documents, Image Composer and FlashPix files, Outlook messages, ... +# This version is compatible with Python 2.6+ and 3.x +# +# Project website: http://www.decalage.info/olefile +# +# olefile is copyright (c) 2005-2015 Philippe Lagadec (http://www.decalage.info) +# +# olefile is based on the OleFileIO module from the PIL library v1.1.6 +# See: http://www.pythonware.com/products/pil/index.htm +# +# The Python Imaging Library (PIL) is +# Copyright (c) 1997-2005 by Secret Labs AB +# Copyright (c) 1995-2005 by Fredrik Lundh +# +# See source code and LICENSE.txt for information on usage and redistribution. -# Starting with OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported +# Since OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported # This import enables print() as a function rather than a keyword # (main requirement to be compatible with Python 3.x) # The comment on the line below should be printed on Python 2.5 or older: -from __future__ import print_function # This version of OleFileIO_PL requires Python 2.6+ or 3.x. 
+from __future__ import print_function # This version of olefile requires Python 2.6+ or 3.x. -__author__ = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)" -__date__ = "2014-02-04" -__version__ = '0.30' +__author__ = "Philippe Lagadec" +__date__ = "2015-01-25" +__version__ = '0.42b' #--- LICENSE ------------------------------------------------------------------ -# OleFileIO_PL is an improved version of the OleFileIO module from the -# Python Imaging Library (PIL). - -# OleFileIO_PL changes are Copyright (c) 2005-2014 by Philippe Lagadec +# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2015 Philippe Lagadec +# (http://www.decalage.info) # +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# ---------- +# PIL License: +# +# olefile is based on source code from the OleFileIO module of the Python +# Imaging Library (PIL) published by Fredrik Lundh under the following license: + # The Python Imaging Library (PIL) is # Copyright (c) 1997-2005 by Secret Labs AB # Copyright (c) 1995-2005 by Fredrik Lundh @@ -67,7 +90,7 @@ __version__ = '0.30' # PERFORMANCE OF THIS SOFTWARE. 
#----------------------------------------------------------------------------- -# CHANGELOG: (only OleFileIO_PL changes compared to PIL 1.1.6) +# CHANGELOG: (only olefile/OleFileIO_PL changes compared to PIL 1.1.6) # 2005-05-11 v0.10 PL: - a few fixes for Python 2.4 compatibility # (all changes flagged with [PL]) # 2006-02-22 v0.11 PL: - a few fixes for some Office 2003 documents which raise @@ -142,10 +165,29 @@ __version__ = '0.30' # 2014-02-04 v0.30 PL: - upgraded code to support Python 3.x by Martin Panter # - several fixes for Python 2.6 (xrange, MAGIC) # - reused i32 from Pillow's _binary +# 2014-07-18 v0.31 - preliminary support for 4K sectors +# 2014-07-27 v0.31 PL: - a few improvements in OleFileIO.open (header parsing) +# - Fixed loadfat for large files with 4K sectors (issue #3) +# 2014-07-30 v0.32 PL: - added write_sect to write sectors to disk +# - added write_mode option to OleFileIO.__init__ and open +# 2014-07-31 PL: - fixed padding in write_sect for Python 3, added checks +# - added write_stream to write a stream to disk +# 2014-09-26 v0.40 PL: - renamed OleFileIO_PL to olefile +# 2014-11-09 NE: - added support for Jython (Niko Ehrenfeuchter) +# 2014-11-13 v0.41 PL: - improved isOleFile and OleFileIO.open to support OLE +# data in a string buffer and file-like objects. +# 2014-11-21 PL: - updated comments according to Pillow's commits +# 2015-01-24 v0.42 PL: - changed the default path name encoding from Latin-1 +# to UTF-8 on Python 2.x (Unicode on Python 3.x) +# - added path_encoding option to override the default +# - fixed a bug in _list when a storage is empty #----------------------------------------------------------------------------- # TODO (for version 1.0): -# + isOleFile should accept file-like objects like open +# + get rid of print statements, to simplify Python 2.x and 3.x support +# + add is_stream and is_storage +# + remove leading and trailing slashes where a path is used +# + add functions path_list2str and path_str2list # + fix how all the methods handle unicode str and/or bytes as arguments # + add path attrib to _OleDirEntry, set it once and for all in init or # append_kids (then listdir/_list can be simplified) @@ -177,30 +219,16 @@ __version__ = '0.30' # - move all debug code (and maybe dump methods) to a separate module, with # a class which inherits OleFileIO ? # - fix docstrings to follow epydoc format -# - add support for 4K sectors ? # - add support for big endian byte order ? # - create a simple OLE explorer with wxPython # FUTURE EVOLUTIONS to add write support: -# 1) add ability to write a stream back on disk from BytesIO (same size, no -# change in FAT/MiniFAT). -# 2) rename a stream/storage if it doesn't change the RB tree -# 3) use rbtree module to update the red-black tree + any rename -# 4) remove a stream/storage: free sectors in FAT/MiniFAT -# 5) allocate new sectors in FAT/MiniFAT -# 6) create new storage/stream -#----------------------------------------------------------------------------- +# see issue #6 on Bitbucket: +# https://bitbucket.org/decalage/olefileio_pl/issue/6/improve-olefileio_pl-to-write-ole-files + +#----------------------------------------------------------------------------- +# NOTES from PIL 1.1.6: -# -# THIS IS WORK IN PROGRESS -# -# The Python Imaging Library -# $Id$ -# -# stuff to deal with OLE2 Structured Storage files. this module is -# used by PIL to read Image Composer and FlashPix files, but can also -# be used to read other files of this type. 
-# # History: # 1997-01-20 fl Created # 1997-01-22 fl Fixed 64-bit portability quirk @@ -222,12 +250,6 @@ __version__ = '0.30' # "If this document and functionality of the Software conflict, # the actual functionality of the Software represents the correct # functionality" -- Microsoft, in the OLE format specification -# -# Copyright (c) Secret Labs AB 1997. -# Copyright (c) Fredrik Lundh 1997. -# -# See the README file for information on usage and redistribution. -# #------------------------------------------------------------------------------ @@ -239,8 +261,11 @@ import array import os.path import datetime -#[PL] Define explicitly the public API to avoid private objects in pydoc: -__all__ = ['OleFileIO', 'isOleFile', 'MAGIC'] +#=== COMPATIBILITY WORKAROUNDS ================================================ + +# [PL] Define explicitly the public API to avoid private objects in pydoc: +#TODO: add more +# __all__ = ['OleFileIO', 'isOleFile', 'MAGIC'] # For Python 3.x, need to redefine long as int: if str is not bytes: @@ -254,29 +279,54 @@ except: # no xrange, for Python 3 it was renamed as range: iterrange = range -#[PL] workaround to fix an issue with array item size on 64 bits systems: +# [PL] workaround to fix an issue with array item size on 64 bits systems: if array.array('L').itemsize == 4: # on 32 bits platforms, long integers in an array are 32 bits: UINT32 = 'L' elif array.array('I').itemsize == 4: # on 64 bits platforms, integers in an array are 32 bits: UINT32 = 'I' +elif array.array('i').itemsize == 4: + # On 64 bit Jython, signed integers ('i') are the only way to store our 32 + # bit values in an array in a *somewhat* reasonable way, as the otherwise + # perfectly suited 'H' (unsigned int, 32 bits) results in a completely + # unusable behaviour. This is most likely caused by the fact that Java + # doesn't have unsigned values, and thus Jython's "array" implementation, + # which is based on "jarray", doesn't have them either. + # NOTE: to trick Jython into converting the values it would normally + # interpret as "signed" into "unsigned", a binary-and operation with + # 0xFFFFFFFF can be used. This way it is possible to use the same comparing + # operations on all platforms / implementations. The corresponding code + # lines are flagged with a 'JYTHON-WORKAROUND' tag below. + UINT32 = 'i' else: raise ValueError('Need to fix a bug with 32 bit arrays, please contact author...') -#[PL] These workarounds were inspired from the Path module +# [PL] These workarounds were inspired from the Path module # (see http://www.jorendorff.com/articles/python/path/) try: basestring except NameError: basestring = str -#[PL] Experimental setting: if True, OLE filenames will be kept in Unicode +# [PL] Experimental setting: if True, OLE filenames will be kept in Unicode # if False (default PIL behaviour), all filenames are converted to Latin-1. -KEEP_UNICODE_NAMES = False +KEEP_UNICODE_NAMES = True -#[PL] DEBUG display mode: False by default, use set_debug_mode() or "-d" on +if sys.version_info[0] < 3: + # On Python 2.x, the default encoding for path names is UTF-8: + DEFAULT_PATH_ENCODING = 'utf-8' +else: + # On Python 3.x, the default encoding for path names is Unicode (None): + DEFAULT_PATH_ENCODING = None + + +#=== DEBUGGING =============================================================== + +#TODO: replace this by proper logging + +# [PL] DEBUG display mode: False by default, use set_debug_mode() or "-d" on # command line to change it. 
DEBUG_MODE = False @@ -287,13 +337,15 @@ def debug_print(msg): def debug_pass(msg): pass + + debug = debug_pass def set_debug_mode(debug_mode): """ Set debug mode on or off, to control display of debugging messages. - mode: True or False + :param mode: True or False """ global DEBUG_MODE, debug DEBUG_MODE = debug_mode @@ -302,18 +354,22 @@ def set_debug_mode(debug_mode): else: debug = debug_pass + +#=== CONSTANTS =============================================================== + +# magic bytes that should be at the beginning of every OLE file: MAGIC = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1' # [PL]: added constants for Sector IDs (from AAF specifications) -MAXREGSECT = 0xFFFFFFFA; # maximum SECT -DIFSECT = 0xFFFFFFFC; # (-4) denotes a DIFAT sector in a FAT -FATSECT = 0xFFFFFFFD; # (-3) denotes a FAT sector in a FAT -ENDOFCHAIN = 0xFFFFFFFE; # (-2) end of a virtual stream chain -FREESECT = 0xFFFFFFFF; # (-1) unallocated sector +MAXREGSECT = 0xFFFFFFFA # (-6) maximum SECT +DIFSECT = 0xFFFFFFFC # (-4) denotes a DIFAT sector in a FAT +FATSECT = 0xFFFFFFFD # (-3) denotes a FAT sector in a FAT +ENDOFCHAIN = 0xFFFFFFFE # (-2) end of a virtual stream chain +FREESECT = 0xFFFFFFFF # (-1) unallocated sector # [PL]: added constants for Directory Entry IDs (from AAF specifications) -MAXREGSID = 0xFFFFFFFA; # maximum directory entry ID -NOSTREAM = 0xFFFFFFFF; # (-1) unallocated directory entry +MAXREGSID = 0xFFFFFFFA # (-6) maximum directory entry ID +NOSTREAM = 0xFFFFFFFF # (-1) unallocated directory entry # [PL] object types in storage (from AAF specifications) STGTY_EMPTY = 0 # empty directory entry (according to OpenOffice.org doc) @@ -328,15 +384,15 @@ STGTY_ROOT = 5 # element is a root storage # -------------------------------------------------------------------- # property types -VT_EMPTY=0; VT_NULL=1; VT_I2=2; VT_I4=3; VT_R4=4; VT_R8=5; VT_CY=6; -VT_DATE=7; VT_BSTR=8; VT_DISPATCH=9; VT_ERROR=10; VT_BOOL=11; -VT_VARIANT=12; VT_UNKNOWN=13; VT_DECIMAL=14; VT_I1=16; VT_UI1=17; -VT_UI2=18; VT_UI4=19; VT_I8=20; VT_UI8=21; VT_INT=22; VT_UINT=23; -VT_VOID=24; VT_HRESULT=25; VT_PTR=26; VT_SAFEARRAY=27; VT_CARRAY=28; -VT_USERDEFINED=29; VT_LPSTR=30; VT_LPWSTR=31; VT_FILETIME=64; -VT_BLOB=65; VT_STREAM=66; VT_STORAGE=67; VT_STREAMED_OBJECT=68; -VT_STORED_OBJECT=69; VT_BLOB_OBJECT=70; VT_CF=71; VT_CLSID=72; -VT_VECTOR=0x1000; +VT_EMPTY = 0; VT_NULL = 1; VT_I2 = 2; VT_I4 = 3; VT_R4 = 4; VT_R8 = 5; VT_CY = 6; +VT_DATE = 7; VT_BSTR = 8; VT_DISPATCH = 9; VT_ERROR = 10; VT_BOOL = 11; +VT_VARIANT = 12; VT_UNKNOWN = 13; VT_DECIMAL = 14; VT_I1 = 16; VT_UI1 = 17; +VT_UI2 = 18; VT_UI4 = 19; VT_I8 = 20; VT_UI8 = 21; VT_INT = 22; VT_UINT = 23; +VT_VOID = 24; VT_HRESULT = 25; VT_PTR = 26; VT_SAFEARRAY = 27; VT_CARRAY = 28; +VT_USERDEFINED = 29; VT_LPSTR = 30; VT_LPWSTR = 31; VT_FILETIME = 64; +VT_BLOB = 65; VT_STREAM = 66; VT_STORAGE = 67; VT_STREAMED_OBJECT = 68; +VT_STORED_OBJECT = 69; VT_BLOB_OBJECT = 70; VT_CF = 71; VT_CLSID = 72; +VT_VECTOR = 0x1000; # map property id to name (for debugging purposes) @@ -352,7 +408,7 @@ for keyword, var in list(vars().items()): WORD_CLSID = "00020900-0000-0000-C000-000000000046" #TODO: check Excel, PPT, ... 
-#[PL]: Defect levels to classify parsing errors - see OleFileIO._raise_defect() +# [PL]: Defect levels to classify parsing errors - see OleFileIO._raise_defect() DEFECT_UNSURE = 10 # a case which looks weird, but not sure it's a defect DEFECT_POTENTIAL = 20 # a potential defect DEFECT_INCORRECT = 30 # an error according to specifications, but parsing @@ -360,23 +416,45 @@ DEFECT_INCORRECT = 30 # an error according to specifications, but parsing DEFECT_FATAL = 40 # an error which cannot be ignored, parsing is # impossible -#[PL] add useful constants to __all__: -for key in list(vars().keys()): - if key.startswith('STGTY_') or key.startswith('DEFECT_'): - __all__.append(key) +# Minimal size of an empty OLE file, with 512-bytes sectors = 1536 bytes +# (this is used in isOleFile and OleFile.open) +MINIMAL_OLEFILE_SIZE = 1536 + +# [PL] add useful constants to __all__: +# for key in list(vars().keys()): +# if key.startswith('STGTY_') or key.startswith('DEFECT_'): +# __all__.append(key) -#--- FUNCTIONS ---------------------------------------------------------------- +#=== FUNCTIONS =============================================================== -def isOleFile (filename): +def isOleFile(filename): """ - Test if file is an OLE container (according to its header). + Test if a file is an OLE container (according to the magic bytes in its header). + + :param filename: string-like or file-like object, OLE file to parse + + - if filename is a string smaller than 1536 bytes, it is the path + of the file to open. (bytes or unicode string) + - if filename is a string longer than 1535 bytes, it is parsed + as the content of an OLE file in memory. (bytes type only) + - if filename is a file-like object (with read and seek methods), + it is parsed as-is. - :param filename: file name or path (str, unicode) :returns: True if OLE, False otherwise. """ - f = open(filename, 'rb') - header = f.read(len(MAGIC)) + # check if filename is a string-like or file-like object: + if hasattr(filename, 'read'): + # file-like object: use it directly + header = filename.read(len(MAGIC)) + # just in case, seek back to start of file: + filename.seek(0) + elif isinstance(filename, bytes) and len(filename) >= MINIMAL_OLEFILE_SIZE: + # filename is a bytes string containing the OLE file to be parsed: + header = filename[:len(MAGIC)] + else: + # string-like object: filename of file on disk + header = open(filename, 'rb').read(len(MAGIC)) if header == MAGIC: return True else: @@ -399,23 +477,20 @@ def i16(c, o = 0): """ Converts a 2-bytes (16 bits) string to an integer. - :param c: string containing bytes to convert - :param o: offset of bytes to convert in string + c: string containing bytes to convert + o: offset of bytes to convert in string """ - return i8(c[o]) | (i8(c[o+1])<<8) + return struct.unpack("=len(fat): + if sect < 0 or sect >= len(fat): debug('sect=%d (%X) / len(fat)=%d' % (sect, sect, len(fat))) - debug('i=%d / nb_sectors=%d' %(i, nb_sectors)) + debug('i=%d / nb_sectors=%d' % (i, nb_sectors)) ## tmp_data = b"".join(data) ## f = open('test_debug.bin', 'wb') ## f.write(tmp_data) @@ -721,7 +771,7 @@ class _OleStream(io.BytesIO): # Note: if sector is the last of the file, sometimes it is not a # complete sector (of 512 or 4K), so we may read less than # sectorsize. 
- if len(sector_data)!=sectorsize and sect!=(len(fat)-1): + if len(sector_data) != sectorsize and sect != (len(fat)-1): debug('sect=%d / len(fat)=%d, seek=%d / filesize=%d, len read=%d' % (sect, len(fat), offset+sectorsize*sect, filesize, len(sector_data))) debug('seek+len(read)=%d' % (offset+sectorsize*sect+len(sector_data))) @@ -729,11 +779,11 @@ class _OleStream(io.BytesIO): data.append(sector_data) # jump to next sector in the FAT: try: - sect = fat[sect] + sect = fat[sect] & 0xFFFFFFFF # JYTHON-WORKAROUND except IndexError: # [PL] if pointer is out of the FAT an exception is raised raise IOError('incorrect OLE FAT, sector index out of range') - #[PL] Last sector should be a "end of chain" marker: + # [PL] Last sector should be a "end of chain" marker: if sect != ENDOFCHAIN: raise IOError('incorrect last sector index in OLE stream') data = b"".join(data) @@ -762,7 +812,7 @@ class _OleDirectoryEntry(object): """ OLE2 Directory Entry """ - #[PL] parsing code moved from OleFileIO.loaddirectory + # [PL] parsing code moved from OleFileIO.loaddirectory # struct to parse directory entries: # <: little-endian byte order, standard sizes @@ -827,23 +877,26 @@ class _OleDirectoryEntry(object): sizeHigh ) = struct.unpack(_OleDirectoryEntry.STRUCT_DIRENTRY, entry) if self.entry_type not in [STGTY_ROOT, STGTY_STORAGE, STGTY_STREAM, STGTY_EMPTY]: - olefile._raise_defect(DEFECT_INCORRECT, 'unhandled OLE storage type') + olefile.raise_defect(DEFECT_INCORRECT, 'unhandled OLE storage type') # only first directory entry can (and should) be root: if self.entry_type == STGTY_ROOT and sid != 0: - olefile._raise_defect(DEFECT_INCORRECT, 'duplicate OLE root entry') + olefile.raise_defect(DEFECT_INCORRECT, 'duplicate OLE root entry') if sid == 0 and self.entry_type != STGTY_ROOT: - olefile._raise_defect(DEFECT_INCORRECT, 'incorrect OLE root entry') + olefile.raise_defect(DEFECT_INCORRECT, 'incorrect OLE root entry') #debug (struct.unpack(fmt_entry, entry[:len_entry])) # name should be at most 31 unicode characters + null character, # so 64 bytes in total (31*2 + 2): - if namelength>64: - olefile._raise_defect(DEFECT_INCORRECT, 'incorrect DirEntry name length') + if namelength > 64: + olefile.raise_defect(DEFECT_INCORRECT, 'incorrect DirEntry name length') # if exception not raised, namelength is set to the maximum value: namelength = 64 # only characters without ending null char are kept: name = name[:(namelength-2)] - # name is converted from unicode to Latin-1: - self.name = _unicode(name) + #TODO: check if the name is actually followed by a null unicode character ([MS-CFB] 2.6.1) + #TODO: check if the name does not contain forbidden characters: + # [MS-CFB] 2.6.1: "The following characters are illegal and MUST NOT be part of the name: '/', '\', ':', '!'." 
+ # name is converted from UTF-16LE to the path encoding specified in the OleFileIO: + self.name = olefile._decode_utf16_str(name) debug('DirEntry SID=%d: %s' % (self.sid, repr(self.name))) debug(' - type: %d' % self.entry_type) @@ -858,21 +911,21 @@ class _OleDirectoryEntry(object): if sizeHigh != 0 and sizeHigh != 0xFFFFFFFF: debug('sectorsize=%d, sizeLow=%d, sizeHigh=%d (%X)' % (olefile.sectorsize, sizeLow, sizeHigh, sizeHigh)) - olefile._raise_defect(DEFECT_UNSURE, 'incorrect OLE stream size') + olefile.raise_defect(DEFECT_UNSURE, 'incorrect OLE stream size') self.size = sizeLow else: - self.size = sizeLow + (long(sizeHigh)<<32) + self.size = sizeLow + (long(sizeHigh) << 32) debug(' - size: %d (sizeLow=%d, sizeHigh=%d)' % (self.size, sizeLow, sizeHigh)) self.clsid = _clsid(clsid) # a storage should have a null size, BUT some implementations such as # Word 8 for Mac seem to allow non-null values => Potential defect: if self.entry_type == STGTY_STORAGE and self.size != 0: - olefile._raise_defect(DEFECT_POTENTIAL, 'OLE storage with size>0') + olefile.raise_defect(DEFECT_POTENTIAL, 'OLE storage with size>0') # check if stream is not already referenced elsewhere: - if self.entry_type in (STGTY_ROOT, STGTY_STREAM) and self.size>0: + if self.entry_type in (STGTY_ROOT, STGTY_STREAM) and self.size > 0: if self.size < olefile.minisectorcutoff \ - and self.entry_type==STGTY_STREAM: # only streams can be in MiniFAT + and self.entry_type == STGTY_STREAM: # only streams can be in MiniFAT # ministream object minifat = True else: @@ -907,8 +960,8 @@ class _OleDirectoryEntry(object): Walk through red-black tree of children of this directory entry to add all of them to the kids list. (recursive method) - child_sid : index of child directory entry to use, or None when called - first time for the root. (only used during recursion) + :param child_sid : index of child directory entry to use, or None when called + first time for the root. (only used during recursion) """ # [PL] this method was added to use simple recursion instead of a complex # algorithm. @@ -916,8 +969,8 @@ class _OleDirectoryEntry(object): if child_sid == NOSTREAM: return # check if child SID is in the proper range: - if child_sid<0 or child_sid>=len(self.olefile.direntries): - self.olefile._raise_defect(DEFECT_FATAL, 'OLE DirEntry index out of range') + if child_sid < 0 or child_sid >= len(self.olefile.direntries): + self.olefile.raise_defect(DEFECT_FATAL, 'OLE DirEntry index out of range') # get child direntry: child = self.olefile._load_direntry(child_sid) #direntries[child_sid] debug('append_kids: child_sid=%d - %s - sid_left=%d, sid_right=%d, sid_child=%d' @@ -929,7 +982,7 @@ class _OleDirectoryEntry(object): # Check if its name is not already used (case-insensitive): name_lower = child.name.lower() if name_lower in self.kids_dict: - self.olefile._raise_defect(DEFECT_INCORRECT, + self.olefile.raise_defect(DEFECT_INCORRECT, "Duplicate filename in OLE storage") # Then the child_sid _OleDirectoryEntry object is appended to the # kids list and dictionary: @@ -937,7 +990,7 @@ class _OleDirectoryEntry(object): self.kids_dict[name_lower] = child # Check if kid was not already referenced in a storage: if child.used: - self.olefile._raise_defect(DEFECT_INCORRECT, + self.olefile.raise_defect(DEFECT_INCORRECT, 'OLE Entry referenced more than once') child.used = True # Finally walk through right side of the tree: @@ -983,7 +1036,7 @@ class _OleDirectoryEntry(object): Return modification time of a directory entry. 
:returns: None if modification time is null, a python datetime object - otherwise (UTC timezone) + otherwise (UTC timezone) new in version 0.26 """ @@ -996,7 +1049,7 @@ class _OleDirectoryEntry(object): Return creation time of a directory entry. :returns: None if modification time is null, a python datetime object - otherwise (UTC timezone) + otherwise (UTC timezone) new in version 0.26 """ @@ -1037,34 +1090,60 @@ class OleFileIO(object): TIFF files). """ - def __init__(self, filename = None, raise_defects=DEFECT_FATAL): + def __init__(self, filename=None, raise_defects=DEFECT_FATAL, + write_mode=False, debug=False, path_encoding=DEFAULT_PATH_ENCODING): """ - Constructor for OleFileIO class. + Constructor for the OleFileIO class. :param filename: file to open. + + - if filename is a string smaller than 1536 bytes, it is the path + of the file to open. (bytes or unicode string) + - if filename is a string longer than 1535 bytes, it is parsed + as the content of an OLE file in memory. (bytes type only) + - if filename is a file-like object (with read, seek and tell methods), + it is parsed as-is. + :param raise_defects: minimal level for defects to be raised as exceptions. - (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a - security-oriented application, see source code for details) + (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a + security-oriented application, see source code for details) + + :param write_mode: bool, if True the file is opened in read/write mode instead + of read-only by default. + + :param debug: bool, set debug mode + + :param path_encoding: None or str, name of the codec to use for path + names (streams and storages), or None for Unicode. + Unicode by default on Python 3+, UTF-8 on Python 2.x. + (new in olefile 0.42, was hardcoded to Latin-1 until olefile v0.41) """ + set_debug_mode(debug) # minimal level for defects to be raised as exceptions: self._raise_defects_level = raise_defects # list of defects/issues not raised as exceptions: # tuples of (exception type, message) self.parsing_issues = [] + self.write_mode = write_mode + self.path_encoding = path_encoding + self._filesize = None + self.fp = None if filename: - self.open(filename) + self.open(filename, write_mode=write_mode) - def _raise_defect(self, defect_level, message, exception_type=IOError): + def raise_defect(self, defect_level, message, exception_type=IOError): """ This method should be called for any defect found during file parsing. It may raise an IOError exception according to the minimal level chosen for the OleFileIO object. :param defect_level: defect level, possible values are: - DEFECT_UNSURE : a case which looks weird, but not sure it's a defect - DEFECT_POTENTIAL : a potential defect - DEFECT_INCORRECT : an error according to specifications, but parsing can go on - DEFECT_FATAL : an error which cannot be ignored, parsing is impossible + + - DEFECT_UNSURE : a case which looks weird, but not sure it's a defect + - DEFECT_POTENTIAL : a potential defect + - DEFECT_INCORRECT : an error according to specifications, but parsing can go on + - DEFECT_FATAL : an error which cannot be ignored, parsing is impossible + :param message: string describing the defect, used with raised exception. 
:param exception_type: exception class to be raised, IOError by default """ @@ -1075,31 +1154,68 @@ class OleFileIO(object): # just record the issue, no exception raised: self.parsing_issues.append((exception_type, message)) - def open(self, filename): + def _decode_utf16_str(self, utf16_str, errors='replace'): """ - Open an OLE2 file. - Reads the header, FAT and directory. + Decode a string encoded in UTF-16 LE format, as found in the OLE + directory or in property streams. Return a string encoded + according to the path_encoding specified for the OleFileIO object. - :param filename: string-like or file-like object + :param utf16_str: bytes string encoded in UTF-16 LE format + :param errors: str, see python documentation for str.decode() + :return: str, encoded according to path_encoding """ - #[PL] check if filename is a string-like or file-like object: + unicode_str = utf16_str.decode('UTF-16LE', errors) + if self.path_encoding: + # an encoding has been specified for path names: + return unicode_str.encode(self.path_encoding, errors) + else: + # path_encoding=None, return the Unicode string as-is: + return unicode_str + + def open(self, filename, write_mode=False): + """ + Open an OLE2 file in read-only or read/write mode. + Read and parse the header, FAT and directory. + + :param filename: string-like or file-like object, OLE file to parse + + - if filename is a string smaller than 1536 bytes, it is the path + of the file to open. (bytes or unicode string) + - if filename is a string longer than 1535 bytes, it is parsed + as the content of an OLE file in memory. (bytes type only) + - if filename is a file-like object (with read, seek and tell methods), + it is parsed as-is. + + :param write_mode: bool, if True the file is opened in read/write mode instead + of read-only by default. (ignored if filename is not a path) + """ + self.write_mode = write_mode + # [PL] check if filename is a string-like or file-like object: # (it is better to check for a read() method) if hasattr(filename, 'read'): - # file-like object + #TODO: also check seek and tell methods? + # file-like object: use it directly self.fp = filename + elif isinstance(filename, bytes) and len(filename) >= MINIMAL_OLEFILE_SIZE: + # filename is a bytes string containing the OLE file to be parsed: + # convert it to BytesIO + self.fp = io.BytesIO(filename) else: # string-like object: filename of file on disk - #TODO: if larger than 1024 bytes, this could be the actual data => BytesIO - self.fp = open(filename, "rb") - # old code fails if filename is not a plain string: - #if isinstance(filename, (bytes, basestring)): - # self.fp = open(filename, "rb") - #else: - # self.fp = filename + if self.write_mode: + # open file in mode 'read with update, binary' + # According to https://docs.python.org/2/library/functions.html#open + # 'w' would truncate the file, 'a' may only append on some Unixes + mode = 'r+b' + else: + # read-only mode by default + mode = 'rb' + self.fp = open(filename, mode) # obtain the filesize by using seek and tell, which should work on most # file-like objects: #TODO: do it above, using getsize with filename when possible? 
#TODO: fix code to fail with clear exception when filesize cannot be obtained + filesize = 0 self.fp.seek(0, os.SEEK_END) try: filesize = self.fp.tell() @@ -1115,7 +1231,7 @@ class OleFileIO(object): header = self.fp.read(512) if len(header) != 512 or header[:8] != MAGIC: - self._raise_defect(DEFECT_FATAL, "not an OLE2 structured storage file") + self.raise_defect(DEFECT_FATAL, "not an OLE2 structured storage file") # [PL] header structure according to AAF specifications: ##Header @@ -1156,7 +1272,7 @@ class OleFileIO(object): # '<' indicates little-endian byte ordering for Intel (cf. struct module help) fmt_header = '<8s16sHHHHHHLLLLLLLLLL' header_size = struct.calcsize(fmt_header) - debug( "fmt_header size = %d, +FAT = %d" % (header_size, header_size + 109*4) ) + debug("fmt_header size = %d, +FAT = %d" % (header_size, header_size + 109*4)) header1 = header[:header_size] ( self.Sig, @@ -1177,49 +1293,62 @@ class OleFileIO(object): self.sectDifStart, self.csectDif ) = struct.unpack(fmt_header, header1) - debug(struct.unpack(fmt_header, header1)) + debug(struct.unpack(fmt_header, header1)) if self.Sig != MAGIC: # OLE signature should always be present - self._raise_defect(DEFECT_FATAL, "incorrect OLE signature") + self.raise_defect(DEFECT_FATAL, "incorrect OLE signature") if self.clsid != bytearray(16): # according to AAF specs, CLSID should always be zero - self._raise_defect(DEFECT_INCORRECT, "incorrect CLSID in OLE header") - debug( "MinorVersion = %d" % self.MinorVersion ) - debug( "DllVersion = %d" % self.DllVersion ) + self.raise_defect(DEFECT_INCORRECT, "incorrect CLSID in OLE header") + debug("MinorVersion = %d" % self.MinorVersion) + debug("DllVersion = %d" % self.DllVersion) if self.DllVersion not in [3, 4]: # version 3: usual format, 512 bytes per sector # version 4: large format, 4K per sector - self._raise_defect(DEFECT_INCORRECT, "incorrect DllVersion in OLE header") - debug( "ByteOrder = %X" % self.ByteOrder ) + self.raise_defect(DEFECT_INCORRECT, "incorrect DllVersion in OLE header") + debug("ByteOrder = %X" % self.ByteOrder) if self.ByteOrder != 0xFFFE: # For now only common little-endian documents are handled correctly - self._raise_defect(DEFECT_FATAL, "incorrect ByteOrder in OLE header") + self.raise_defect(DEFECT_FATAL, "incorrect ByteOrder in OLE header") # TODO: add big-endian support for documents created on Mac ? + # But according to [MS-CFB] ? v20140502, ByteOrder MUST be 0xFFFE. 
self.SectorSize = 2**self.SectorShift - debug( "SectorSize = %d" % self.SectorSize ) + debug("SectorSize = %d" % self.SectorSize) if self.SectorSize not in [512, 4096]: - self._raise_defect(DEFECT_INCORRECT, "incorrect SectorSize in OLE header") - if (self.DllVersion==3 and self.SectorSize!=512) \ - or (self.DllVersion==4 and self.SectorSize!=4096): - self._raise_defect(DEFECT_INCORRECT, "SectorSize does not match DllVersion in OLE header") + self.raise_defect(DEFECT_INCORRECT, "incorrect SectorSize in OLE header") + if (self.DllVersion == 3 and self.SectorSize != 512) \ + or (self.DllVersion == 4 and self.SectorSize != 4096): + self.raise_defect(DEFECT_INCORRECT, "SectorSize does not match DllVersion in OLE header") self.MiniSectorSize = 2**self.MiniSectorShift - debug( "MiniSectorSize = %d" % self.MiniSectorSize ) + debug("MiniSectorSize = %d" % self.MiniSectorSize) if self.MiniSectorSize not in [64]: - self._raise_defect(DEFECT_INCORRECT, "incorrect MiniSectorSize in OLE header") + self.raise_defect(DEFECT_INCORRECT, "incorrect MiniSectorSize in OLE header") if self.Reserved != 0 or self.Reserved1 != 0: - self._raise_defect(DEFECT_INCORRECT, "incorrect OLE header (non-null reserved bytes)") - debug( "csectDir = %d" % self.csectDir ) - if self.SectorSize==512 and self.csectDir!=0: - self._raise_defect(DEFECT_INCORRECT, "incorrect csectDir in OLE header") - debug( "csectFat = %d" % self.csectFat ) - debug( "sectDirStart = %X" % self.sectDirStart ) - debug( "signature = %d" % self.signature ) + self.raise_defect(DEFECT_INCORRECT, "incorrect OLE header (non-null reserved bytes)") + debug("csectDir = %d" % self.csectDir) + # Number of directory sectors (only allowed if DllVersion != 3) + if self.SectorSize == 512 and self.csectDir != 0: + self.raise_defect(DEFECT_INCORRECT, "incorrect csectDir in OLE header") + debug("csectFat = %d" % self.csectFat) + # csectFat = number of FAT sectors in the file + debug("sectDirStart = %X" % self.sectDirStart) + # sectDirStart = 1st sector containing the directory + debug("signature = %d" % self.signature) # Signature should be zero, BUT some implementations do not follow this # rule => only a potential defect: + # (according to MS-CFB, may be != 0 for applications supporting file + # transactions) if self.signature != 0: - self._raise_defect(DEFECT_POTENTIAL, "incorrect OLE header (signature>0)") + self.raise_defect(DEFECT_POTENTIAL, "incorrect OLE header (signature>0)") debug("MiniSectorCutoff = %d" % self.MiniSectorCutoff) + # MS-CFB: This integer field MUST be set to 0x00001000. This field + # specifies the maximum size of a user-defined data stream allocated + # from the mini FAT and mini stream, and that cutoff is 4096 bytes. + # Any user-defined data stream larger than or equal to this cutoff size + # must be allocated as normal sectors from the FAT. 
+ if self.MiniSectorCutoff != 0x1000: + self.raise_defect(DEFECT_INCORRECT, "incorrect MiniSectorCutoff in OLE header") debug("MiniFatStart = %X" % self.MiniFatStart) debug("csectMiniFat = %d" % self.csectMiniFat) debug("sectDifStart = %X" % self.sectDifStart) @@ -1227,11 +1356,15 @@ class OleFileIO(object): # calculate the number of sectors in the file # (-1 because header doesn't count) - self.nb_sect = ( (filesize + self.SectorSize-1) // self.SectorSize) - 1 - debug( "Number of sectors in the file: %d" % self.nb_sect ) + self.nb_sect = ((filesize + self.SectorSize-1) // self.SectorSize) - 1 + debug("Number of sectors in the file: %d" % self.nb_sect) + #TODO: change this test, because an OLE file MAY contain other data + # after the last sector. - # file clsid (probably never used, so we don't store it) - #clsid = _clsid(header[8:24]) + # file clsid + self.clsid = _clsid(header[8:24]) + + #TODO: remove redundant attributes, and fix the code which uses them? self.sectorsize = self.SectorSize #1 << i16(header, 30) self.minisectorsize = self.MiniSectorSize #1 << i16(header, 32) self.minisectorcutoff = self.MiniSectorCutoff # i32(header, 56) @@ -1248,7 +1381,7 @@ class OleFileIO(object): # Load file allocation tables self.loadfat(header) - # Load direcory. This sets both the direntries list (ordered by sid) + # Load directory. This sets both the direntries list (ordered by sid) # and the root (ordered by hierarchy) members. self.loaddirectory(self.sectDirStart)#i32(header, 48)) self.ministream = None @@ -1265,8 +1398,9 @@ class OleFileIO(object): Checks if a stream has not been already referenced elsewhere. This method should only be called once for each known stream, and only if stream size is not null. - :param first_sect: index of first sector of the stream in FAT - :param minifat: if True, stream is located in the MiniFAT, else in the FAT + + :param first_sect: int, index of first sector of the stream in FAT + :param minifat: bool, if True, stream is located in the MiniFAT, else in the FAT """ if minifat: debug('_check_duplicate_stream: sect=%d in MiniFAT' % first_sect) @@ -1274,13 +1408,13 @@ class OleFileIO(object): else: debug('_check_duplicate_stream: sect=%d in FAT' % first_sect) # some values can be safely ignored (not a real stream): - if first_sect in (DIFSECT,FATSECT,ENDOFCHAIN,FREESECT): + if first_sect in (DIFSECT, FATSECT, ENDOFCHAIN, FREESECT): return used_streams = self._used_streams_fat #TODO: would it be more efficient using a dict or hash values, instead # of a list of long ? if first_sect in used_streams: - self._raise_defect(DEFECT_INCORRECT, 'Stream referenced twice') + self.raise_defect(DEFECT_INCORRECT, 'Stream referenced twice') else: used_streams.append(first_sect) @@ -1290,7 +1424,7 @@ class OleFileIO(object): if not DEBUG_MODE: return # dictionary to convert special FAT values in human-readable strings - VPL=8 # valeurs par ligne (8+1 * 8+1 = 81) + VPL = 8 # values per line (8+1 * 8+1 = 81) fatnames = { FREESECT: "..free..", ENDOFCHAIN: "[ END. 
]", @@ -1307,25 +1441,28 @@ class OleFileIO(object): index = l*VPL print("%8X:" % (firstindex+index), end=" ") for i in range(index, index+VPL): - if i>=nbsect: + if i >= nbsect: break sect = fat[i] - if sect in fatnames: - nom = fatnames[sect] + aux = sect & 0xFFFFFFFF # JYTHON-WORKAROUND + if aux in fatnames: + name = fatnames[aux] else: if sect == i+1: - nom = " --->" + name = " --->" else: - nom = "%8X" % sect - print(nom, end=" ") + name = "%8X" % sect + print(name, end=" ") print() def dumpsect(self, sector, firstindex=0): "Displays a sector in a human-readable form, for debugging purpose." if not DEBUG_MODE: return - VPL=8 # number of values per line (8+1 * 8+1 = 81) + VPL = 8 # number of values per line (8+1 * 8+1 = 81) tab = array.array(UINT32, sector) + if sys.byteorder == 'big': + tab.byteswap() nbsect = len(tab) nlines = (nbsect+VPL-1)//VPL print("index", end=" ") @@ -1336,11 +1473,11 @@ class OleFileIO(object): index = l*VPL print("%8X:" % (firstindex+index), end=" ") for i in range(index, index+VPL): - if i>=nbsect: + if i >= nbsect: break sect = tab[i] - nom = "%8X" % sect - print(nom, end=" ") + name = "%8X" % sect + print(name, end=" ") print() def sect2array(self, sect): @@ -1371,9 +1508,11 @@ class OleFileIO(object): self.dumpsect(sect) # The FAT is a sector chain starting at the first index of itself. for isect in fat1: - #print("isect = %X" % isect) + isect = isect & 0xFFFFFFFF # JYTHON-WORKAROUND + debug("isect = %X" % isect) if isect == ENDOFCHAIN or isect == FREESECT: # the end of the sector chain has been reached + debug("found end of sector chain") break # read the FAT sector s = self.getsect(isect) @@ -1387,12 +1526,13 @@ class OleFileIO(object): """ Load the FAT table. """ - # The header contains a sector numbers - # for the first 109 FAT sectors. Additional sectors are - # described by DIF blocks + # The 1st sector of the file contains sector numbers for the first 109 + # FAT sectors, right after the header which is 76 bytes long. + # (always 109, whatever the sector size: 512 bytes = 76+4*109) + # Additional sectors are described by DIF blocks sect = header[76:512] - debug( "len(sect)=%d, so %d integers" % (len(sect), len(sect)//4) ) + debug("len(sect)=%d, so %d integers" % (len(sect), len(sect)//4)) #fat = [] # [PL] FAT is an array of 32 bits unsigned ints, it's more effective # to use an array than a list in Python. @@ -1402,7 +1542,7 @@ class OleFileIO(object): #self.dumpfat(self.fat) ## for i in range(0, len(sect), 4): ## ix = i32(sect, i) -## #[PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFE or ix == 0xFFFFFFFF: +## # [PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFE or ix == 0xFFFFFFFF: ## if ix == 0xFFFFFFFE or ix == 0xFFFFFFFF: ## break ## s = self.getsect(ix) @@ -1414,14 +1554,17 @@ class OleFileIO(object): if self.csectFat <= 109: # there must be at least 109 blocks in header and the rest in # DIFAT, so number of sectors must be >109. 
- self._raise_defect(DEFECT_INCORRECT, 'incorrect DIFAT, not enough sectors') + self.raise_defect(DEFECT_INCORRECT, 'incorrect DIFAT, not enough sectors') if self.sectDifStart >= self.nb_sect: # initial DIFAT block index must be valid - self._raise_defect(DEFECT_FATAL, 'incorrect DIFAT, first index out of range') + self.raise_defect(DEFECT_FATAL, 'incorrect DIFAT, first index out of range') debug("DIFAT analysis...") # We compute the necessary number of DIFAT sectors : - # (each DIFAT sector = 127 pointers + 1 towards next DIFAT sector) - nb_difat = (self.csectFat-109 + 126)//127 + # Number of pointers per DIFAT sector = (sectorsize/4)-1 + # (-1 because the last pointer is the next DIFAT sector number) + nb_difat_sectors = (self.sectorsize//4)-1 + # (if 512 bytes: each DIFAT sector = 127 pointers + 1 towards next DIFAT sector) + nb_difat = (self.csectFat-109 + nb_difat_sectors-1)//nb_difat_sectors debug("nb_difat = %d" % nb_difat) if self.csectDif != nb_difat: raise IOError('incorrect DIFAT') @@ -1432,9 +1575,9 @@ class OleFileIO(object): sector_difat = self.getsect(isect_difat) difat = self.sect2array(sector_difat) self.dumpsect(sector_difat) - self.loadfat_sect(difat[:127]) + self.loadfat_sect(difat[:nb_difat_sectors]) # last DIFAT pointer is next DIFAT sector: - isect_difat = difat[127] + isect_difat = difat[nb_difat_sectors] debug("next DIFAT sector: %X" % isect_difat) # checks: if isect_difat not in [ENDOFCHAIN, FREESECT]: @@ -1474,11 +1617,11 @@ class OleFileIO(object): (self.minifatsect, self.csectMiniFat, used_size, stream_size, nb_minisectors)) if used_size > stream_size: # This is not really a problem, but may indicate a wrong implementation: - self._raise_defect(DEFECT_INCORRECT, 'OLE MiniStream is larger than MiniFAT') + self.raise_defect(DEFECT_INCORRECT, 'OLE MiniStream is larger than MiniFAT') # In any case, first read stream_size: s = self._open(self.minifatsect, stream_size, force_FAT=True).read() - #[PL] Old code replaced by an array: - #self.minifat = [i32(s, i) for i in range(0, len(s), 4)] + # [PL] Old code replaced by an array: + # self.minifat = [i32(s, i) for i in range(0, len(s), 4)] self.minifat = self.sect2array(s) # Then shrink the array to used size, to avoid indexes out of MiniStream: debug('MiniFAT shrunk from %d to %d sectors' % (len(self.minifat), nb_minisectors)) @@ -1491,27 +1634,59 @@ class OleFileIO(object): """ Read given sector from file on disk. - :param sect: sector index + :param sect: int, sector index :returns: a string containing the sector data. """ - # [PL] this original code was wrong when sectors are 4KB instead of + # From [MS-CFB]: A sector number can be converted into a byte offset + # into the file by using the following formula: + # (sector number + 1) x Sector Size. + # This implies that sector #0 of the file begins at byte offset Sector + # Size, not at 0. 
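The [MS-CFB] offset formula quoted above is simple enough to show directly; the helper name and sector sizes below are only for illustration and are not taken from the patch:

    def sector_to_offset(sect, sector_size):
        # (sector number + 1) * sector size: sector #0 starts right after
        # the header, which itself occupies one sector-sized slot.
        return (sect + 1) * sector_size

    assert sector_to_offset(0, 512) == 512       # first sector follows the header
    assert sector_to_offset(3, 4096) == 16384
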
+ + # [PL] the original code in PIL was wrong when sectors are 4KB instead of # 512 bytes: - #self.fp.seek(512 + self.sectorsize * sect) - #[PL]: added safety checks: - #print("getsect(%X)" % sect) + # self.fp.seek(512 + self.sectorsize * sect) + # [PL]: added safety checks: + # print("getsect(%X)" % sect) try: self.fp.seek(self.sectorsize * (sect+1)) except: debug('getsect(): sect=%X, seek=%d, filesize=%d' % (sect, self.sectorsize*(sect+1), self._filesize)) - self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') + self.raise_defect(DEFECT_FATAL, 'OLE sector index out of range') sector = self.fp.read(self.sectorsize) if len(sector) != self.sectorsize: debug('getsect(): sect=%X, read=%d, sectorsize=%d' % (sect, len(sector), self.sectorsize)) - self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector') + self.raise_defect(DEFECT_FATAL, 'incomplete OLE sector') return sector + def write_sect(self, sect, data, padding=b'\x00'): + """ + Write given sector to file on disk. + + :param sect: int, sector index + :param data: bytes, sector data + :param padding: single byte, padding character if data < sector size + """ + if not isinstance(data, bytes): + raise TypeError("write_sect: data must be a bytes string") + if not isinstance(padding, bytes) or len(padding) != 1: + raise TypeError("write_sect: padding must be a bytes string of 1 char") + #TODO: we could allow padding=None for no padding at all + try: + self.fp.seek(self.sectorsize * (sect+1)) + except: + debug('write_sect(): sect=%X, seek=%d, filesize=%d' % + (sect, self.sectorsize*(sect+1), self._filesize)) + self.raise_defect(DEFECT_FATAL, 'OLE sector index out of range') + if len(data) < self.sectorsize: + # add padding + data += padding * (self.sectorsize - len(data)) + elif len(data) < self.sectorsize: + raise ValueError("Data is larger than sector size") + self.fp.write(data) + def loaddirectory(self, sect): """ Load the directory. @@ -1525,14 +1700,14 @@ class OleFileIO(object): # (stream size is not known in advance) self.directory_fp = self._open(sect) - #[PL] to detect malformed documents and avoid DoS attacks, the maximum + # [PL] to detect malformed documents and avoid DoS attacks, the maximum # number of directory entries can be calculated: max_entries = self.directory_fp.size // 128 debug('loaddirectory: size=%d, max_entries=%d' % (self.directory_fp.size, max_entries)) # Create list of directory entries - #self.direntries = [] + # self.direntries = [] # We start with a list of "None" object self.direntries = [None] * max_entries ## for sid in iterrange(max_entries): @@ -1541,13 +1716,13 @@ class OleFileIO(object): ## break ## self.direntries.append(_OleDirectoryEntry(entry, sid, self)) # load root entry: - self._load_direntry(0) + root_entry = self._load_direntry(0) # Root entry is the first entry: self.root = self.direntries[0] # read and build all storage trees, starting from the root: self.root.build_storage_tree() - def _load_direntry (self, sid): + def _load_direntry(self, sid): """ Load a directory entry from the directory. This method should only be called once for each storage/stream when @@ -1555,14 +1730,15 @@ class OleFileIO(object): :param sid: index of storage/stream in the directory. :returns: a _OleDirectoryEntry object + :exception IOError: if the entry has always been referenced. 
""" # check if SID is OK: - if sid<0 or sid>=len(self.direntries): - self._raise_defect(DEFECT_FATAL, "OLE directory index out of range") + if sid < 0 or sid >= len(self.direntries): + self.raise_defect(DEFECT_FATAL, "OLE directory index out of range") # check if entry was already referenced: if self.direntries[sid] is not None: - self._raise_defect(DEFECT_INCORRECT, + self.raise_defect(DEFECT_INCORRECT, "double reference for OLE stream/storage") # if exception not raised, return the object return self.direntries[sid] @@ -1585,7 +1761,7 @@ class OleFileIO(object): :param start: index of first sector :param size: size of stream (or nothing if size is unknown) :param force_FAT: if False (default), stream will be opened in FAT or MiniFAT - according to size. If True, it will always be opened in FAT. + according to size. If True, it will always be opened in FAT. """ debug('OleFileIO.open(): sect=%d, size=%d, force_FAT=%s' % (start, size, str(force_FAT))) @@ -1602,46 +1778,52 @@ class OleFileIO(object): (self.root.isectStart, size_ministream)) self.ministream = self._open(self.root.isectStart, size_ministream, force_FAT=True) - return _OleStream(self.ministream, start, size, 0, - self.minisectorsize, self.minifat, - self.ministream.size) + return _OleStream(fp=self.ministream, sect=start, size=size, + offset=0, sectorsize=self.minisectorsize, + fat=self.minifat, filesize=self.ministream.size) else: # standard stream - return _OleStream(self.fp, start, size, 512, - self.sectorsize, self.fat, self._filesize) + return _OleStream(fp=self.fp, sect=start, size=size, + offset=self.sectorsize, + sectorsize=self.sectorsize, fat=self.fat, + filesize=self._filesize) def _list(self, files, prefix, node, streams=True, storages=False): """ - (listdir helper) + listdir helper + :param files: list of files to fill in :param prefix: current location in storage tree (list of names) :param node: current node (_OleDirectoryEntry object) :param streams: bool, include streams if True (True by default) - new in v0.26 :param storages: bool, include storages if True (False by default) - new in v0.26 - (note: the root storage is never included) + (note: the root storage is never included) """ prefix = prefix + [node.name] for entry in node.kids: - if entry.kids: + if entry.entry_type == STGTY_STORAGE: # this is a storage if storages: # add it to the list files.append(prefix[1:] + [entry.name]) # check its kids self._list(files, prefix, entry, streams, storages) - else: + elif entry.entry_type == STGTY_STREAM: # this is a stream if streams: # add it to the list files.append(prefix[1:] + [entry.name]) + else: + self.raise_defect(DEFECT_INCORRECT, 'The directory tree contains an entry which is not a stream nor a storage.') def listdir(self, streams=True, storages=False): """ - Return a list of streams stored in this file + Return a list of streams and/or storages stored in this file :param streams: bool, include streams if True (True by default) - new in v0.26 :param storages: bool, include storages if True (False by default) - new in v0.26 (note: the root storage is never included) + :returns: list of stream and/or storage paths """ files = [] self._list(files, [], self.root, streams, storages) @@ -1656,10 +1838,11 @@ class OleFileIO(object): - a string using Unix path syntax, for example: 'storage_1/storage_1.2/stream' - - a list of storage filenames, path to the desired stream/storage. + - or a list of storage filenames, path to the desired stream/storage. 
Example: ['storage_1', 'storage_1.2', 'stream'] + :returns: sid of requested filename - raise IOError if file not found + :exception IOError: if file not found """ # if filename is a string instead of a list, split it on slashes to @@ -1680,12 +1863,13 @@ class OleFileIO(object): def openstream(self, filename): """ Open a stream as a read-only file object (BytesIO). + Note: filename is case-insensitive. :param filename: path of stream in storage tree (except root entry), either: - a string using Unix path syntax, for example: 'storage_1/storage_1.2/stream' - - a list of storage filenames, path to the desired stream/storage. + - or a list of storage filenames, path to the desired stream/storage. Example: ['storage_1', 'storage_1.2', 'stream'] :returns: file object (read-only) @@ -1697,6 +1881,66 @@ class OleFileIO(object): raise IOError("this file is not a stream") return self._open(entry.isectStart, entry.size) + def write_stream(self, stream_name, data): + """ + Write a stream to disk. For now, it is only possible to replace an + existing stream by data of the same size. + + :param stream_name: path of stream in storage tree (except root entry), either: + + - a string using Unix path syntax, for example: + 'storage_1/storage_1.2/stream' + - or a list of storage filenames, path to the desired stream/storage. + Example: ['storage_1', 'storage_1.2', 'stream'] + + :param data: bytes, data to be written, must be the same size as the original + stream. + """ + if not isinstance(data, bytes): + raise TypeError("write_stream: data must be a bytes string") + sid = self._find(stream_name) + entry = self.direntries[sid] + if entry.entry_type != STGTY_STREAM: + raise IOError("this is not a stream") + size = entry.size + if size != len(data): + raise ValueError("write_stream: data must be the same size as the existing stream") + if size < self.minisectorcutoff: + raise NotImplementedError("Writing a stream in MiniFAT is not implemented yet") + sect = entry.isectStart + # number of sectors to write + nb_sectors = (size + (self.sectorsize-1)) // self.sectorsize + debug('nb_sectors = %d' % nb_sectors) + for i in range(nb_sectors): + # try: + # self.fp.seek(offset + self.sectorsize * sect) + # except: + # debug('sect=%d, seek=%d' % + # (sect, offset+self.sectorsize*sect)) + # raise IOError('OLE sector index out of range') + # extract one sector from data, the last one being smaller: + if i < (nb_sectors-1): + data_sector = data[i*self.sectorsize:(i+1)*self.sectorsize] + #TODO: comment this if it works + assert(len(data_sector) == self.sectorsize) + else: + data_sector = data[i*self.sectorsize:] + # TODO: comment this if it works + debug('write_stream: size=%d sectorsize=%d data_sector=%d size%%sectorsize=%d' + % (size, self.sectorsize, len(data_sector), size % self.sectorsize)) + assert(len(data_sector) % self.sectorsize == size % self.sectorsize) + self.write_sect(sect, data_sector) +# self.fp.write(data_sector) + # jump to next sector in the FAT: + try: + sect = self.fat[sect] + except IndexError: + # [PL] if pointer is out of the FAT an exception is raised + raise IOError('incorrect OLE FAT, sector index out of range') + # [PL] Last sector should be a "end of chain" marker: + if sect != ENDOFCHAIN: + raise IOError('incorrect last sector index in OLE stream') + def get_type(self, filename): """ Test if given filename exists as a stream or a storage in the OLE @@ -1750,12 +1994,13 @@ class OleFileIO(object): """ Test if given filename exists as a stream or a storage in the OLE container. 
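A hedged sketch of the FILETIME conversion described in the comment above (100 ns intervals counted from 1601-01-01); it mirrors what the surrounding code does but is not copied verbatim from it:

    import datetime

    def filetime_to_datetime(filetime):
        # FILETIME counts 100-nanosecond ticks since Jan 1, 1601 (UTC);
        # dividing by 10 turns ticks into microseconds for timedelta.
        epoch = datetime.datetime(1601, 1, 1, 0, 0, 0)
        return epoch + datetime.timedelta(microseconds=filetime // 10)

    # 116444736000000000 ticks = 11644473600 s = the Unix epoch (1970-01-01):
    assert filetime_to_datetime(116444736000000000) == datetime.datetime(1970, 1, 1)
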
+ Note: filename is case-insensitive. :param filename: path of stream in storage tree. (see openstream for syntax) :returns: True if object exist, else False. """ try: - self._find(filename) + sid = self._find(filename) return True except: return False @@ -1767,7 +2012,7 @@ class OleFileIO(object): :param filename: path of stream in storage tree (see openstream for syntax) :returns: size in bytes (long integer) :exception IOError: if file not found - :exception TypeError: if this is not a stream + :exception TypeError: if this is not a stream. """ sid = self._find(filename) entry = self.direntries[sid] @@ -1790,9 +2035,11 @@ class OleFileIO(object): :param filename: path of stream in storage tree (see openstream for syntax) :param convert_time: bool, if True timestamps will be converted to Python datetime :param no_conversion: None or list of int, timestamps not to be converted - (for example total editing time is not a real timestamp) + (for example total editing time is not a real timestamp) + :returns: a dictionary of values indexed by id (integer) """ + # REFERENCE: [MS-OLEPS] https://msdn.microsoft.com/en-us/library/dd942421.aspx # make sure no_conversion is a list, just to simplify code below: if no_conversion is None: no_conversion = [] @@ -1808,11 +2055,11 @@ class OleFileIO(object): try: # header s = fp.read(28) - # clsid = _clsid(s[8:24]) + clsid = _clsid(s[8:24]) # format id s = fp.read(20) - # fmtid = _clsid(s[:16]) + fmtid = _clsid(s[:16]) fp.seek(i32(s, 16)) # get section @@ -1825,12 +2072,12 @@ class OleFileIO(object): # a fatal error when parsing the whole file msg = 'Error while parsing properties header in stream %s: %s' % ( repr(streampath), exc) - self._raise_defect(DEFECT_INCORRECT, msg, type(exc)) + self.raise_defect(DEFECT_INCORRECT, msg, type(exc)) return data for i in range(num_props): try: - id = 0 # just in case of an exception + id = 0 # just in case of an exception id = i32(s, 8+i*8) offset = i32(s, 12+i*8) type = i32(s, offset) @@ -1840,7 +2087,7 @@ class OleFileIO(object): # test for common types first (should perhaps use # a dictionary instead?) - if type == VT_I2: # 16-bit signed integer + if type == VT_I2: # 16-bit signed integer value = i16(s, offset+4) if value >= 32768: value = value - 65536 @@ -1852,12 +2099,12 @@ class OleFileIO(object): # see http://msdn.microsoft.com/en-us/library/cc230330.aspx value = i32(s, offset+4) elif type in (VT_UI4, VT_UINT): # 4-byte unsigned integer - value = i32(s, offset+4) # FIXME + value = i32(s, offset+4) # FIXME elif type in (VT_BSTR, VT_LPSTR): # CodePageString, see http://msdn.microsoft.com/en-us/library/dd942354.aspx # size is a 32 bits integer, including the null terminator, and # possibly trailing or embedded null chars - # TODO: if codepage is unicode, the string should be converted as such + #TODO: if codepage is unicode, the string should be converted as such count = i32(s, offset+4) value = s[offset+8:offset+8+count-1] # remove all null chars: @@ -1873,14 +2120,14 @@ class OleFileIO(object): # "the string should NOT contain embedded or additional trailing # null characters." count = i32(s, offset+4) - value = _unicode(s[offset+8:offset+8+count*2]) + value = self._decode_utf16_str(s[offset+8:offset+8+count*2]) elif type == VT_FILETIME: value = long(i32(s, offset+4)) + (long(i32(s, offset+8)) << 32) # FILETIME is a 64-bit int: "number of 100ns periods # since Jan 1,1601". 
if convert_time and id not in no_conversion: debug('Converting property #%d to python datetime, value=%d=%fs' - %(id, value, float(value)/10000000)) + % (id, value, float(value) / 10000000)) # convert FILETIME to Python datetime.datetime # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/ _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) @@ -1889,8 +2136,8 @@ class OleFileIO(object): else: # legacy code kept for backward compatibility: returns a # number of seconds since Jan 1,1601 - value = value // 10000000 # seconds - elif type == VT_UI1: # 1-byte unsigned integer + value = value // 10000000 # seconds + elif type == VT_UI1: # 1-byte unsigned integer value = i8(s[offset+4]) elif type == VT_CLSID: value = _clsid(s[offset+4:offset+20]) @@ -1904,7 +2151,7 @@ class OleFileIO(object): # see http://msdn.microsoft.com/en-us/library/cc237864.aspx value = bool(i16(s, offset+4)) else: - value = None # everything else yields "None" + value = None # everything else yields "None" debug('property id=%d: type=%d not implemented in parser yet' % (id, type)) # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, @@ -1926,7 +2173,7 @@ class OleFileIO(object): # a DEFECT_INCORRECT, because parsing can go on msg = 'Error while parsing property id %d in stream %s: %s' % ( id, repr(streampath), exc) - self._raise_defect(DEFECT_INCORRECT, msg, type(exc)) + self.raise_defect(DEFECT_INCORRECT, msg, type(exc)) return data @@ -1951,15 +2198,18 @@ if __name__ == "__main__": # [PL] display quick usage info if launched from command-line if len(sys.argv) <= 1: - print(__doc__) - print(""" -Launched from command line, this script parses OLE files and prints info. + print('olefile version %s %s - %s' % (__version__, __date__, __author__)) + print( +""" +Launched from the command line, this script parses OLE files and prints info. -Usage: OleFileIO_PL.py [-d] [-c] [file2 ...] +Usage: olefile.py [-d] [-c] [file2 ...] 
Options: --d : debug mode (display a lot of debug information, for developers only) +-d : debug mode (displays a lot of debug information, for developers only) -c : check all streams (for debugging purposes) + +For more information, see http://www.decalage.info/olefile """) sys.exit() @@ -1976,7 +2226,7 @@ Options: check_streams = True continue - ole = OleFileIO(filename) #, raise_defects=DEFECT_INCORRECT) + ole = OleFileIO(filename)#, raise_defects=DEFECT_INCORRECT) print("-" * 68) print(filename) print("-" * 68) @@ -1987,14 +2237,14 @@ Options: props = ole.getproperties(streamname, convert_time=True) props = sorted(props.items()) for k, v in props: - #[PL]: avoid to display too large or binary values: + # [PL]: avoid to display too large or binary values: if isinstance(v, (basestring, bytes)): if len(v) > 50: v = v[:50] if isinstance(v, bytes): # quick and dirty binary check: for c in (1, 2, 3, 4, 5, 6, 7, 11, 12, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31): + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31): if c in bytearray(v): v = '(binary data)' break @@ -2015,11 +2265,11 @@ Options: print('NOT a stream : type=%d' % st_type) print() -## for streamname in ole.listdir(): -## # print name using repr() to convert binary chars to \xNN: -## print('-', repr('/'.join(streamname)),'-', end=' ') -## print(ole.getmtime(streamname)) -## print() +# for streamname in ole.listdir(): +# # print name using repr() to convert binary chars to \xNN: +# print('-', repr('/'.join(streamname)),'-', end=' ') +# print(ole.getmtime(streamname)) +# print() print('Modification/Creation times of all directory entries:') for entry in ole.direntries: @@ -2051,3 +2301,5 @@ Options: print('None') ## except IOError as v: ## print("***", "cannot read", file, "-", v) + +# this code was developed while listening to The Wedding Present "Sea Monsters" diff --git a/PIL/PsdImagePlugin.py b/PIL/PsdImagePlugin.py index 02c94a860..d30695adb 100644 --- a/PIL/PsdImagePlugin.py +++ b/PIL/PsdImagePlugin.py @@ -132,6 +132,10 @@ class PsdImageFile(ImageFile.ImageFile): self._fp = self.fp self.frame = 0 + @property + def n_frames(self): + return len(self.layers) + def seek(self, layer): # seek to given layer (1..max) if layer == self.frame: diff --git a/PIL/SpiderImagePlugin.py b/PIL/SpiderImagePlugin.py index f1ccb67f6..7de5156b1 100644 --- a/PIL/SpiderImagePlugin.py +++ b/PIL/SpiderImagePlugin.py @@ -127,12 +127,12 @@ class SpiderImageFile(ImageFile.ImageFile): if self.istack == 0 and self.imgnumber == 0: # stk=0, img=0: a regular 2D image offset = hdrlen - self.nimages = 1 + self._nimages = 1 elif self.istack > 0 and self.imgnumber == 0: # stk>0, img=0: Opening the stack for the first time self.imgbytes = int(h[12]) * int(h[2]) * 4 self.hdrlen = hdrlen - self.nimages = int(h[26]) + self._nimages = int(h[26]) # Point to the first image in the stack offset = hdrlen * 2 self.imgnumber = 1 @@ -154,6 +154,10 @@ class SpiderImageFile(ImageFile.ImageFile): (self.rawmode, 0, 1))] self.__fp = self.fp # FIXME: hack + @property + def n_frames(self): + return self._nimages + # 1st image index is zero (although SPIDER imgnumber starts at 1) def tell(self): if self.imgnumber < 1: @@ -164,7 +168,7 @@ class SpiderImageFile(ImageFile.ImageFile): def seek(self, frame): if self.istack == 0: return - if frame >= self.nimages: + if frame >= self._nimages: raise EOFError("attempt to seek past end of file") self.stkoffset = self.hdrlen + frame * (self.hdrlen + self.imgbytes) self.fp = self.__fp diff --git a/PIL/TiffImagePlugin.py 
b/PIL/TiffImagePlugin.py index ad085451b..59de84273 100644 --- a/PIL/TiffImagePlugin.py +++ b/PIL/TiffImagePlugin.py @@ -294,7 +294,7 @@ class ImageFileDirectory(collections.MutableMapping): def named(self): """ - Returns the complete tag dictionary, with named tags where posible. + Returns the complete tag dictionary, with named tags where possible. """ from PIL import TiffTags result = {} @@ -426,6 +426,11 @@ class ImageFileDirectory(collections.MutableMapping): for i in range(i16(fp.read(2))): ifd = fp.read(12) + if len(ifd) != 12: + warnings.warn("Possibly corrupt EXIF data. " + "Expecting to read 12 bytes but only got %d." + % (len(ifd))) + continue tag, typ = i16(ifd), i16(ifd, 2) @@ -476,7 +481,14 @@ class ImageFileDirectory(collections.MutableMapping): else: print("- value:", self[tag]) - self.next = i32(fp.read(4)) + ifd = fp.read(4) + if len(ifd) != 4: + warnings.warn("Possibly corrupt EXIF data. " + "Expecting to read 4 bytes but only got %d." + % (len(ifd))) + return + + self.next = i32(ifd) # save primitives @@ -636,6 +648,8 @@ class TiffImageFile(ImageFile.ImageFile): self.__first = self.__next = self.ifd.i32(ifh, 4) self.__frame = -1 self.__fp = self.fp + self._frame_pos = [] + self._n_frames = None if Image.DEBUG: print("*** TiffImageFile._open ***") @@ -645,28 +659,30 @@ class TiffImageFile(ImageFile.ImageFile): # and load the first frame self._seek(0) + @property + def n_frames(self): + if self._n_frames is None: + current = self.tell() + try: + while True: + self._seek(self.tell() + 1) + except EOFError: + self._n_frames = self.tell() + 1 + self.seek(current) + return self._n_frames + def seek(self, frame): "Select a given frame as current image" - if frame < 0: - frame = 0 - self._seek(frame) + self._seek(max(frame, 0)) # Questionable backwards compatibility. # Create a new core image object on second and # subsequent frames in the image. Image may be # different size/mode. Image._decompression_bomb_check(self.size) self.im = Image.core.new(self.mode, self.size) - def tell(self): - "Return the current frame number" - return self._tell() - def _seek(self, frame): self.fp = self.__fp - if frame < self.__frame: - # rewind file - self.__frame = -1 - self.__next = self.__first - while self.__frame < frame: + while len(self._frame_pos) <= frame: if not self.__next: raise EOFError("no more images in TIFF file") if Image.DEBUG: @@ -676,14 +692,19 @@ class TiffImageFile(ImageFile.ImageFile): # was passed to libtiff, invalidating the buffer self.fp.tell() self.fp.seek(self.__next) + self._frame_pos.append(self.__next) if Image.DEBUG: print("Loading tags, location: %s" % self.fp.tell()) self.tag.load(self.fp) self.__next = self.tag.next self.__frame += 1 + self.fp.seek(self._frame_pos[frame]) + self.tag.load(self.fp) + self.__frame = frame self._setup() - def _tell(self): + def tell(self): + "Return the current frame number" return self.__frame def _decoder(self, rawmode, layer, tile=None): @@ -749,7 +770,7 @@ class TiffImageFile(ImageFile.ImageFile): # # Rearranging for supporting byteio items, since they have a fileno # that returns an IOError if there's no underlying fp. Easier to - # dea. with here by reordering. + # deal with here by reordering. if Image.DEBUG: print("have getvalue. 
just sending in a string from getvalue") n, err = decoder.decode(self.fp.getvalue()) @@ -985,7 +1006,7 @@ class TiffImageFile(ImageFile.ImageFile): # Write TIFF files # little endian is default except for image modes with -# explict big endian byte-order +# explicit big endian byte-order SAVE_INFO = { # mode => rawmode, byteorder, photometrics, diff --git a/README.rst b/README.rst index 5e91b7a7a..5dbbcb231 100644 --- a/README.rst +++ b/README.rst @@ -6,19 +6,23 @@ Python Imaging Library (Fork) Pillow is the "friendly PIL fork" by `Alex Clark and Contributors `_. PIL is the Python Imaging Library by Fredrik Lundh and Contributors. -.. image:: https://travis-ci.org/python-pillow/Pillow.svg?branch=master +.. + image:: https://travis-ci.org/python-pillow/Pillow.svg?branch=master :target: https://travis-ci.org/python-pillow/Pillow :alt: Travis CI build status (Linux) -.. image:: https://travis-ci.org/python-pillow/pillow-wheels.svg?branch=latest +.. + image:: https://travis-ci.org/python-pillow/pillow-wheels.svg?branch=latest :target: https://travis-ci.org/python-pillow/pillow-wheels :alt: Travis CI build status (OS X) -.. image:: https://img.shields.io/pypi/v/pillow.svg +.. + image:: https://img.shields.io/pypi/v/pillow.svg :target: https://pypi.python.org/pypi/Pillow/ :alt: Latest PyPI version -.. image:: https://img.shields.io/pypi/dm/pillow.svg +.. + image:: https://img.shields.io/pypi/dm/pillow.svg :target: https://pypi.python.org/pypi/Pillow/ :alt: Number of PyPI downloads diff --git a/Scripts/gifmaker.py b/Scripts/gifmaker.py index 5d1781499..28c6fb25d 100644 --- a/Scripts/gifmaker.py +++ b/Scripts/gifmaker.py @@ -75,8 +75,8 @@ def makedelta(fp, sequence): for im in sequence: - # - # FIXME: write graphics control block before each frame + # To specify duration, add the time in milliseconds to getdata(), + # e.g. 
getdata(im, duration=1000) if not previous: diff --git a/Tests/images/hopper.im b/Tests/images/hopper.im new file mode 100644 index 000000000..ceeb01049 Binary files /dev/null and b/Tests/images/hopper.im differ diff --git a/Tests/images/hopper_bad_exif.jpg b/Tests/images/hopper_bad_exif.jpg new file mode 100644 index 000000000..4cfeea1dc Binary files /dev/null and b/Tests/images/hopper_bad_exif.jpg differ diff --git a/Tests/images/hopper_merged.psd b/Tests/images/hopper_merged.psd new file mode 100644 index 000000000..6b5c7ebe1 Binary files /dev/null and b/Tests/images/hopper_merged.psd differ diff --git a/Tests/test_file_dcx.py b/Tests/test_file_dcx.py index 2acf7b459..7d2ae32d8 100644 --- a/Tests/test_file_dcx.py +++ b/Tests/test_file_dcx.py @@ -30,6 +30,10 @@ class TestFileDcx(PillowTestCase): # Assert self.assertEqual(frame, 0) + def test_n_frames(self): + im = Image.open(TEST_FILE) + self.assertEqual(im.n_frames, 1) + def test_seek_too_far(self): # Arrange im = Image.open(TEST_FILE) diff --git a/Tests/test_file_fli.py b/Tests/test_file_fli.py index e6634c799..04c2006c9 100644 --- a/Tests/test_file_fli.py +++ b/Tests/test_file_fli.py @@ -18,6 +18,10 @@ class TestFileFli(PillowTestCase): self.assertEqual(im.size, (128, 128)) self.assertEqual(im.format, "FLI") + def test_n_frames(self): + im = Image.open(test_file) + self.assertEqual(im.n_frames, 2) + if __name__ == '__main__': unittest.main() diff --git a/Tests/test_file_gif.py b/Tests/test_file_gif.py index 1aa8a36bc..0e9e65a18 100644 --- a/Tests/test_file_gif.py +++ b/Tests/test_file_gif.py @@ -134,6 +134,10 @@ class TestFileGif(PillowTestCase): except EOFError: self.assertEqual(framecount, 5) + def test_n_frames(self): + im = Image.open("Tests/images/iss634.gif") + self.assertEqual(im.n_frames, 43) + def test_dispose_none(self): img = Image.open("Tests/images/dispose_none.gif") try: @@ -169,6 +173,33 @@ class TestFileGif(PillowTestCase): # first frame self.assertEqual(img.histogram()[img.info['transparency']], 0) + def test_duration(self): + duration = 1000 + + out = self.tempfile('temp.gif') + fp = open(out, "wb") + im = Image.new('L', (100, 100), '#000') + for s in GifImagePlugin.getheader(im)[0] + GifImagePlugin.getdata(im, duration=duration): + fp.write(s) + fp.write(b";") + fp.close() + reread = Image.open(out) + + self.assertEqual(reread.info['duration'], duration) + + def test_number_of_loops(self): + number_of_loops = 2 + + out = self.tempfile('temp.gif') + fp = open(out, "wb") + im = Image.new('L', (100, 100), '#000') + for s in GifImagePlugin.getheader(im)[0] + GifImagePlugin.getdata(im, loop=number_of_loops): + fp.write(s) + fp.write(b";") + fp.close() + reread = Image.open(out) + + self.assertEqual(reread.info['loop'], number_of_loops) if __name__ == '__main__': unittest.main() diff --git a/Tests/test_file_im.py b/Tests/test_file_im.py new file mode 100644 index 000000000..24e00b2f0 --- /dev/null +++ b/Tests/test_file_im.py @@ -0,0 +1,33 @@ +from helper import unittest, PillowTestCase, hopper + +from PIL import Image + +# sample im +TEST_IM = "Tests/images/hopper.im" + + +class TestFileIm(PillowTestCase): + + def test_sanity(self): + im = Image.open(TEST_IM) + im.load() + self.assertEqual(im.mode, "RGB") + self.assertEqual(im.size, (128, 128)) + self.assertEqual(im.format, "IM") + + def test_n_frames(self): + im = Image.open(TEST_IM) + self.assertEqual(im.n_frames, 1) + + def test_roundtrip(self): + out = self.tempfile('temp.im') + im = hopper() + im.save(out) + reread = Image.open(out) + + 
self.assert_image_equal(reread, im) + +if __name__ == '__main__': + unittest.main() + +# End of file diff --git a/Tests/test_file_mpo.py b/Tests/test_file_mpo.py index 7850744af..1a0ebc453 100644 --- a/Tests/test_file_mpo.py +++ b/Tests/test_file_mpo.py @@ -95,6 +95,10 @@ class TestFileMpo(PillowTestCase): im.seek(0) self.assertEqual(im.tell(), 0) + def test_n_frames(self): + im = Image.open("Tests/images/sugarshack.mpo") + self.assertEqual(im.n_frames, 2) + def test_image_grab(self): for test_file in test_files: im = Image.open(test_file) diff --git a/Tests/test_file_psd.py b/Tests/test_file_psd.py index 51b8cf3f4..dca3601b2 100644 --- a/Tests/test_file_psd.py +++ b/Tests/test_file_psd.py @@ -16,6 +16,13 @@ class TestImagePsd(PillowTestCase): self.assertEqual(im.size, (128, 128)) self.assertEqual(im.format, "PSD") + def test_n_frames(self): + im = Image.open("Tests/images/hopper_merged.psd") + self.assertEqual(im.n_frames, 1) + + im = Image.open(test_file) + self.assertEqual(im.n_frames, 2) + if __name__ == '__main__': unittest.main() diff --git a/Tests/test_file_spider.py b/Tests/test_file_spider.py index 7bfedad1a..7d24b2fe5 100644 --- a/Tests/test_file_spider.py +++ b/Tests/test_file_spider.py @@ -42,6 +42,10 @@ class TestImageSpider(PillowTestCase): # Assert self.assertEqual(index, 0) + def test_n_frames(self): + im = Image.open(TEST_FILE) + self.assertEqual(im.n_frames, 1) + def test_loadImageSeries(self): # Arrange not_spider_file = "Tests/images/hopper.ppm" diff --git a/Tests/test_file_tiff.py b/Tests/test_file_tiff.py index c51f46a04..02a63586c 100644 --- a/Tests/test_file_tiff.py +++ b/Tests/test_file_tiff.py @@ -3,6 +3,8 @@ from helper import unittest, PillowTestCase, hopper, py3 from PIL import Image, TiffImagePlugin +import struct + class TestFileTiff(PillowTestCase): @@ -77,6 +79,12 @@ class TestFileTiff(PillowTestCase): im._setup() self.assertEqual(im.info['dpi'], (72., 72.)) + def test_bad_exif(self): + try: + Image.open('Tests/images/hopper_bad_exif.jpg')._getexif() + except struct.error: + self.fail("Bad EXIF data should not pass incorrect values to _binary unpack") + def test_little_endian(self): im = Image.open('Tests/images/16bit.cropped.tif') self.assertEqual(im.getpixel((0, 0)), 480) @@ -142,6 +150,13 @@ class TestFileTiff(PillowTestCase): self.assertEqual( im.getextrema(), (-3.140936851501465, 3.140684127807617)) + def test_n_frames(self): + im = Image.open('Tests/images/multipage-lastframe.tif') + self.assertEqual(im.n_frames, 1) + + im = Image.open('Tests/images/multipage.tiff') + self.assertEqual(im.n_frames, 3) + def test_multipage(self): # issue #862 im = Image.open('Tests/images/multipage.tiff') diff --git a/Tests/test_image_resize.py b/Tests/test_image_resize.py index 414758529..d59f8ba14 100644 --- a/Tests/test_image_resize.py +++ b/Tests/test_image_resize.py @@ -54,7 +54,7 @@ class TestImagingCoreResize(PillowTestCase): # Make an image with one colored pixel, in one channel. # When resized, that channel should be the same as a GS image. # Other channels should be unaffected. - # The R and A channels should not swap, which is indicitive of + # The R and A channels should not swap, which is indicative of # an endianness issues. 
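Since several of these tests exercise the new n_frames attribute, a short usage sketch may help; the file name is one of the test images referenced above, and the loop is illustrative rather than part of the patch:

    from PIL import Image

    im = Image.open("Tests/images/multipage.tiff")   # 3 frames per the test above
    for frame in range(im.n_frames):                  # n_frames: new in this change set
        im.seek(frame)
        print(frame, im.size, im.mode)
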
samples = { diff --git a/Tests/test_imagedraw.py b/Tests/test_imagedraw.py index 3b9919834..a1ed20a3a 100644 --- a/Tests/test_imagedraw.py +++ b/Tests/test_imagedraw.py @@ -300,32 +300,32 @@ class TestImageDraw(PillowTestCase): img, draw = self.create_base_image_draw((20, 20)) draw.line((5, 5, 14, 5), BLACK, 2) self.assert_image_equal( - img, expected, 'line straigth horizontal normal 2px wide failed') + img, expected, 'line straight horizontal normal 2px wide failed') expected = Image.open(os.path.join(IMAGES_PATH, 'line_horizontal_w2px_inverted.png')) expected.load() img, draw = self.create_base_image_draw((20, 20)) draw.line((14, 5, 5, 5), BLACK, 2) self.assert_image_equal( - img, expected, 'line straigth horizontal inverted 2px wide failed') + img, expected, 'line straight horizontal inverted 2px wide failed') expected = Image.open(os.path.join(IMAGES_PATH, 'line_horizontal_w3px.png')) expected.load() img, draw = self.create_base_image_draw((20, 20)) draw.line((5, 5, 14, 5), BLACK, 3) self.assert_image_equal( - img, expected, 'line straigth horizontal normal 3px wide failed') + img, expected, 'line straight horizontal normal 3px wide failed') img, draw = self.create_base_image_draw((20, 20)) draw.line((14, 5, 5, 5), BLACK, 3) self.assert_image_equal( - img, expected, 'line straigth horizontal inverted 3px wide failed') + img, expected, 'line straight horizontal inverted 3px wide failed') expected = Image.open(os.path.join(IMAGES_PATH, 'line_horizontal_w101px.png')) expected.load() img, draw = self.create_base_image_draw((200, 110)) draw.line((5, 55, 195, 55), BLACK, 101) self.assert_image_equal( - img, expected, 'line straigth horizontal 101px wide failed') + img, expected, 'line straight horizontal 101px wide failed') def test_line_h_s1_w2(self): self.skipTest('failing') @@ -344,32 +344,32 @@ class TestImageDraw(PillowTestCase): img, draw = self.create_base_image_draw((20, 20)) draw.line((5, 5, 5, 14), BLACK, 2) self.assert_image_equal( - img, expected, 'line straigth vertical normal 2px wide failed') + img, expected, 'line straight vertical normal 2px wide failed') expected = Image.open(os.path.join(IMAGES_PATH, 'line_vertical_w2px_inverted.png')) expected.load() img, draw = self.create_base_image_draw((20, 20)) draw.line((5, 14, 5, 5), BLACK, 2) self.assert_image_equal( - img, expected, 'line straigth vertical inverted 2px wide failed') + img, expected, 'line straight vertical inverted 2px wide failed') expected = Image.open(os.path.join(IMAGES_PATH, 'line_vertical_w3px.png')) expected.load() img, draw = self.create_base_image_draw((20, 20)) draw.line((5, 5, 5, 14), BLACK, 3) self.assert_image_equal( - img, expected, 'line straigth vertical normal 3px wide failed') + img, expected, 'line straight vertical normal 3px wide failed') img, draw = self.create_base_image_draw((20, 20)) draw.line((5, 14, 5, 5), BLACK, 3) self.assert_image_equal( - img, expected, 'line straigth vertical inverted 3px wide failed') + img, expected, 'line straight vertical inverted 3px wide failed') expected = Image.open(os.path.join(IMAGES_PATH, 'line_vertical_w101px.png')) expected.load() img, draw = self.create_base_image_draw((110, 200)) draw.line((55, 5, 55, 195), BLACK, 101) self.assert_image_equal(img, expected, - 'line straigth vertical 101px wide failed') + 'line straight vertical 101px wide failed') expected = Image.open(os.path.join(IMAGES_PATH, 'line_vertical_slope1px_w2px.png')) expected.load() diff --git a/_imagingcms.c b/_imagingcms.c index 3b822006a..cda7c5f1f 100644 --- a/_imagingcms.c +++ 
b/_imagingcms.c @@ -278,7 +278,7 @@ findLCMStype(char* PILmode) return TYPE_YCbCr_8; } else if (strcmp(PILmode, "LAB") == 0) { - // LabX equvalent like ALab, but not reversed -- no #define in lcms2 + // LabX equivalent like ALab, but not reversed -- no #define in lcms2 return (COLORSPACE_SH(PT_LabV2)|CHANNELS_SH(3)|BYTES_SH(1)|EXTRA_SH(1)); } diff --git a/_imagingft.c b/_imagingft.c index ad40ee425..d8f6d6338 100644 --- a/_imagingft.c +++ b/_imagingft.c @@ -572,7 +572,7 @@ setup_module(PyObject* m) { PyType_Ready(&Font_Type); if (FT_Init_FreeType(&library)) - return 0; /* leave it uninitalized */ + return 0; /* leave it uninitialized */ FT_Library_Version(library, &major, &minor, &patch); diff --git a/encode.c b/encode.c index fef102176..6bdb8c71a 100644 --- a/encode.c +++ b/encode.c @@ -737,7 +737,7 @@ PyImaging_LibTiffEncoderNew(PyObject* self, PyObject* args) return NULL; } - // While failes on 64 bit machines, complains that pos is an int instead of a Py_ssize_t + // While fails on 64 bit machines, complains that pos is an int instead of a Py_ssize_t // while (PyDict_Next(dir, &pos, &key, &value)) { for (pos=0;pos 64). For a quantization to 256 colors all 64 coarse colors will be used @@ -421,7 +421,7 @@ int quantize_octree(Pixel *pixelData, /* add fine colors to the lookup cube */ add_lookup_buckets(lookupCube, paletteBuckets, nFineColors, nCoarseColors); - /* create result pixles and map palatte indices */ + /* create result pixels and map palette indices */ qp = malloc(sizeof(Pixel)*nPixels); if (!qp) goto error; map_image_pixels(pixelData, nPixels, lookupCube, qp); diff --git a/libImaging/Resample.c b/libImaging/Resample.c index a87f2db83..597fca3e9 100644 --- a/libImaging/Resample.c +++ b/libImaging/Resample.c @@ -2,7 +2,7 @@ * The Python Imaging Library * $Id$ * - * Pillow image resamling support + * Pillow image resampling support * * history: * 2002-03-09 fl Created (for PIL 1.1.3) diff --git a/libImaging/TiffDecode.c b/libImaging/TiffDecode.c index 76bd887a7..25336e7fa 100644 --- a/libImaging/TiffDecode.c +++ b/libImaging/TiffDecode.c @@ -245,7 +245,7 @@ int ImagingLibTiffDecode(Imaging im, ImagingCodecState state, UINT8* buffer, int // back in. Can't use read encoded stripe. // This thing pretty much requires that I have the whole image in one shot. - // Prehaps a stub version would work better??? + // Perhaps a stub version would work better??? while(state->y < state->ysize){ if (TIFFReadScanline(tiff, (tdata_t)state->buffer, (uint32)state->y, 0) == -1) { TRACE(("Decode Error, row %d\n", state->y)); diff --git a/libImaging/Unpack.c b/libImaging/Unpack.c index 9db43147f..522e9b04c 100644 --- a/libImaging/Unpack.c +++ b/libImaging/Unpack.c @@ -808,7 +808,7 @@ unpackI12_I16(UINT8* out, const UINT8* in, int pixels){ FillOrder = 2 should be used only when BitsPerSample = 1 and the data is either uncompressed or compressed using CCITT 1D - or 2D compression, to avoid potentially ambigous situations. + or 2D compression, to avoid potentially ambiguous situations. Yeah. I thought so. We'll see how well people read the spec. We've got several fillorder=2 modes in TiffImagePlugin.py diff --git a/requirements.txt b/requirements.txt index 86ddbd771..bd37d7ba9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,8 @@ -# Testing reqs +# Testing and documentation requirements -e . +-r docs/requirements.txt +coveralls nose +nose-cov +pep8 +pyflakes