From 1dd3bef61507847375772ffd207e8451fa5c897e Mon Sep 17 00:00:00 2001
From: Andrew Murray <radarhere@gmail.com>
Date: Sun, 10 May 2015 23:55:33 +1000
Subject: [PATCH] Upgraded OleFileIO to 0.42b

---
 PIL/OleFileIO.py | 941 +++++++++++++++++++++++++++++++----------------
 1 file changed, 616 insertions(+), 325 deletions(-)

diff --git a/PIL/OleFileIO.py b/PIL/OleFileIO.py
index c804dd454..d787e59ed 100755
--- a/PIL/OleFileIO.py
+++ b/PIL/OleFileIO.py
@@ -1,47 +1,70 @@
 #!/usr/bin/env python
-## OleFileIO_PL:
-## Module to read Microsoft OLE2 files (also called Structured Storage or
-## Microsoft Compound Document File Format), such as Microsoft Office
-## documents, Image Composer and FlashPix files, Outlook messages, ...
-## This version is compatible with Python 2.6+ and 3.x
 
-## version 0.30 2014-02-04 Philippe Lagadec - http://www.decalage.info
-
-## Project website: http://www.decalage.info/python/olefileio
-
-## Improved version of the OleFileIO module from PIL library v1.1.6
-## See: http://www.pythonware.com/products/pil/index.htm
-
-## The Python Imaging Library (PIL) is
-
-##     Copyright (c) 1997-2005 by Secret Labs AB
-##     Copyright (c) 1995-2005 by Fredrik Lundh
-
-## OleFileIO_PL changes are Copyright (c) 2005-2014 by Philippe Lagadec
-
-## See source code and LICENSE.txt for information on usage and redistribution.
-
-## WARNING: THIS IS (STILL) WORK IN PROGRESS.
+# olefile (formerly OleFileIO_PL) version 0.42 2015-01-25
+#
+# Module to read/write Microsoft OLE2 files (also called Structured Storage or
+# Microsoft Compound Document File Format), such as Microsoft Office 97-2003
+# documents, Image Composer and FlashPix files, Outlook messages, ...
+# This version is compatible with Python 2.6+ and 3.x
+#
+# Project website: http://www.decalage.info/olefile
+#
+# olefile is copyright (c) 2005-2015 Philippe Lagadec (http://www.decalage.info)
+#
+# olefile is based on the OleFileIO module from the PIL library v1.1.6
+# See: http://www.pythonware.com/products/pil/index.htm
+#
+# The Python Imaging Library (PIL) is
+# Copyright (c) 1997-2005 by Secret Labs AB
+# Copyright (c) 1995-2005 by Fredrik Lundh
+#
+# See source code and LICENSE.txt for information on usage and redistribution.
 
 
-# Starting with OleFileIO_PL v0.30, only Python 2.6+ and 3.x is supported
+# Since OleFileIO_PL v0.30, only Python 2.6+ and 3.x are supported
 # This import enables print() as a function rather than a keyword
 # (main requirement to be compatible with Python 3.x)
 # The comment on the line below should be printed on Python 2.5 or older:
-from __future__ import print_function  # This version of OleFileIO_PL requires Python 2.6+ or 3.x.
+from __future__ import print_function   # This version of olefile requires Python 2.6+ or 3.x.
 
 
-__author__  = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)"
-__date__    = "2014-02-04"
-__version__ = '0.30'
+__author__  = "Philippe Lagadec"
+__date__    = "2015-01-25"
+__version__ = '0.42b'
 
 #--- LICENSE ------------------------------------------------------------------
 
-# OleFileIO_PL is an improved version of the OleFileIO module from the
-# Python Imaging Library (PIL).
-
-# OleFileIO_PL changes are Copyright (c) 2005-2014 by Philippe Lagadec
+# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2015 Philippe Lagadec
+# (http://www.decalage.info)
 #
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+#  * Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ----------
+# PIL License:
+#
+# olefile is based on source code from the OleFileIO module of the Python
+# Imaging Library (PIL) published by Fredrik Lundh under the following license:
+
 # The Python Imaging Library (PIL) is
 #    Copyright (c) 1997-2005 by Secret Labs AB
 #    Copyright (c) 1995-2005 by Fredrik Lundh
@@ -67,7 +90,7 @@ __version__ = '0.30'
 # PERFORMANCE OF THIS SOFTWARE.
 
 #-----------------------------------------------------------------------------
-# CHANGELOG: (only OleFileIO_PL changes compared to PIL 1.1.6)
+# CHANGELOG: (only olefile/OleFileIO_PL changes compared to PIL 1.1.6)
 # 2005-05-11 v0.10 PL: - a few fixes for Python 2.4 compatibility
 #                        (all changes flagged with [PL])
 # 2006-02-22 v0.11 PL: - a few fixes for some Office 2003 documents which raise
@@ -142,10 +165,29 @@ __version__ = '0.30'
 # 2014-02-04 v0.30 PL: - upgraded code to support Python 3.x by Martin Panter
 #                      - several fixes for Python 2.6 (xrange, MAGIC)
 #                      - reused i32 from Pillow's _binary
+# 2014-07-18 v0.31     - preliminary support for 4K sectors
+# 2014-07-27 v0.31 PL: - a few improvements in OleFileIO.open (header parsing)
+#                      - Fixed loadfat for large files with 4K sectors (issue #3)
+# 2014-07-30 v0.32 PL: - added write_sect to write sectors to disk
+#                      - added write_mode option to OleFileIO.__init__ and open
+# 2014-07-31       PL: - fixed padding in write_sect for Python 3, added checks
+#                      - added write_stream to write a stream to disk
+# 2014-09-26 v0.40 PL: - renamed OleFileIO_PL to olefile
+# 2014-11-09       NE: - added support for Jython (Niko Ehrenfeuchter)
+# 2014-11-13 v0.41 PL: - improved isOleFile and OleFileIO.open to support OLE
+#                        data in a string buffer and file-like objects.
+# 2014-11-21       PL: - updated comments according to Pillow's commits
+# 2015-01-24 v0.42 PL: - changed the default path name encoding from Latin-1
+#                        to UTF-8 on Python 2.x (Unicode on Python 3.x)
+#                      - added path_encoding option to override the default
+#                      - fixed a bug in _list when a storage is empty
 
 #-----------------------------------------------------------------------------
 # TODO (for version 1.0):
-# + isOleFile should accept file-like objects like open
+# + get rid of print statements, to simplify Python 2.x and 3.x support
+# + add is_stream and is_storage
+# + remove leading and trailing slashes where a path is used
+# + add functions path_list2str and path_str2list
 # + fix how all the methods handle unicode str and/or bytes as arguments
 # + add path attrib to _OleDirEntry, set it once and for all in init or
 #   append_kids (then listdir/_list can be simplified)
@@ -177,30 +219,16 @@ __version__ = '0.30'
 # - move all debug code (and maybe dump methods) to a separate module, with
 #   a class which inherits OleFileIO ?
 # - fix docstrings to follow epydoc format
-# - add support for 4K sectors ?
 # - add support for big endian byte order ?
 # - create a simple OLE explorer with wxPython
 
 # FUTURE EVOLUTIONS to add write support:
-# 1) add ability to write a stream back on disk from BytesIO (same size, no
-#    change in FAT/MiniFAT).
-# 2) rename a stream/storage if it doesn't change the RB tree
-# 3) use rbtree module to update the red-black tree + any rename
-# 4) remove a stream/storage: free sectors in FAT/MiniFAT
-# 5) allocate new sectors in FAT/MiniFAT
-# 6) create new storage/stream
-#-----------------------------------------------------------------------------
+# see issue #6 on Bitbucket:
+# https://bitbucket.org/decalage/olefileio_pl/issue/6/improve-olefileio_pl-to-write-ole-files
+
+#-----------------------------------------------------------------------------
+# NOTES from PIL 1.1.6:
 
-#
-# THIS IS WORK IN PROGRESS
-#
-# The Python Imaging Library
-# $Id$
-#
-# stuff to deal with OLE2 Structured Storage files.  this module is
-# used by PIL to read Image Composer and FlashPix files, but can also
-# be used to read other files of this type.
-#
 # History:
 # 1997-01-20 fl   Created
 # 1997-01-22 fl   Fixed 64-bit portability quirk
@@ -222,25 +250,19 @@ __version__ = '0.30'
 # "If this document and functionality of the Software conflict,
 #  the actual functionality of the Software represents the correct
 #  functionality" -- Microsoft, in the OLE format specification
-#
-# Copyright (c) Secret Labs AB 1997.
-# Copyright (c) Fredrik Lundh 1997.
-#
-# See the README file for information on usage and redistribution.
-#
 
 #------------------------------------------------------------------------------
 
 
 import io
 import sys
-import struct
-import array
-import os.path
-import datetime
+import struct, array, os.path, datetime
+
+#=== COMPATIBILITY WORKAROUNDS ================================================
 
 #[PL] Define explicitly the public API to avoid private objects in pydoc:
-__all__ = ['OleFileIO', 'isOleFile', 'MAGIC']
+#TODO: add more
+# __all__ = ['OleFileIO', 'isOleFile', 'MAGIC']
 
 # For Python 3.x, need to redefine long as int:
 if str is not bytes:
@@ -261,39 +283,66 @@ if array.array('L').itemsize == 4:
 elif array.array('I').itemsize == 4:
     # on 64 bits platforms, integers in an array are 32 bits:
     UINT32 = 'I'
+elif array.array('i').itemsize == 4:
+    # On 64 bit Jython, signed integers ('i') are the only way to store our 32
+    # bit values in an array in a *somewhat* reasonable way, as the otherwise
+    # perfectly suited 'H' (unsigned int, 32 bits) results in a completely
+    # unusable behaviour. This is most likely caused by the fact that Java
+    # doesn't have unsigned values, and thus Jython's "array" implementation,
+    # which is based on "jarray", doesn't have them either.
+    # NOTE: to trick Jython into converting the values it would normally
+    # interpret as "signed" into "unsigned", a binary-and operation with
+    # 0xFFFFFFFF can be used. This way it is possible to use the same comparing
+    # operations on all platforms / implementations. The corresponding code
+    # lines are flagged with a 'JYTHON-WORKAROUND' tag below.
+    UINT32 = 'i'
 else:
     raise ValueError('Need to fix a bug with 32 bit arrays, please contact author...')
 
 
 #[PL] These workarounds were inspired from the Path module
 # (see http://www.jorendorff.com/articles/python/path/)
+#TODO: test with old Python versions
+
+# Pre-2.3 workaround for basestring.
 try:
     basestring
 except NameError:
-    basestring = str
+    try:
+        # is Unicode supported (Python >2.0 or >1.6 ?)
+        basestring = (str, unicode)
+    except NameError:
+        basestring = str
 
 #[PL] Experimental setting: if True, OLE filenames will be kept in Unicode
 # if False (default PIL behaviour), all filenames are converted to Latin-1.
-KEEP_UNICODE_NAMES = False
+KEEP_UNICODE_NAMES = True
+
+if sys.version_info[0] < 3:
+    # On Python 2.x, the default encoding for path names is UTF-8:
+    DEFAULT_PATH_ENCODING = 'utf-8'
+else:
+    # On Python 3.x, the default encoding for path names is Unicode (None):
+    DEFAULT_PATH_ENCODING = None
+
+
+#=== DEBUGGING ===============================================================
+
+#TODO: replace this by proper logging
 
 #[PL] DEBUG display mode: False by default, use set_debug_mode() or "-d" on
 # command line to change it.
 DEBUG_MODE = False
-
-
 def debug_print(msg):
     print(msg)
-
-
 def debug_pass(msg):
     pass
 debug = debug_pass
 
-
 def set_debug_mode(debug_mode):
     """
     Set debug mode on or off, to control display of debugging messages.
-    mode: True or False
+    :param debug_mode: True or False
     """
     global DEBUG_MODE, debug
     DEBUG_MODE = debug_mode
@@ -302,26 +351,30 @@ def set_debug_mode(debug_mode):
     else:
         debug = debug_pass
 
+
+#=== CONSTANTS ===============================================================
+
+# magic bytes that should be at the beginning of every OLE file:
 MAGIC = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1'
 
-# [PL]: added constants for Sector IDs (from AAF specifications)
-MAXREGSECT = 0xFFFFFFFA; # maximum SECT
-DIFSECT    = 0xFFFFFFFC; # (-4) denotes a DIFAT sector in a FAT
-FATSECT    = 0xFFFFFFFD; # (-3) denotes a FAT sector in a FAT
-ENDOFCHAIN = 0xFFFFFFFE; # (-2) end of a virtual stream chain
-FREESECT   = 0xFFFFFFFF; # (-1) unallocated sector
+#[PL]: added constants for Sector IDs (from AAF specifications)
+MAXREGSECT = 0xFFFFFFFA # (-6) maximum SECT
+DIFSECT    = 0xFFFFFFFC # (-4) denotes a DIFAT sector in a FAT
+FATSECT    = 0xFFFFFFFD # (-3) denotes a FAT sector in a FAT
+ENDOFCHAIN = 0xFFFFFFFE # (-2) end of a virtual stream chain
+FREESECT   = 0xFFFFFFFF # (-1) unallocated sector
 
-# [PL]: added constants for Directory Entry IDs (from AAF specifications)
-MAXREGSID  = 0xFFFFFFFA; # maximum directory entry ID
-NOSTREAM   = 0xFFFFFFFF; # (-1) unallocated directory entry
+#[PL]: added constants for Directory Entry IDs (from AAF specifications)
+MAXREGSID  = 0xFFFFFFFA # (-6) maximum directory entry ID
+NOSTREAM   = 0xFFFFFFFF # (-1) unallocated directory entry
 
-# [PL] object types in storage (from AAF specifications)
-STGTY_EMPTY     = 0  # empty directory entry (according to OpenOffice.org doc)
-STGTY_STORAGE   = 1  # element is a storage object
-STGTY_STREAM    = 2  # element is a stream object
-STGTY_LOCKBYTES = 3  # element is an ILockBytes object
-STGTY_PROPERTY  = 4  # element is an IPropertyStorage object
-STGTY_ROOT      = 5  # element is a root storage
+#[PL] object types in storage (from AAF specifications)
+STGTY_EMPTY     = 0 # empty directory entry (according to OpenOffice.org doc)
+STGTY_STORAGE   = 1 # element is a storage object
+STGTY_STREAM    = 2 # element is a stream object
+STGTY_LOCKBYTES = 3 # element is an ILockBytes object
+STGTY_PROPERTY  = 4 # element is an IPropertyStorage object
+STGTY_ROOT      = 5 # element is a root storage
 
 
 #
@@ -353,30 +406,52 @@ WORD_CLSID = "00020900-0000-0000-C000-000000000046"
 #TODO: check Excel, PPT, ...
 
 #[PL]: Defect levels to classify parsing errors - see OleFileIO._raise_defect()
-DEFECT_UNSURE =    10   # a case which looks weird, but not sure it's a defect
-DEFECT_POTENTIAL = 20   # a potential defect
-DEFECT_INCORRECT = 30   # an error according to specifications, but parsing
-                        # can go on
-DEFECT_FATAL =     40   # an error which cannot be ignored, parsing is
-                        # impossible
+DEFECT_UNSURE =    10    # a case which looks weird, but not sure it's a defect
+DEFECT_POTENTIAL = 20    # a potential defect
+DEFECT_INCORRECT = 30    # an error according to specifications, but parsing
+                         # can go on
+DEFECT_FATAL =     40    # an error which cannot be ignored, parsing is
+                         # impossible
+
+# Minimal size of an empty OLE file, with 512-bytes sectors = 1536 bytes
+# (this is used in isOleFile and OleFileIO.open)
+MINIMAL_OLEFILE_SIZE = 1536
 
 #[PL] add useful constants to __all__:
-for key in list(vars().keys()):
-    if key.startswith('STGTY_') or key.startswith('DEFECT_'):
-        __all__.append(key)
+# for key in list(vars().keys()):
+#     if key.startswith('STGTY_') or key.startswith('DEFECT_'):
+#         __all__.append(key)
 
 
-#--- FUNCTIONS ----------------------------------------------------------------
+#=== FUNCTIONS ===============================================================
 
 def isOleFile (filename):
     """
-    Test if file is an OLE container (according to its header).
+    Test if a file is an OLE container (according to the magic bytes in its header).
+
+    :param filename: string-like or file-like object, OLE file to parse
+
+        - if filename is a string smaller than 1536 bytes, it is the path
+          of the file to open. (bytes or unicode string)
+        - if filename is a string longer than 1535 bytes, it is parsed
+          as the content of an OLE file in memory. (bytes type only)
+        - if filename is a file-like object (with read and seek methods),
+          it is parsed as-is.
 
-    :param filename: file name or path (str, unicode)
     :returns: True if OLE, False otherwise.
     """
-    f = open(filename, 'rb')
-    header = f.read(len(MAGIC))
+    # check if filename is a string-like or file-like object:
+    if hasattr(filename, 'read'):
+        # file-like object: use it directly
+        header = filename.read(len(MAGIC))
+        # just in case, seek back to start of file:
+        filename.seek(0)
+    elif isinstance(filename, bytes) and len(filename) >= MINIMAL_OLEFILE_SIZE:
+        # filename is a bytes string containing the OLE file to be parsed:
+        header = filename[:len(MAGIC)]
+    else:
+        # string-like object: filename of file on disk
+        header = open(filename, 'rb').read(len(MAGIC))
     if header == MAGIC:
         return True
     else:
@@ -434,41 +509,17 @@ def _clsid(clsid):
             tuple(map(i8, clsid[8:16]))))
 
 
-# UNICODE support:
-# (necessary to handle storages/streams names which use Unicode)
-
-def _unicode(s, errors='replace'):
-    """
-    Map unicode string to Latin 1. (Python with Unicode support)
-
-    :param s: UTF-16LE unicode string to convert to Latin-1
-    :param errors: 'replace', 'ignore' or 'strict'.
-    """
-    #TODO: test if it OleFileIO works with Unicode strings, instead of
-    #      converting to Latin-1.
-    try:
-        # First the string is converted to plain Unicode:
-        # (assuming it is encoded as UTF-16 little-endian)
-        u = s.decode('UTF-16LE', errors)
-        if bytes is not str or KEEP_UNICODE_NAMES:
-            return u
-        else:
-            # Second the unicode string is converted to Latin-1
-            return u.encode('latin_1', errors)
-    except:
-        # there was an error during Unicode to Latin-1 conversion:
-        raise IOError('incorrect Unicode name')
-
 
 def filetime2datetime(filetime):
-    """
-    convert FILETIME (64 bits int) to Python datetime.datetime
-    """
-    # TODO: manage exception when microseconds is too large
-    # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/
-    _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0)
-    #debug('timedelta days=%d' % (filetime//(10*1000000*3600*24)))
-    return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10)
+        """
+        convert FILETIME (64 bits int) to Python datetime.datetime
+        """
+        # TODO: manage exception when microseconds is too large
+        # inspired from http://code.activestate.com/recipes/511425-filetime-to-datetime/
+        _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0)
+        #debug('timedelta days=%d' % (filetime//(10*1000000*3600*24)))
+        return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10)
+
 
 
 #=== CLASSES ==================================================================
@@ -578,6 +629,7 @@ class OleMetadata:
         self.language = None
         self.doc_version = None
 
+
     def parse_properties(self, olefile):
         """
         Parse standard properties of an OLE file, from the streams
@@ -639,6 +691,7 @@ class _OleStream(io.BytesIO):
     fat table arguments.
 
     Attributes:
+
         - size: actual size of data stream, after it was opened.
     """
 
@@ -650,18 +703,18 @@ class _OleStream(io.BytesIO):
         """
         Constructor for _OleStream class.
 
-        :param fp        : file object, the OLE container or the MiniFAT stream
-        :param sect      : sector index of first sector in the stream
-        :param size      : total size of the stream
-        :param offset    : offset in bytes for the first FAT or MiniFAT sector
+        :param fp: file object, the OLE container or the MiniFAT stream
+        :param sect: sector index of first sector in the stream
+        :param size: total size of the stream
+        :param offset: offset in bytes for the first FAT or MiniFAT sector
         :param sectorsize: size of one sector
-        :param fat       : array/list of sector indexes (FAT or MiniFAT)
-        :param filesize  : size of OLE file (for debugging)
-        :returns    : a BytesIO instance containing the OLE stream
+        :param fat: array/list of sector indexes (FAT or MiniFAT)
+        :param filesize: size of OLE file (for debugging)
+        :returns: a BytesIO instance containing the OLE stream
         """
         debug('_OleStream.__init__:')
         debug('  sect=%d (%X), size=%d, offset=%d, sectorsize=%d, len(fat)=%d, fp=%s'
-            %(sect, sect, size, offset, sectorsize, len(fat), repr(fp)))
+            %(sect,sect,size,offset,sectorsize,len(fat), repr(fp)))
         #[PL] To detect malformed documents with FAT loops, we compute the
         # expected number of sectors in the stream:
         unknown_size = False
@@ -729,7 +782,7 @@ class _OleStream(io.BytesIO):
             data.append(sector_data)
             # jump to next sector in the FAT:
             try:
-                sect = fat[sect]
+                sect = fat[sect] & 0xFFFFFFFF  # JYTHON-WORKAROUND
             except IndexError:
                 # [PL] if pointer is out of the FAT an exception is raised
                 raise IOError('incorrect OLE FAT, sector index out of range')
@@ -787,6 +840,7 @@ class _OleDirectoryEntry:
     DIRENTRY_SIZE = 128
     assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE
 
+
     def __init__(self, entry, sid, olefile):
         """
         Constructor for an _OleDirectoryEntry object.
@@ -842,8 +896,11 @@ class _OleDirectoryEntry:
             namelength = 64
         # only characters without ending null char are kept:
         name = name[:(namelength-2)]
-        # name is converted from unicode to Latin-1:
-        self.name = _unicode(name)
+        #TODO: check if the name is actually followed by a null unicode character ([MS-CFB] 2.6.1)
+        #TODO: check if the name does not contain forbidden characters:
+        # [MS-CFB] 2.6.1: "The following characters are illegal and MUST NOT be part of the name: '/', '\', ':', '!'."
+        # name is converted from UTF-16LE to the path encoding specified in the OleFileIO:
+        self.name = olefile._decode_utf16_str(name)
 
         debug('DirEntry SID=%d: %s' % (self.sid, repr(self.name)))
         debug(' - type: %d' % self.entry_type)
@@ -879,6 +936,8 @@ class _OleDirectoryEntry:
                 minifat = False
             olefile._check_duplicate_stream(self.isectStart, minifat)
 
+
+
     def build_storage_tree(self):
         """
         Read and build the red-black tree attached to this _OleDirectoryEntry
@@ -902,15 +961,16 @@ class _OleDirectoryEntry:
             # (see rich comparison methods in this class)
             self.kids.sort()
 
+
     def append_kids(self, child_sid):
         """
         Walk through red-black tree of children of this directory entry to add
         all of them to the kids list. (recursive method)
 
-        child_sid : index of child directory entry to use, or None when called
-                    first time for the root. (only used during recursion)
+        :param child_sid: index of child directory entry to use, or None when called
+            first time for the root. (only used during recursion)
         """
-        # [PL] this method was added to use simple recursion instead of a complex
+        #[PL] this method was added to use simple recursion instead of a complex
         # algorithm.
         # if this is not a storage or a leaf of the tree, nothing to do:
         if child_sid == NOSTREAM:
@@ -945,6 +1005,7 @@ class _OleDirectoryEntry:
         # Afterwards build kid's own tree if it's also a storage:
         child.build_storage_tree()
 
+
     def __eq__(self, other):
         "Compare entries by name"
         return self.name == other.name
@@ -964,6 +1025,7 @@ class _OleDirectoryEntry:
     #TODO: replace by the same function as MS implementation ?
     # (order by name length first, then case-insensitive order)
 
+
     def dump(self, tab = 0):
         "Dump this entry, and all its subentries (for debug purposes only)"
         TYPES = ["(invalid)", "(storage)", "(stream)", "(lockbytes)",
@@ -978,12 +1040,13 @@ class _OleDirectoryEntry:
         for kid in self.kids:
             kid.dump(tab + 2)
 
+
     def getmtime(self):
         """
         Return modification time of a directory entry.
 
         :returns: None if modification time is null, a python datetime object
-        otherwise (UTC timezone)
+            otherwise (UTC timezone)
 
         new in version 0.26
         """
@@ -991,12 +1054,13 @@ class _OleDirectoryEntry:
             return None
         return filetime2datetime(self.modifyTime)
 
+
     def getctime(self):
         """
         Return creation time of a directory entry.
 
         :returns: None if modification time is null, a python datetime object
-        otherwise (UTC timezone)
+            otherwise (UTC timezone)
 
         new in version 0.26
         """
@@ -1012,8 +1076,7 @@ class OleFileIO:
     OLE container object
 
     This class encapsulates the interface to an OLE 2 structured
-    storage file.  Use the :py:meth:`~PIL.OleFileIO.OleFileIO.listdir` and
-    :py:meth:`~PIL.OleFileIO.OleFileIO.openstream` methods to
+    storage file.  Use the listdir and openstream methods to
     access the contents of this file.
 
     Object names are given as a list of strings, one for each subentry
@@ -1037,22 +1100,47 @@ class OleFileIO:
     TIFF files).
     """
 
-    def __init__(self, filename = None, raise_defects=DEFECT_FATAL):
+    def __init__(self, filename=None, raise_defects=DEFECT_FATAL,
+                 write_mode=False, debug=False, path_encoding=DEFAULT_PATH_ENCODING):
         """
-        Constructor for OleFileIO class.
+        Constructor for the OleFileIO class.
 
         :param filename: file to open.
+
+            - if filename is a string smaller than 1536 bytes, it is the path
+              of the file to open. (bytes or unicode string)
+            - if filename is a string longer than 1535 bytes, it is parsed
+              as the content of an OLE file in memory. (bytes type only)
+            - if filename is a file-like object (with read, seek and tell methods),
+              it is parsed as-is.
+
         :param raise_defects: minimal level for defects to be raised as exceptions.
-        (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a
-        security-oriented application, see source code for details)
+            (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a
+            security-oriented application, see source code for details)
+
+        :param write_mode: bool, if True the file is opened in read/write mode instead
+            of read-only by default.
+
+        :param debug: bool, set debug mode
+
+        :param path_encoding: None or str, name of the codec to use for path
+            names (streams and storages), or None for Unicode.
+            Unicode by default on Python 3+, UTF-8 on Python 2.x.
+            (new in olefile 0.42, was hardcoded to Latin-1 until olefile v0.41)
         """
+        set_debug_mode(debug)
         # minimal level for defects to be raised as exceptions:
         self._raise_defects_level = raise_defects
         # list of defects/issues not raised as exceptions:
         # tuples of (exception type, message)
         self.parsing_issues = []
+        self.write_mode = write_mode
+        self.path_encoding = path_encoding
+        self._filesize = None
+        self.fp = None
         if filename:
-            self.open(filename)
+            self.open(filename, write_mode=write_mode)
+
 
     def _raise_defect(self, defect_level, message, exception_type=IOError):
         """
@@ -1061,10 +1149,12 @@ class OleFileIO:
         for the OleFileIO object.
 
         :param defect_level: defect level, possible values are:
-            DEFECT_UNSURE    : a case which looks weird, but not sure it's a defect
-            DEFECT_POTENTIAL : a potential defect
-            DEFECT_INCORRECT : an error according to specifications, but parsing can go on
-            DEFECT_FATAL     : an error which cannot be ignored, parsing is impossible
+
+            - DEFECT_UNSURE    : a case which looks weird, but not sure it's a defect
+            - DEFECT_POTENTIAL : a potential defect
+            - DEFECT_INCORRECT : an error according to specifications, but parsing can go on
+            - DEFECT_FATAL     : an error which cannot be ignored, parsing is impossible
+
         :param message: string describing the defect, used with raised exception.
         :param exception_type: exception class to be raised, IOError by default
         """
@@ -1075,31 +1165,70 @@ class OleFileIO:
             # just record the issue, no exception raised:
             self.parsing_issues.append((exception_type, message))
 
-    def open(self, filename):
-        """
-        Open an OLE2 file.
-        Reads the header, FAT and directory.
 
-        :param filename: string-like or file-like object
+    def _decode_utf16_str(self, utf16_str, errors='replace'):
         """
+        Decode a string encoded in UTF-16 LE format, as found in the OLE
+        directory or in property streams. Return a string encoded
+        according to the path_encoding specified for the OleFileIO object.
+
+        :param utf16_str: bytes string encoded in UTF-16 LE format
+        :param errors: str, see python documentation for str.decode()
+        :return: str, encoded according to path_encoding
+        """
+        unicode_str = utf16_str.decode('UTF-16LE', errors)
+        if self.path_encoding:
+            # an encoding has been specified for path names:
+            return unicode_str.encode(self.path_encoding, errors)
+        else:
+            # path_encoding=None, return the Unicode string as-is:
+            return unicode_str
+
+
+    def open(self, filename, write_mode=False):
+        """
+        Open an OLE2 file in read-only or read/write mode.
+        Read and parse the header, FAT and directory.
+
+        :param filename: string-like or file-like object, OLE file to parse
+
+            - if filename is a string smaller than 1536 bytes, it is the path
+              of the file to open. (bytes or unicode string)
+            - if filename is a string longer than 1535 bytes, it is parsed
+              as the content of an OLE file in memory. (bytes type only)
+            - if filename is a file-like object (with read, seek and tell methods),
+              it is parsed as-is.
+
+        :param write_mode: bool, if True the file is opened in read/write mode instead
+            of read-only by default. (ignored if filename is not a path)
+        """
+        self.write_mode = write_mode
         #[PL] check if filename is a string-like or file-like object:
         # (it is better to check for a read() method)
         if hasattr(filename, 'read'):
-            # file-like object
+            #TODO: also check seek and tell methods?
+            # file-like object: use it directly
             self.fp = filename
+        elif isinstance(filename, bytes) and len(filename) >= MINIMAL_OLEFILE_SIZE:
+            # filename is a bytes string containing the OLE file to be parsed:
+            # convert it to BytesIO
+            self.fp = io.BytesIO(filename)
         else:
             # string-like object: filename of file on disk
-            #TODO: if larger than 1024 bytes, this could be the actual data => BytesIO
-            self.fp = open(filename, "rb")
-        # old code fails if filename is not a plain string:
-        #if isinstance(filename, (bytes, basestring)):
-        #    self.fp = open(filename, "rb")
-        #else:
-        #    self.fp = filename
+            if self.write_mode:
+                # open file in mode 'read with update, binary'
+                # According to https://docs.python.org/2/library/functions.html#open
+                # 'w' would truncate the file, 'a' may only append on some Unixes
+                mode = 'r+b'
+            else:
+                # read-only mode by default
+                mode = 'rb'
+            self.fp = open(filename, mode)
         # obtain the filesize by using seek and tell, which should work on most
         # file-like objects:
         #TODO: do it above, using getsize with filename when possible?
         #TODO: fix code to fail with clear exception when filesize cannot be obtained
+        filesize=0
         self.fp.seek(0, os.SEEK_END)
         try:
             filesize = self.fp.tell()
@@ -1177,7 +1306,7 @@ class OleFileIO:
             self.sectDifStart,
             self.csectDif
         ) = struct.unpack(fmt_header, header1)
-        debug(struct.unpack(fmt_header, header1))
+        debug( struct.unpack(fmt_header,    header1))
 
         if self.Sig != MAGIC:
             # OLE signature should always be present
@@ -1196,6 +1325,7 @@ class OleFileIO:
             # For now only common little-endian documents are handled correctly
             self._raise_defect(DEFECT_FATAL, "incorrect ByteOrder in OLE header")
             # TODO: add big-endian support for documents created on Mac ?
+            # But according to [MS-CFB] - v20140502, ByteOrder MUST be 0xFFFE.
         self.SectorSize = 2**self.SectorShift
         debug( "SectorSize   = %d" % self.SectorSize )
         if self.SectorSize not in [512, 4096]:
@@ -1210,28 +1340,44 @@ class OleFileIO:
         if self.Reserved != 0 or self.Reserved1 != 0:
             self._raise_defect(DEFECT_INCORRECT, "incorrect OLE header (non-null reserved bytes)")
         debug( "csectDir     = %d" % self.csectDir )
+        # Number of directory sectors (only allowed if DllVersion != 3)
         if self.SectorSize==512 and self.csectDir!=0:
             self._raise_defect(DEFECT_INCORRECT, "incorrect csectDir in OLE header")
         debug( "csectFat     = %d" % self.csectFat )
+        # csectFat = number of FAT sectors in the file
         debug( "sectDirStart = %X" % self.sectDirStart )
+        # sectDirStart = 1st sector containing the directory
         debug( "signature    = %d" % self.signature )
         # Signature should be zero, BUT some implementations do not follow this
         # rule => only a potential defect:
+        # (according to MS-CFB, may be != 0 for applications supporting file
+        # transactions)
         if self.signature != 0:
             self._raise_defect(DEFECT_POTENTIAL, "incorrect OLE header (signature>0)")
-        debug("MiniSectorCutoff = %d" % self.MiniSectorCutoff)
-        debug("MiniFatStart     = %X" % self.MiniFatStart)
-        debug("csectMiniFat     = %d" % self.csectMiniFat)
-        debug("sectDifStart     = %X" % self.sectDifStart)
-        debug("csectDif         = %d" % self.csectDif)
+        debug( "MiniSectorCutoff = %d" % self.MiniSectorCutoff )
+        # MS-CFB: This integer field MUST be set to 0x00001000. This field
+        # specifies the maximum size of a user-defined data stream allocated
+        # from the mini FAT and mini stream, and that cutoff is 4096 bytes.
+        # Any user-defined data stream larger than or equal to this cutoff size
+        # must be allocated as normal sectors from the FAT.
+        if self.MiniSectorCutoff != 0x1000:
+            self._raise_defect(DEFECT_INCORRECT, "incorrect MiniSectorCutoff in OLE header")
+        debug( "MiniFatStart     = %X" % self.MiniFatStart )
+        debug( "csectMiniFat     = %d" % self.csectMiniFat )
+        debug( "sectDifStart     = %X" % self.sectDifStart )
+        debug( "csectDif         = %d" % self.csectDif )
 
         # calculate the number of sectors in the file
         # (-1 because header doesn't count)
         self.nb_sect = ( (filesize + self.SectorSize-1) // self.SectorSize) - 1
         debug( "Number of sectors in the file: %d" % self.nb_sect )
+        #TODO: change this test, because an OLE file MAY contain other data
+        # after the last sector.
 
-        # file clsid (probably never used, so we don't store it)
-        #clsid = _clsid(header[8:24])
+        # file clsid
+        self.clsid = _clsid(header[8:24])
+
+        #TODO: remove redundant attributes, and fix the code which uses them?
         self.sectorsize = self.SectorSize #1 << i16(header, 30)
         self.minisectorsize = self.MiniSectorSize  #1 << i16(header, 32)
         self.minisectorcutoff = self.MiniSectorCutoff # i32(header, 56)
@@ -1254,19 +1400,22 @@ class OleFileIO:
         self.ministream = None
         self.minifatsect = self.MiniFatStart #i32(header, 60)
 
+
     def close(self):
         """
         close the OLE file, to release the file object
         """
         self.fp.close()
 
+
     def _check_duplicate_stream(self, first_sect, minifat=False):
         """
         Checks if a stream has not been already referenced elsewhere.
         This method should only be called once for each known stream, and only
         if stream size is not null.
-        :param first_sect: index of first sector of the stream in FAT
-        :param minifat: if True, stream is located in the MiniFAT, else in the FAT
+
+        :param first_sect: int, index of first sector of the stream in FAT
+        :param minifat: bool, if True, stream is located in the MiniFAT, else in the FAT
         """
         if minifat:
             debug('_check_duplicate_stream: sect=%d in MiniFAT' % first_sect)
@@ -1284,13 +1433,14 @@ class OleFileIO:
         else:
             used_streams.append(first_sect)
 
+
     def dumpfat(self, fat, firstindex=0):
         "Displays a part of FAT in human-readable form for debugging purpose"
         # [PL] added only for debug
         if not DEBUG_MODE:
             return
         # dictionary to convert special FAT values in human-readable strings
-        VPL=8 # valeurs par ligne (8+1 * 8+1 = 81)
+        VPL = 8 # values per line (8+1 * 8+1 = 81)
         fatnames = {
             FREESECT:   "..free..",
             ENDOFCHAIN: "[ END. ]",
@@ -1310,22 +1460,26 @@ class OleFileIO:
                 if i>=nbsect:
                     break
                 sect = fat[i]
-                if sect in fatnames:
-                    nom = fatnames[sect]
+                aux = sect & 0xFFFFFFFF  # JYTHON-WORKAROUND
+                if aux in fatnames:
+                    name = fatnames[aux]
                 else:
                     if sect == i+1:
-                        nom = "    --->"
+                        name = "    --->"
                     else:
-                        nom = "%8X" % sect
-                print(nom, end=" ")
+                        name = "%8X" % sect
+                print(name, end=" ")
             print()
 
+
     def dumpsect(self, sector, firstindex=0):
         "Displays a sector in a human-readable form, for debugging purpose."
         if not DEBUG_MODE:
             return
         VPL=8 # number of values per line (8+1 * 8+1 = 81)
         tab = array.array(UINT32, sector)
+        if sys.byteorder == 'big':
+            tab.byteswap()
         nbsect = len(tab)
         nlines = (nbsect+VPL-1)//VPL
         print("index", end=" ")
@@ -1339,8 +1493,8 @@ class OleFileIO:
                 if i>=nbsect:
                     break
                 sect = tab[i]
-                nom = "%8X" % sect
-                print(nom, end=" ")
+                name = "%8X" % sect
+                print(name, end=" ")
             print()
 
     def sect2array(self, sect):
@@ -1354,6 +1508,7 @@ class OleFileIO:
             a.byteswap()
         return a
 
+
     def loadfat_sect(self, sect):
         """
         Adds the indexes of the given sector to the FAT
@@ -1371,9 +1526,11 @@ class OleFileIO:
             self.dumpsect(sect)
         # The FAT is a sector chain starting at the first index of itself.
         for isect in fat1:
-            #print("isect = %X" % isect)
+            isect = isect & 0xFFFFFFFF  # JYTHON-WORKAROUND
+            debug("isect = %X" % isect)
             if isect == ENDOFCHAIN or isect == FREESECT:
                 # the end of the sector chain has been reached
+                debug("found end of sector chain")
                 break
             # read the FAT sector
             s = self.getsect(isect)
@@ -1383,13 +1540,15 @@ class OleFileIO:
             self.fat = self.fat + nextfat
         return isect
 
+
     def loadfat(self, header):
         """
         Load the FAT table.
         """
-        # The header contains a sector  numbers
-        # for the first 109 FAT sectors.  Additional sectors are
-        # described by DIF blocks
+        # The 1st sector of the file contains sector numbers for the first 109
+        # FAT sectors, right after the header which is 76 bytes long.
+        # (always 109, whatever the sector size: 512 bytes = 76+4*109)
+        # Additional sectors are described by DIF blocks
 
         sect = header[76:512]
         debug( "len(sect)=%d, so %d integers" % (len(sect), len(sect)//4) )
@@ -1418,24 +1577,27 @@ class OleFileIO:
             if self.sectDifStart >= self.nb_sect:
                 # initial DIFAT block index must be valid
                 self._raise_defect(DEFECT_FATAL, 'incorrect DIFAT, first index out of range')
-            debug("DIFAT analysis...")
+            debug( "DIFAT analysis..." )
             # We compute the necessary number of DIFAT sectors :
-            # (each DIFAT sector = 127 pointers + 1 towards next DIFAT sector)
-            nb_difat = (self.csectFat-109 + 126)//127
-            debug("nb_difat = %d" % nb_difat)
+            # Number of pointers per DIFAT sector = (sectorsize/4)-1
+            # (-1 because the last pointer is the next DIFAT sector number)
+            nb_difat_sectors = (self.sectorsize//4)-1
+            # (if 512 bytes: each DIFAT sector = 127 pointers + 1 towards next DIFAT sector)
+            nb_difat = (self.csectFat-109 + nb_difat_sectors-1)//nb_difat_sectors
+            debug( "nb_difat = %d" % nb_difat )
             if self.csectDif != nb_difat:
                 raise IOError('incorrect DIFAT')
             isect_difat = self.sectDifStart
             for i in iterrange(nb_difat):
-                debug("DIFAT block %d, sector %X" % (i, isect_difat))
+                debug( "DIFAT block %d, sector %X" % (i, isect_difat) )
                 #TODO: check if corresponding FAT SID = DIFSECT
                 sector_difat = self.getsect(isect_difat)
                 difat = self.sect2array(sector_difat)
                 self.dumpsect(sector_difat)
-                self.loadfat_sect(difat[:127])
+                self.loadfat_sect(difat[:nb_difat_sectors])
                 # last DIFAT pointer is next DIFAT sector:
-                isect_difat = difat[127]
-                debug("next DIFAT sector: %X" % isect_difat)
+                isect_difat = difat[nb_difat_sectors]
+                debug( "next DIFAT sector: %X" % isect_difat )
             # checks:
             if isect_difat not in [ENDOFCHAIN, FREESECT]:
                 # last DIFAT pointer value must be ENDOFCHAIN or FREESECT
@@ -1453,6 +1615,7 @@ class OleFileIO:
         debug('\nFAT:')
         self.dumpfat(self.fat)
 
+
     def loadminifat(self):
         """
         Load the MiniFAT table.
@@ -1491,10 +1654,16 @@ class OleFileIO:
         """
         Read given sector from file on disk.
 
-        :param sect: sector index
+        :param sect: int, sector index
         :returns: a string containing the sector data.
         """
-        # [PL] this original code was wrong when sectors are 4KB instead of
+        # From [MS-CFB]: A sector number can be converted into a byte offset
+        # into the file by using the following formula:
+        # (sector number + 1) x Sector Size.
+        # This implies that sector #0 of the file begins at byte offset Sector
+        # Size, not at 0.
+
+        # [PL] the original code in PIL was wrong when sectors are 4KB instead of
         # 512 bytes:
         #self.fp.seek(512 + self.sectorsize * sect)
         #[PL]: added safety checks:
@@ -1512,6 +1681,34 @@ class OleFileIO:
             self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector')
         return sector
 
+
+    def write_sect(self, sect, data, padding=b'\x00'):
+        """
+        Write given sector to file on disk.
+
+        :param sect: int, sector index
+        :param data: bytes, sector data
+        :param padding: single byte, padding character if data < sector size
+        :exception ValueError: if data is larger than the sector size
+        """
+        if not isinstance(data, bytes):
+            raise TypeError("write_sect: data must be a bytes string")
+        if not isinstance(padding, bytes) or len(padding)!=1:
+            raise TypeError("write_sect: padding must be a bytes string of 1 char")
+        #TODO: we could allow padding=None for no padding at all
+        try:
+            self.fp.seek(self.sectorsize * (sect+1))
+        except:
+            debug('write_sect(): sect=%X, seek=%d, filesize=%d' %
+                (sect, self.sectorsize*(sect+1), self._filesize))
+            self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range')
+        if len(data) < self.sectorsize:
+            data += padding * (self.sectorsize - len(data))  # add padding
+        elif len(data) > self.sectorsize:
+            raise ValueError("Data is larger than sector size")
+        self.fp.write(data)
+
+
     def loaddirectory(self, sect):
         """
         Load the directory.
@@ -1541,12 +1738,13 @@ class OleFileIO:
 ##                break
 ##            self.direntries.append(_OleDirectoryEntry(entry, sid, self))
         # load root entry:
-        self._load_direntry(0)
+        root_entry = self._load_direntry(0)
         # Root entry is the first entry:
         self.root = self.direntries[0]
         # read and build all storage trees, starting from the root:
         self.root.build_storage_tree()
 
+
     def _load_direntry (self, sid):
         """
         Load a directory entry from the directory.
@@ -1555,6 +1753,7 @@ class OleFileIO:
 
         :param sid: index of storage/stream in the directory.
         :returns: a _OleDirectoryEntry object
+
         :exception IOError: if the entry has always been referenced.
         """
         # check if SID is OK:
@@ -1571,12 +1770,14 @@ class OleFileIO:
         self.direntries[sid] = _OleDirectoryEntry(entry, sid, self)
         return self.direntries[sid]
 
+
     def dumpdirectory(self):
         """
         Dump directory (for debugging only)
         """
         self.root.dump()
 
+
     def _open(self, start, size = 0x7FFFFFFF, force_FAT=False):
         """
         Open a stream, either in FAT or MiniFAT according to its size.
@@ -1585,7 +1786,7 @@ class OleFileIO:
         :param start: index of first sector
         :param size: size of stream (or nothing if size is unknown)
         :param force_FAT: if False (default), stream will be opened in FAT or MiniFAT
-                   according to size. If True, it will always be opened in FAT.
+            according to size. If True, it will always be opened in FAT.
         """
         debug('OleFileIO.open(): sect=%d, size=%d, force_FAT=%s' %
             (start, size, str(force_FAT)))
@@ -1602,51 +1803,60 @@ class OleFileIO:
                     (self.root.isectStart, size_ministream))
                 self.ministream = self._open(self.root.isectStart,
                     size_ministream, force_FAT=True)
-            return _OleStream(self.ministream, start, size, 0,
-                              self.minisectorsize, self.minifat,
-                              self.ministream.size)
+            return _OleStream(fp=self.ministream, sect=start, size=size,
+                              offset=0, sectorsize=self.minisectorsize,
+                              fat=self.minifat, filesize=self.ministream.size)
         else:
             # standard stream
-            return _OleStream(self.fp, start, size, 512,
-                              self.sectorsize, self.fat, self._filesize)
+            return _OleStream(fp=self.fp, sect=start, size=size,
+                              offset=self.sectorsize,
+                              sectorsize=self.sectorsize, fat=self.fat,
+                              filesize=self._filesize)
+
 
     def _list(self, files, prefix, node, streams=True, storages=False):
         """
-        (listdir helper)
+        listdir helper
+
         :param files: list of files to fill in
         :param prefix: current location in storage tree (list of names)
         :param node: current node (_OleDirectoryEntry object)
         :param streams: bool, include streams if True (True by default) - new in v0.26
         :param storages: bool, include storages if True (False by default) - new in v0.26
-        (note: the root storage is never included)
+            (note: the root storage is never included)
         """
         prefix = prefix + [node.name]
         for entry in node.kids:
-            if entry.kids:
+            if entry.entry_type == STGTY_STORAGE:
                 # this is a storage
                 if storages:
                     # add it to the list
                     files.append(prefix[1:] + [entry.name])
                 # check its kids
                 self._list(files, prefix, entry, streams, storages)
-            else:
+            elif entry.entry_type == STGTY_STREAM:
                 # this is a stream
                 if streams:
                     # add it to the list
                     files.append(prefix[1:] + [entry.name])
+            else:
+                self._raise_defect(DEFECT_INCORRECT, 'The directory tree contains an entry which is not a stream nor a storage.')
+
 
     def listdir(self, streams=True, storages=False):
         """
-        Return a list of streams stored in this file
+        Return a list of streams and/or storages stored in this file
 
         :param streams: bool, include streams if True (True by default) - new in v0.26
         :param storages: bool, include storages if True (False by default) - new in v0.26
             (note: the root storage is never included)
+        :returns: list of stream and/or storage paths
         """
         files = []
         self._list(files, [], self.root, streams, storages)
         return files
 
+
     def _find(self, filename):
         """
         Returns directory entry of given filename. (openstream helper)
@@ -1656,10 +1866,11 @@ class OleFileIO:
 
             - a string using Unix path syntax, for example:
               'storage_1/storage_1.2/stream'
-            - a list of storage filenames, path to the desired stream/storage.
+            - or a list of storage filenames, path to the desired stream/storage.
               Example: ['storage_1', 'storage_1.2', 'stream']
+
         :returns: sid of requested filename
-        raise IOError if file not found
+        :exception IOError: if file not found
         """
 
         # if filename is a string instead of a list, split it on slashes to
@@ -1677,15 +1888,17 @@ class OleFileIO:
             node = kid
         return node.sid
 
+
     def openstream(self, filename):
         """
         Open a stream as a read-only file object (BytesIO).
+        Note: filename is case-insensitive.
 
         :param filename: path of stream in storage tree (except root entry), either:
 
             - a string using Unix path syntax, for example:
               'storage_1/storage_1.2/stream'
-            - a list of storage filenames, path to the desired stream/storage.
+            - or a list of storage filenames, path to the desired stream/storage.
               Example: ['storage_1', 'storage_1.2', 'stream']
 
         :returns: file object (read-only)
@@ -1697,6 +1910,68 @@ class OleFileIO:
             raise IOError("this file is not a stream")
         return self._open(entry.isectStart, entry.size)
 
+
+    def write_stream(self, stream_name, data):
+        """
+        Write a stream to disk. For now, it is only possible to replace an
+        existing stream by data of the same size.
+
+        :param stream_name: path of stream in storage tree (except root entry), either:
+
+            - a string using Unix path syntax, for example:
+              'storage_1/storage_1.2/stream'
+            - or a list of storage filenames, path to the desired stream/storage.
+              Example: ['storage_1', 'storage_1.2', 'stream']
+
+        :param data: bytes, data to be written, must be the same size as the original
+            stream.
+
+        :exception TypeError: if data is not a bytes string
+        :exception ValueError: if data is not the same size as the existing stream
+        :exception NotImplementedError: if the stream is smaller than the mini sector cutoff
+        :exception IOError: if this is not a stream, or if its FAT chain is incorrect
+        """
+        if not isinstance(data, bytes):
+            raise TypeError("write_stream: data must be a bytes string")
+        sid = self._find(stream_name)
+        entry = self.direntries[sid]
+        if entry.entry_type != STGTY_STREAM:
+            raise IOError("this is not a stream")
+        size = entry.size
+        if size != len(data):
+            raise ValueError("write_stream: data must be the same size as the existing stream")
+        if size < self.minisectorcutoff:
+            raise NotImplementedError("Writing a stream in MiniFAT is not implemented yet")
+        sect = entry.isectStart
+        # number of sectors to write
+        nb_sectors = (size + (self.sectorsize-1)) // self.sectorsize
+        debug('nb_sectors = %d' % nb_sectors)
+        for i in range(nb_sectors):
+            # check the sector index BEFORE writing, otherwise a broken chain
+            # (e.g. ENDOFCHAIN reached too early) would write far beyond the file:
+            if sect < 0 or sect >= len(self.fat):
+                raise IOError('incorrect OLE FAT, sector index out of range')
+            # extract one sector from data, the last one being smaller:
+            if i<(nb_sectors-1):
+                data_sector = data [i*self.sectorsize : (i+1)*self.sectorsize]
+                assert(len(data_sector)==self.sectorsize)
+            else:
+                data_sector = data [i*self.sectorsize:]
+                debug('write_stream: size=%d sectorsize=%d data_sector=%d size%%sectorsize=%d'
+                    % (size, self.sectorsize, len(data_sector), size % self.sectorsize))
+                assert(len(data_sector) % self.sectorsize==size % self.sectorsize)
+            self.write_sect(sect, data_sector)
+            # jump to next sector in the FAT:
+            try:
+                sect = self.fat[sect]
+            except IndexError:
+                # [PL] if pointer is out of the FAT an exception is raised
+                raise IOError('incorrect OLE FAT, sector index out of range')
+        #[PL] Last sector should be a "end of chain" marker:
+        if sect != ENDOFCHAIN:
+            raise IOError('incorrect last sector index in OLE stream')
+
+
     def get_type(self, filename):
         """
         Test if given filename exists as a stream or a storage in the OLE
@@ -1716,6 +1991,7 @@ class OleFileIO:
         except:
             return False
 
+
     def getmtime(self, filename):
         """
         Return modification time of a stream/storage.
@@ -1731,6 +2007,7 @@ class OleFileIO:
         entry = self.direntries[sid]
         return entry.getmtime()
 
+
     def getctime(self, filename):
         """
         Return creation time of a stream/storage.
@@ -1746,20 +2023,23 @@ class OleFileIO:
         entry = self.direntries[sid]
         return entry.getctime()
 
+
     def exists(self, filename):
         """
         Test if given filename exists as a stream or a storage in the OLE
         container.
+        Note: filename is case-insensitive.
 
         :param filename: path of stream in storage tree. (see openstream for syntax)
         :returns: True if object exist, else False.
         """
         try:
             self._find(filename)
             return True
         except:
             return False
 
+
     def get_size(self, filename):
         """
         Return size of a stream in the OLE container, in bytes.
@@ -1767,7 +2047,7 @@ class OleFileIO:
         :param filename: path of stream in storage tree (see openstream for syntax)
         :returns: size in bytes (long integer)
         :exception IOError: if file not found
-        :exception TypeError: if this is not a stream
+        :exception TypeError: if this is not a stream.
         """
         sid = self._find(filename)
         entry = self.direntries[sid]
@@ -1776,6 +2056,7 @@ class OleFileIO:
             raise TypeError('object is not an OLE stream')
         return entry.size
 
+
     def get_rootentry_name(self):
         """
         Return root entry name. Should usually be 'Root Entry' or 'R' in most
@@ -1783,6 +2064,7 @@ class OleFileIO:
         """
         return self.root.name
 
+
     def getproperties(self, filename, convert_time=False, no_conversion=None):
         """
         Return properties described in substream.
@@ -1791,10 +2073,12 @@ class OleFileIO:
         :param convert_time: bool, if True timestamps will be converted to Python datetime
         :param no_conversion: None or list of int, timestamps not to be converted
             (for example total editing time is not a real timestamp)
+
         :returns: a dictionary of values indexed by id (integer)
         """
+        #REFERENCE: [MS-OLEPS] https://msdn.microsoft.com/en-us/library/dd942421.aspx
         # make sure no_conversion is a list, just to simplify code below:
-        if no_conversion is None:
+        if no_conversion == None:
             no_conversion = []
         # stream path as a string to report exceptions:
         streampath = filename
@@ -1808,11 +2092,11 @@ class OleFileIO:
         try:
             # header
             s = fp.read(28)
-            # clsid = _clsid(s[8:24])
+            clsid = _clsid(s[8:24])
 
             # format id
             s = fp.read(20)
-            # fmtid = _clsid(s[:16])
+            fmtid = _clsid(s[:16])
             fp.seek(i32(s, 16))
 
             # get section
@@ -1830,34 +2114,34 @@ class OleFileIO:
 
         for i in range(num_props):
             try:
-                id = 0  # just in case of an exception
+                id = 0 # just in case of an exception
                 id = i32(s, 8+i*8)
                 offset = i32(s, 12+i*8)
                 type = i32(s, offset)
 
-                debug('property id=%d: type=%d offset=%X' % (id, type, offset))
+                debug ('property id=%d: type=%d offset=%X' % (id, type, offset))
 
                 # test for common types first (should perhaps use
                 # a dictionary instead?)
 
-                if type == VT_I2:  # 16-bit signed integer
+                if type == VT_I2: # 16-bit signed integer
                     value = i16(s, offset+4)
                     if value >= 32768:
                         value = value - 65536
-                elif type == VT_UI2:  # 2-byte unsigned integer
+                elif type == VT_UI2: # 2-byte unsigned integer
                     value = i16(s, offset+4)
                 elif type in (VT_I4, VT_INT, VT_ERROR):
                     # VT_I4: 32-bit signed integer
                     # VT_ERROR: HRESULT, similar to 32-bit signed integer,
                     # see http://msdn.microsoft.com/en-us/library/cc230330.aspx
                     value = i32(s, offset+4)
-                elif type in (VT_UI4, VT_UINT):  # 4-byte unsigned integer
-                    value = i32(s, offset+4)  # FIXME
+                elif type in (VT_UI4, VT_UINT): # 4-byte unsigned integer
+                    value = i32(s, offset+4) # FIXME
                 elif type in (VT_BSTR, VT_LPSTR):
                     # CodePageString, see http://msdn.microsoft.com/en-us/library/dd942354.aspx
                     # size is a 32 bits integer, including the null terminator, and
                     # possibly trailing or embedded null chars
-                    # TODO: if codepage is unicode, the string should be converted as such
+                    #TODO: if codepage is unicode, the string should be converted as such
                     count = i32(s, offset+4)
                     value = s[offset+8:offset+8+count-1]
                     # remove all null chars:
@@ -1873,9 +2157,9 @@ class OleFileIO:
                     # "the string should NOT contain embedded or additional trailing
                     # null characters."
                     count = i32(s, offset+4)
-                    value = _unicode(s[offset+8:offset+8+count*2])
+                    value = self._decode_utf16_str(s[offset+8:offset+8+count*2])
                 elif type == VT_FILETIME:
-                    value = long(i32(s, offset+4)) + (long(i32(s, offset+8)) << 32)
+                    value = long(i32(s, offset+4)) + (long(i32(s, offset+8))<<32)
                     # FILETIME is a 64-bit int: "number of 100ns periods
                     # since Jan 1,1601".
                     if convert_time and id not in no_conversion:
@@ -1889,8 +2173,8 @@ class OleFileIO:
                     else:
                         # legacy code kept for backward compatibility: returns a
                         # number of seconds since Jan 1,1601
-                        value = value // 10000000  # seconds
-                elif type == VT_UI1:  # 1-byte unsigned integer
+                        value = value // 10000000 # seconds
+                elif type == VT_UI1: # 1-byte unsigned integer
                     value = i8(s[offset+4])
                 elif type == VT_CLSID:
                     value = _clsid(s[offset+4:offset+20])
@@ -1904,8 +2188,8 @@ class OleFileIO:
                     # see http://msdn.microsoft.com/en-us/library/cc237864.aspx
                     value = bool(i16(s, offset+4))
                 else:
-                    value = None  # everything else yields "None"
-                    debug('property id=%d: type=%d not implemented in parser yet' % (id, type))
+                    value = None # everything else yields "None"
+                    debug ('property id=%d: type=%d not implemented in parser yet' % (id, type))
 
                 # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE,
                 # VT_DECIMAL, VT_I1, VT_I8, VT_UI8,
@@ -1917,8 +2201,8 @@ class OleFileIO:
                 # type of items, e.g. VT_VECTOR|VT_BSTR
                 # see http://msdn.microsoft.com/en-us/library/dd942011.aspx
 
-                # print("%08x" % id, repr(value), end=" ")
-                # print("(%s)" % VT[i32(s, offset) & 0xFFF])
+                #print("%08x" % id, repr(value), end=" ")
+                #print("(%s)" % VT[i32(s, offset) & 0xFFF])
 
                 data[id] = value
             except BaseException as exc:
@@ -1949,105 +2233,112 @@ class OleFileIO:
 
 if __name__ == "__main__":
 
+    import sys
+
     # [PL] display quick usage info if launched from command-line
     if len(sys.argv) <= 1:
-        print(__doc__)
-        print("""
-Launched from command line, this script parses OLE files and prints info.
+        print('olefile version %s %s - %s' % (__version__, __date__, __author__))
+        print(
+"""
+Launched from the command line, this script parses OLE files and prints info.
 
-Usage: OleFileIO_PL.py [-d] [-c] <file> [file2 ...]
+Usage: olefile.py [-d] [-c] <file> [file2 ...]
 
 Options:
--d : debug mode (display a lot of debug information, for developers only)
+-d : debug mode (displays a lot of debug information, for developers only)
 -c : check all streams (for debugging purposes)
+
+For more information, see http://www.decalage.info/olefile
 """)
         sys.exit()
 
     check_streams = False
     for filename in sys.argv[1:]:
-        # try:
-        # OPTIONS:
-        if filename == '-d':
-            # option to switch debug mode on:
-            set_debug_mode(True)
-            continue
-        if filename == '-c':
-            # option to switch check streams mode on:
-            check_streams = True
-            continue
+##      try:
+            # OPTIONS:
+            if filename == '-d':
+                # option to switch debug mode on:
+                set_debug_mode(True)
+                continue
+            if filename == '-c':
+                # option to switch check streams mode on:
+                check_streams = True
+                continue
 
-        ole = OleFileIO(filename)  #, raise_defects=DEFECT_INCORRECT)
-        print("-" * 68)
-        print(filename)
-        print("-" * 68)
-        ole.dumpdirectory()
-        for streamname in ole.listdir():
-            if streamname[-1][0] == "\005":
-                print(streamname, ": properties")
-                props = ole.getproperties(streamname, convert_time=True)
-                props = sorted(props.items())
-                for k, v in props:
-                    #[PL]: avoid to display too large or binary values:
-                    if isinstance(v, (basestring, bytes)):
-                        if len(v) > 50:
-                            v = v[:50]
-                    if isinstance(v, bytes):
-                        # quick and dirty binary check:
-                        for c in (1, 2, 3, 4, 5, 6, 7, 11, 12, 14, 15, 16, 17, 18, 19, 20,
-                            21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31):
-                            if c in bytearray(v):
-                                v = '(binary data)'
-                                break
-                    print("   ", k, v)
-
-        if check_streams:
-            # Read all streams to check if there are errors:
-            print('\nChecking streams...')
+            ole = OleFileIO(filename)#, raise_defects=DEFECT_INCORRECT)
+            print("-" * 68)
+            print(filename)
+            print("-" * 68)
+            ole.dumpdirectory()
             for streamname in ole.listdir():
-                # print name using repr() to convert binary chars to \xNN:
-                print('-', repr('/'.join(streamname)), '-', end=' ')
-                st_type = ole.get_type(streamname)
-                if st_type == STGTY_STREAM:
-                    print('size %d' % ole.get_size(streamname))
-                    # just try to read stream in memory:
-                    ole.openstream(streamname)
-                else:
-                    print('NOT a stream : type=%d' % st_type)
+                if streamname[-1][0] == "\005":
+                    print(streamname, ": properties")
+                    props = ole.getproperties(streamname, convert_time=True)
+                    props = sorted(props.items())
+                    for k, v in props:
+                        #[PL]: avoid displaying values that are too large or binary:
+                        if isinstance(v, (basestring, bytes)):
+                            if len(v) > 50:
+                                v = v[:50]
+                        if isinstance(v, bytes):
+                            # quick and dirty binary check:
+                            for c in (1,2,3,4,5,6,7,11,12,14,15,16,17,18,19,20,
+                                21,22,23,24,25,26,27,28,29,30,31):
+                                if c in bytearray(v):
+                                    v = '(binary data)'
+                                    break
+                        print("   ", k, v)
+
+            if check_streams:
+                # Read all streams to check if there are errors:
+                print('\nChecking streams...')
+                for streamname in ole.listdir():
+                    # print name using repr() to convert binary chars to \xNN:
+                    print('-', repr('/'.join(streamname)),'-', end=' ')
+                    st_type = ole.get_type(streamname)
+                    if st_type == STGTY_STREAM:
+                        print('size %d' % ole.get_size(streamname))
+                        # just try to read stream in memory:
+                        ole.openstream(streamname)
+                    else:
+                        print('NOT a stream : type=%d' % st_type)
+                print()
+
+##            for streamname in ole.listdir():
+##                # print name using repr() to convert binary chars to \xNN:
+##                print('-', repr('/'.join(streamname)),'-', end=' ')
+##                print(ole.getmtime(streamname))
+##            print()
+
+            print('Modification/Creation times of all directory entries:')
+            for entry in ole.direntries:
+                if entry is not None:
+                    print('- %s: mtime=%s ctime=%s' % (entry.name,
+                        entry.getmtime(), entry.getctime()))
             print()
 
-##        for streamname in ole.listdir():
-##            # print name using repr() to convert binary chars to \xNN:
-##            print('-', repr('/'.join(streamname)),'-', end=' ')
-##            print(ole.getmtime(streamname))
-##        print()
+            # parse and display metadata:
+            meta = ole.get_metadata()
+            meta.dump()
+            print()
+            #[PL] Test a few new methods:
+            root = ole.get_rootentry_name()
+            print('Root entry name: "%s"' % root)
+            if ole.exists('worddocument'):
+                print("This is a Word document.")
+                print("type of stream 'WordDocument':", ole.get_type('worddocument'))
+                print("size :", ole.get_size('worddocument'))
+                if ole.exists('macros/vba'):
+                    print("This document may contain VBA macros.")
 
-        print('Modification/Creation times of all directory entries:')
-        for entry in ole.direntries:
-            if entry is not None:
-                print('- %s: mtime=%s ctime=%s' % (entry.name,
-                    entry.getmtime(), entry.getctime()))
-        print()
-
-        # parse and display metadata:
-        meta = ole.get_metadata()
-        meta.dump()
-        print()
-        # [PL] Test a few new methods:
-        root = ole.get_rootentry_name()
-        print('Root entry name: "%s"' % root)
-        if ole.exists('worddocument'):
-            print("This is a Word document.")
-            print("type of stream 'WordDocument':", ole.get_type('worddocument'))
-            print("size :", ole.get_size('worddocument'))
-            if ole.exists('macros/vba'):
-                print("This document may contain VBA macros.")
-
-        # print parsing issues:
-        print('\nNon-fatal issues raised during parsing:')
-        if ole.parsing_issues:
-            for exctype, msg in ole.parsing_issues:
-                print('- %s: %s' % (exctype.__name__, msg))
-        else:
-            print('None')
+            # print parsing issues:
+            print('\nNon-fatal issues raised during parsing:')
+            if ole.parsing_issues:
+                for exctype, msg in ole.parsing_issues:
+                    print('- %s: %s' % (exctype.__name__, msg))
+            else:
+                print('None')
 ##      except IOError as v:
 ##          print("***", "cannot read", file, "-", v)
+
+# this code was developed while listening to The Wedding Present "Sea Monsters"
\ No newline at end of file