mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-01-27 09:44:31 +03:00
- new class OleMetadata to parse standard properties
- added get_metadata method to OleFileIO
This commit is contained in:
parent
25158fe8b1
commit
d5166fd97e
123
PIL/OleFileIO.py
123
PIL/OleFileIO.py
|
@ -6,7 +6,7 @@ OleFileIO_PL:
|
||||||
Microsoft Compound Document File Format), such as Microsoft Office
|
Microsoft Compound Document File Format), such as Microsoft Office
|
||||||
documents, Image Composer and FlashPix files, Outlook messages, ...
|
documents, Image Composer and FlashPix files, Outlook messages, ...
|
||||||
|
|
||||||
version 0.24 2013-05-03 Philippe Lagadec - http://www.decalage.info
|
version 0.24 2013-05-05 Philippe Lagadec - http://www.decalage.info
|
||||||
|
|
||||||
Project website: http://www.decalage.info/python/olefileio
|
Project website: http://www.decalage.info/python/olefileio
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ WARNING: THIS IS (STILL) WORK IN PROGRESS.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__author__ = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)"
|
__author__ = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)"
|
||||||
__date__ = "2013-05-03"
|
__date__ = "2013-05-05"
|
||||||
__version__ = '0.24'
|
__version__ = '0.24'
|
||||||
|
|
||||||
#--- LICENSE ------------------------------------------------------------------
|
#--- LICENSE ------------------------------------------------------------------
|
||||||
|
@ -110,9 +110,11 @@ __version__ = '0.24'
|
||||||
# (https://bitbucket.org/decalage/olefileio_pl/issue/7)
|
# (https://bitbucket.org/decalage/olefileio_pl/issue/7)
|
||||||
# - added close method to OleFileIO (fixed issue #2)
|
# - added close method to OleFileIO (fixed issue #2)
|
||||||
# 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr)
|
# 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr)
|
||||||
# 2013-05-03 v0.24 PL: - getproperties: added conversion from filetime to python
|
# 2013-05-05 v0.24 PL: - getproperties: added conversion from filetime to python
|
||||||
# datetime
|
# datetime
|
||||||
# - main: displays properties with date format
|
# - main: displays properties with date format
|
||||||
|
# - new class OleMetadata to parse standard properties
|
||||||
|
# - added get_metadata method
|
||||||
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
#-----------------------------------------------------------------------------
|
||||||
|
@ -428,6 +430,107 @@ except NameError:
|
||||||
|
|
||||||
#=== CLASSES ==================================================================
|
#=== CLASSES ==================================================================
|
||||||
|
|
||||||
|
class OleMetadata:
    """
    Parse and store metadata from the standard property streams of an OLE file.

    Every name listed in SUMMARY_ATTRIBS and DOCSUM_ATTRIBS is exposed as an
    instance attribute. An attribute is None when the corresponding property
    (or its whole stream) is absent from the file, or when nothing has been
    parsed yet.

    References for SummaryInformation stream:
    - http://msdn.microsoft.com/en-us/library/dd942545.aspx
    - http://msdn.microsoft.com/en-us/library/dd925819%28v=office.12%29.aspx
    - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380376%28v=vs.85%29.aspx
    - http://msdn.microsoft.com/en-us/library/aa372045.aspx
    - http://sedna-soft.de/summary-information-stream/
    - http://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html

    References for DocumentSummaryInformation stream:
    - http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx
    - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380374%28v=vs.85%29.aspx
    - http://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html
    """

    # attribute names for SummaryInformation stream properties
    # (list position + 1 == property id, i.e. ids 0x01 to 0x13):
    SUMMARY_ATTRIBS = ['codepage', 'title', 'subject', 'author', 'keywords', 'comments',
        'template', 'last_saved_by', 'revision_number', 'total_edit_time',
        'last_printed', 'create_time', 'last_saved_time', 'num_pages',
        'num_words', 'num_chars', 'thumbnail', 'creating_application',
        'security']

    # attribute names for DocumentSummaryInformation stream properties
    # (list position + 1 == property id, i.e. ids 0x01 to 0x1C):
    DOCSUM_ATTRIBS = ['codepage_doc', 'category', 'presentation_target', 'bytes', 'lines', 'paragraphs',
        'slides', 'notes', 'hidden_slides', 'mm_clips',
        'scale_crop', 'heading_pairs', 'titles_of_parts', 'manager',
        'company', 'links_dirty', 'chars_with_spaces', 'unused', 'shared_doc',
        'link_base', 'hlinks', 'hlinks_changed', 'version', 'dig_sig',
        'content_type', 'content_status', 'language', 'doc_version']

    def __init__(self):
        """
        Create an empty metadata object: every attribute named in
        SUMMARY_ATTRIBS and DOCSUM_ATTRIBS is initialised to None, so that
        dump() and plain attribute access are always safe.
        """
        self._reset()

    def _reset(self):
        """Set every standard property attribute of both streams to None."""
        for name in self.SUMMARY_ATTRIBS + self.DOCSUM_ATTRIBS:
            setattr(self, name, None)

    def _load_stream(self, olefile, stream_name, attrib_names):
        """
        Copy the properties of one property stream into attributes.

        olefile: object providing exists(name) and
            getproperties(name, convert_time=...) returning a dict keyed by
            property id.
        stream_name: name of the property stream to read.
        attrib_names: attribute names ordered by property id, starting at 1.

        Does nothing when the stream does not exist in olefile.
        """
        if not olefile.exists(stream_name):
            return
        # get properties from the stream:
        props = olefile.getproperties(stream_name, convert_time=True)
        # store them into this object's attributes
        # (property ids start at 0x01, hence index + 1):
        for index, name in enumerate(attrib_names):
            setattr(self, name, props.get(index + 1, None))

    def parse_properties(self, olefile):
        """
        Parse the standard properties of an OLE file.

        olefile: object providing exists() and getproperties(), as used on
            the "\\x05SummaryInformation" and
            "\\x05DocumentSummaryInformation" streams.

        All attributes are first reset to None, so values from a previous
        call never survive when a stream or a property is missing.
        """
        self._reset()
        self._load_stream(olefile, "\x05SummaryInformation",
                          self.SUMMARY_ATTRIBS)
        self._load_stream(olefile, "\x05DocumentSummaryInformation",
                          self.DOCSUM_ATTRIBS)

    def dump(self):
        """
        Print every standard property (name and value) to standard output,
        SummaryInformation first, then DocumentSummaryInformation.
        """
        # single-argument parenthesised print: same output with the Python 2
        # print statement and with the Python 3 print function.
        print('Properties from SummaryInformation stream:')
        for prop in self.SUMMARY_ATTRIBS:
            value = getattr(self, prop)
            print('- %s: %s' % (prop, value))
        print('Properties from DocumentSummaryInformation stream:')
        for prop in self.DOCSUM_ATTRIBS:
            value = getattr(self, prop)
            print('- %s: %s' % (prop, value))
|
||||||
|
|
||||||
|
|
||||||
#--- _OleStream ---------------------------------------------------------------
|
#--- _OleStream ---------------------------------------------------------------
|
||||||
|
|
||||||
class _OleStream(StringIO.StringIO):
|
class _OleStream(StringIO.StringIO):
|
||||||
|
@ -1598,6 +1701,16 @@ class OleFileIO:
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
def get_metadata(self):
    """
    Build an OleMetadata object from the standard property streams of
    this file and return it.

    The same object is also kept in the metadata attribute of this
    OleFileIO object; each call replaces it with a freshly parsed one.
    """
    parsed = OleMetadata()
    # publish the object before parsing, exactly as the attribute was
    # assigned before parse_properties() ran:
    self.metadata = parsed
    parsed.parse_properties(self)
    return parsed
|
||||||
|
|
||||||
#
|
#
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# This script can be used to dump the directory of any OLE2 structured
|
# This script can be used to dump the directory of any OLE2 structured
|
||||||
|
@ -1673,6 +1786,10 @@ Options:
|
||||||
print 'NOT a stream : type=%d' % st_type
|
print 'NOT a stream : type=%d' % st_type
|
||||||
print ''
|
print ''
|
||||||
|
|
||||||
|
# parse and display metadata:
|
||||||
|
meta = ole.get_metadata()
|
||||||
|
meta.dump()
|
||||||
|
print ''
|
||||||
#[PL] Test a few new methods:
|
#[PL] Test a few new methods:
|
||||||
root = ole.get_rootentry_name()
|
root = ole.get_rootentry_name()
|
||||||
print 'Root entry name: "%s"' % root
|
print 'Root entry name: "%s"' % root
|
||||||
|
|
Loading…
Reference in New Issue
Block a user