mirror of
				https://github.com/python-pillow/Pillow.git
				synced 2025-10-26 13:41:08 +03:00 
			
		
		
		
	- new class OleMetadata to parse standard properties
- added get_metadata method to OleFileIO
This commit is contained in:
		
							parent
							
								
									25158fe8b1
								
							
						
					
					
						commit
						d5166fd97e
					
				
							
								
								
									
										123
									
								
								PIL/OleFileIO.py
									
									
									
									
									
								
							
							
						
						
									
										123
									
								
								PIL/OleFileIO.py
									
									
									
									
									
								
							|  | @ -6,7 +6,7 @@ OleFileIO_PL: | ||||||
|     Microsoft Compound Document File Format), such as Microsoft Office |     Microsoft Compound Document File Format), such as Microsoft Office | ||||||
|     documents, Image Composer and FlashPix files, Outlook messages, ... |     documents, Image Composer and FlashPix files, Outlook messages, ... | ||||||
| 
 | 
 | ||||||
| version 0.24 2013-05-03 Philippe Lagadec - http://www.decalage.info | version 0.24 2013-05-05 Philippe Lagadec - http://www.decalage.info | ||||||
| 
 | 
 | ||||||
| Project website: http://www.decalage.info/python/olefileio | Project website: http://www.decalage.info/python/olefileio | ||||||
| 
 | 
 | ||||||
|  | @ -24,7 +24,7 @@ WARNING: THIS IS (STILL) WORK IN PROGRESS. | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| __author__  = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)" | __author__  = "Philippe Lagadec, Fredrik Lundh (Secret Labs AB)" | ||||||
| __date__    = "2013-05-03" | __date__    = "2013-05-05" | ||||||
| __version__ = '0.24' | __version__ = '0.24' | ||||||
| 
 | 
 | ||||||
| #--- LICENSE ------------------------------------------------------------------ | #--- LICENSE ------------------------------------------------------------------ | ||||||
|  | @ -110,9 +110,11 @@ __version__ = '0.24' | ||||||
| #                        (https://bitbucket.org/decalage/olefileio_pl/issue/7) | #                        (https://bitbucket.org/decalage/olefileio_pl/issue/7) | ||||||
| #                      - added close method to OleFileIO (fixed issue #2) | #                      - added close method to OleFileIO (fixed issue #2) | ||||||
| # 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr) | # 2012-07-25 v0.23 PL: - added support for file-like objects (patch by mete0r_kr) | ||||||
| # 2013-05-03 v0.24 PL: - getproperties: added conversion from filetime to python | # 2013-05-05 v0.24 PL: - getproperties: added conversion from filetime to python | ||||||
| #                        datetime | #                        datetime | ||||||
| #                      - main: displays properties with date format | #                      - main: displays properties with date format | ||||||
|  | #                      - new class OleMetadata to parse standard properties | ||||||
|  | #                      - added get_metadata method | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| #----------------------------------------------------------------------------- | #----------------------------------------------------------------------------- | ||||||
|  | @ -428,6 +430,107 @@ except NameError: | ||||||
| 
 | 
 | ||||||
| #=== CLASSES ================================================================== | #=== CLASSES ================================================================== | ||||||
| 
 | 
 | ||||||
class OleMetadata:
    """
    Class to parse and store metadata from standard properties of OLE files.

    Every name listed in SUMMARY_ATTRIBS and DOCSUM_ATTRIBS is exposed as an
    instance attribute. All of them are initialised to None by the
    constructor, and parse_properties() overwrites the attributes of each
    standard property stream that is present in the file (a property missing
    from a present stream is stored as None).

    References for SummaryInformation stream:
    - http://msdn.microsoft.com/en-us/library/dd942545.aspx
    - http://msdn.microsoft.com/en-us/library/dd925819%28v=office.12%29.aspx
    - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380376%28v=vs.85%29.aspx
    - http://msdn.microsoft.com/en-us/library/aa372045.aspx
    - http://sedna-soft.de/summary-information-stream/
    - http://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html

    References for DocumentSummaryInformation stream:
    - http://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx
    - http://msdn.microsoft.com/en-us/library/windows/desktop/aa380374%28v=vs.85%29.aspx
    - http://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html
    """

    # attribute names for SummaryInformation stream properties
    # (the name at list index i is filled from property id i+1):
    SUMMARY_ATTRIBS = ['codepage', 'title', 'subject', 'author', 'keywords', 'comments',
        'template', 'last_saved_by', 'revision_number', 'total_edit_time',
        'last_printed', 'create_time', 'last_saved_time', 'num_pages',
        'num_words', 'num_chars', 'thumbnail', 'creating_application',
        'security']

    # attribute names for DocumentSummaryInformation stream properties
    # (the name at list index i is filled from property id i+1):
    DOCSUM_ATTRIBS = ['codepage_doc', 'category', 'presentation_target', 'bytes', 'lines', 'paragraphs',
        'slides', 'notes', 'hidden_slides', 'mm_clips',
        'scale_crop', 'heading_pairs', 'titles_of_parts', 'manager',
        'company', 'links_dirty', 'chars_with_spaces', 'unused', 'shared_doc',
        'link_base', 'hlinks', 'hlinks_changed', 'version', 'dig_sig',
        'content_type', 'content_status', 'language', 'doc_version']

    def __init__(self):
        """
        Create an empty metadata object: every attribute named in
        SUMMARY_ATTRIBS and DOCSUM_ATTRIBS is set to None.
        """
        # Initialise the attributes of BOTH streams, so that dump() and plain
        # attribute access work even when parse_properties() has not been
        # called yet, or when a stream is absent from the parsed file.
        for name in self.SUMMARY_ATTRIBS + self.DOCSUM_ATTRIBS:
            setattr(self, name, None)

    def _store_properties(self, olefile, stream_name, attrib_names):
        """
        Copy the properties of one property stream into attributes.

        olefile: object providing exists() and getproperties()
        stream_name: name of the property stream to read
        attrib_names: attribute names, ordered by property id starting at 1

        Nothing is changed if the stream does not exist in olefile.
        """
        if not olefile.exists(stream_name):
            return
        # get properties from the stream:
        props = olefile.getproperties(stream_name, convert_time=True)
        # store them into this object's attributes; ids of standard
        # properties start at 0x01 and go up to len(attrib_names):
        for prop_id, name in enumerate(attrib_names, 1):
            setattr(self, name, props.get(prop_id, None))

    def parse_properties(self, olefile):
        """
        Parse standard properties of an OLE file.

        olefile: object providing exists(name) and
                 getproperties(name, convert_time=True), such as OleFileIO

        Reads the "\\x05SummaryInformation" stream (property ids 0x01 to 0x13)
        and the "\\x05DocumentSummaryInformation" stream (property ids 0x01 to
        0x1C) when they exist. Attributes of a stream that does not exist are
        left untouched.
        """
        self._store_properties(olefile, "\x05SummaryInformation",
            self.SUMMARY_ATTRIBS)
        self._store_properties(olefile, "\x05DocumentSummaryInformation",
            self.DOCSUM_ATTRIBS)

    def dump(self):
        """
        Print all the metadata attributes to standard output, one per line,
        grouped by the stream they come from.
        """
        # print(...) with a single parenthesised argument gives the same
        # output as the print statement on Python 2, and is valid on Python 3.
        print('Properties from SummaryInformation stream:')
        for prop in self.SUMMARY_ATTRIBS:
            value = getattr(self, prop)
            print('- %s: %s' % (prop, value))
        print('Properties from DocumentSummaryInformation stream:')
        for prop in self.DOCSUM_ATTRIBS:
            value = getattr(self, prop)
            print('- %s: %s' % (prop, value))
|  | 
 | ||||||
|  | 
 | ||||||
| #--- _OleStream --------------------------------------------------------------- | #--- _OleStream --------------------------------------------------------------- | ||||||
| 
 | 
 | ||||||
| class _OleStream(StringIO.StringIO): | class _OleStream(StringIO.StringIO): | ||||||
|  | @ -1598,6 +1701,16 @@ class OleFileIO: | ||||||
| 
 | 
 | ||||||
|         return data |         return data | ||||||
| 
 | 
 | ||||||
|  |     def get_metadata(self): | ||||||
|  |         """ | ||||||
|  |         Parse standard properties streams, return an OleMetadata object | ||||||
|  |         containing all the available metadata. | ||||||
|  |         (also stored in the metadata attribute of the OleFileIO object) | ||||||
|  |         """ | ||||||
|  |         self.metadata = OleMetadata() | ||||||
|  |         self.metadata.parse_properties(self) | ||||||
|  |         return self.metadata | ||||||
|  | 
 | ||||||
| # | # | ||||||
| # -------------------------------------------------------------------- | # -------------------------------------------------------------------- | ||||||
| # This script can be used to dump the directory of any OLE2 structured | # This script can be used to dump the directory of any OLE2 structured | ||||||
|  | @ -1673,6 +1786,10 @@ Options: | ||||||
|                         print 'NOT a stream : type=%d' % st_type |                         print 'NOT a stream : type=%d' % st_type | ||||||
|                 print '' |                 print '' | ||||||
| 
 | 
 | ||||||
|  |             # parse and display metadata: | ||||||
|  |             meta = ole.get_metadata() | ||||||
|  |             meta.dump() | ||||||
|  |             print '' | ||||||
|             #[PL] Test a few new methods: |             #[PL] Test a few new methods: | ||||||
|             root = ole.get_rootentry_name() |             root = ole.get_rootentry_name() | ||||||
|             print 'Root entry name: "%s"' % root |             print 'Root entry name: "%s"' % root | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user