From fc57658635cae5cb40c72025a950b6287450b8b8 Mon Sep 17 00:00:00 2001
From: Andrew Murray <radarhere@users.noreply.github.com>
Date: Sun, 29 Jul 2018 22:33:59 +1000
Subject: [PATCH] Added PDF creation and modification date info

---
 Tests/test_file_pdf.py               | 25 ++++++++++++++----
 Tests/test_pdfparser.py              | 14 ++++++++++
 docs/handbook/image-file-formats.rst | 15 ++++++++++-
 src/PIL/PdfImagePlugin.py            | 39 ++++++++++++++--------------
 src/PIL/PdfParser.py                 | 27 ++++++++++++++++---
 5 files changed, 91 insertions(+), 29 deletions(-)

diff --git a/Tests/test_file_pdf.py b/Tests/test_file_pdf.py
index f012fb9d8..57f1c2118 100644
--- a/Tests/test_file_pdf.py
+++ b/Tests/test_file_pdf.py
@@ -4,6 +4,7 @@ import io
 import os
 import os.path
 import tempfile
+import time
 
 
 class TestFilePdf(PillowTestCase):
@@ -187,8 +188,13 @@ class TestFilePdf(PillowTestCase):
         # open it, check pages and info
         with PdfParser.PdfParser(pdf_filename, mode="r+b") as pdf:
             self.assertEqual(len(pdf.pages), 1)
-            self.assertEqual(len(pdf.info), 1)
+            self.assertEqual(len(pdf.info), 4)
+            self.assertEqual(pdf.info.Title, os.path.splitext(
+                                                os.path.basename(pdf_filename)
+                                             )[0])
             self.assertEqual(pdf.info.Producer, "PdfParser")
+            self.assertIn(b"CreationDate", pdf.info)
+            self.assertIn(b"ModDate", pdf.info)
             self.check_pdf_pages_consistency(pdf)
 
             # append some info
@@ -203,8 +209,10 @@ class TestFilePdf(PillowTestCase):
         # open it again, check pages and info again
         with PdfParser.PdfParser(pdf_filename) as pdf:
             self.assertEqual(len(pdf.pages), 1)
-            self.assertEqual(len(pdf.info), 6)
+            self.assertEqual(len(pdf.info), 8)
             self.assertEqual(pdf.info.Title, "abc")
+            self.assertIn(b"CreationDate", pdf.info)
+            self.assertIn(b"ModDate", pdf.info)
             self.check_pdf_pages_consistency(pdf)
 
         # append two images
@@ -216,29 +224,36 @@ class TestFilePdf(PillowTestCase):
         # open the PDF again, check pages and info again
         with PdfParser.PdfParser(pdf_filename) as pdf:
             self.assertEqual(len(pdf.pages), 3)
-            self.assertEqual(len(pdf.info), 6)
+            self.assertEqual(len(pdf.info), 8)
             self.assertEqual(PdfParser.decode_text(pdf.info[b"Title"]), "abc")
             self.assertEqual(pdf.info.Title, "abc")
             self.assertEqual(pdf.info.Producer, "PdfParser")
             self.assertEqual(pdf.info.Keywords, "qw)e\\r(ty")
             self.assertEqual(pdf.info.Subject, u"ghi\uABCD")
+            self.assertIn(b"CreationDate", pdf.info)
+            self.assertIn(b"ModDate", pdf.info)
             self.check_pdf_pages_consistency(pdf)
 
     def test_pdf_info(self):
         # make a PDF file
         pdf_filename = self.helper_save_as_pdf(
             "RGB", title="title", author="author", subject="subject",
-            keywords="keywords", creator="creator", producer="producer")
+            keywords="keywords", creator="creator", producer="producer",
+            creationDate=time.strptime("2000", "%Y"),
+            modDate=time.strptime("2001", "%Y"))
 
         # open it, check pages and info
         with PdfParser.PdfParser(pdf_filename) as pdf:
-            self.assertEqual(len(pdf.info), 6)
+            self.assertEqual(len(pdf.info), 8)
             self.assertEqual(pdf.info.Title, "title")
             self.assertEqual(pdf.info.Author, "author")
             self.assertEqual(pdf.info.Subject, "subject")
             self.assertEqual(pdf.info.Keywords, "keywords")
             self.assertEqual(pdf.info.Creator, "creator")
             self.assertEqual(pdf.info.Producer, "producer")
+            self.assertEqual(pdf.info.CreationDate,
+                             time.strptime("2000", "%Y"))
+            self.assertEqual(pdf.info.ModDate, time.strptime("2001", "%Y"))
             self.check_pdf_pages_consistency(pdf)
 
     def test_pdf_append_to_bytesio(self):
diff --git a/Tests/test_pdfparser.py b/Tests/test_pdfparser.py
index 42c813520..660405635 100644
--- a/Tests/test_pdfparser.py
+++ b/Tests/test_pdfparser.py
@@ -3,6 +3,7 @@ from helper import unittest, PillowTestCase
 from PIL.PdfParser import IndirectObjectDef, IndirectReference, PdfBinary, \
                           PdfDict, PdfFormatError, PdfName, PdfParser, \
                           PdfStream, decode_text, encode_text, pdf_repr
+import time
 
 
 class TestPdfParser(PillowTestCase):
@@ -76,6 +77,19 @@ class TestPdfParser(PillowTestCase):
         self.assertIsInstance(s, PdfStream)
         self.assertEqual(s.dictionary.Name, "value")
         self.assertEqual(s.decode(), b"abcde")
+        for name in ["CreationDate", "ModDate"]:
+            for date, value in {
+                b"20180729214124": "20180729214124",
+                b"D:20180729214124": "20180729214124",
+                b"D:2018072921": "20180729210000",
+                b"D:20180729214124Z": "20180729214124",
+                b"D:20180729214124+08'00'": "20180729134124",
+                b"D:20180729214124-05'00'": "20180730024124"
+            }.items():
+                d = PdfParser.get_value(
+                    b"<</"+name.encode()+b" ("+date+b")>>", 0)[0]
+                self.assertEqual(
+                    time.strftime("%Y%m%d%H%M%S", getattr(d, name)), value)
 
     def test_pdf_repr(self):
         self.assertEqual(bytes(IndirectReference(1, 2)), b"1 2 R")
diff --git a/docs/handbook/image-file-formats.rst b/docs/handbook/image-file-formats.rst
index eb50ff23d..e1138726f 100644
--- a/docs/handbook/image-file-formats.rst
+++ b/docs/handbook/image-file-formats.rst
@@ -1029,7 +1029,8 @@ The :py:meth:`~PIL.Image.Image.save` method can take the following keyword argum
     saved in the PDF.
 
 **title**
-    The document’s title.
+    The document’s title. If not appending to an existing PDF file, this will
+    default to the filename, without its directory or extension.
 
     .. versionadded:: 5.1.0
 
@@ -1061,6 +1062,18 @@ The :py:meth:`~PIL.Image.Image.save` method can take the following keyword argum
 
     .. versionadded:: 5.1.0
 
+**creationDate**
+    The creation date of the document. If not appending to an existing PDF
+    file, this will default to the current time in UTC.
+
+    .. versionadded:: 5.3.0
+
+**modDate**
+    The modification date of the document. If not appending to an existing PDF
+    file, this will default to the current time in UTC.
+
+    .. versionadded:: 5.3.0
+
 XV Thumbnails
 ^^^^^^^^^^^^^
 
diff --git a/src/PIL/PdfImagePlugin.py b/src/PIL/PdfImagePlugin.py
index d90e06a72..b42502762 100644
--- a/src/PIL/PdfImagePlugin.py
+++ b/src/PIL/PdfImagePlugin.py
@@ -23,6 +23,7 @@
 from . import Image, ImageFile, ImageSequence, PdfParser
 import io
 import os
+import time
 
 __version__ = "0.5"
 
@@ -46,32 +47,30 @@ def _save_all(im, fp, filename):
 # (Internal) Image save plugin for the PDF format.
 
 def _save(im, fp, filename, save_all=False):
-    resolution = im.encoderinfo.get("resolution", 72.0)
     is_appending = im.encoderinfo.get("append", False)
-    title = None if is_appending else im.encoderinfo.get("title", os.path.splitext(filename)[0])
-    author = im.encoderinfo.get("author", None)
-    subject = im.encoderinfo.get("subject", None)
-    keywords = im.encoderinfo.get("keywords", None)
-    creator = im.encoderinfo.get("creator", None)
-    producer = im.encoderinfo.get("producer", None)
-
     if is_appending:
         existing_pdf = PdfParser.PdfParser(f=fp, filename=filename, mode="r+b")
     else:
         existing_pdf = PdfParser.PdfParser(f=fp, filename=filename, mode="w+b")
 
-    if title:
-        existing_pdf.info.Title = title
-    if author:
-        existing_pdf.info.Author = author
-    if subject:
-        existing_pdf.info.Subject = subject
-    if keywords:
-        existing_pdf.info.Keywords = keywords
-    if creator:
-        existing_pdf.info.Creator = creator
-    if producer:
-        existing_pdf.info.Producer = producer
+    resolution = im.encoderinfo.get("resolution", 72.0)
+
+    info = {
+        "title": None if is_appending else os.path.splitext(
+                                               os.path.basename(filename)
+                                           )[0],
+        "author": None,
+        "subject": None,
+        "keywords": None,
+        "creator": None,
+        "producer": None,
+        "creationDate": None if is_appending else time.gmtime(),
+        "modDate": None if is_appending else time.gmtime()
+    }
+    for k, default in info.items():
+        v = im.encoderinfo.get(k) if k in im.encoderinfo else default
+        if v:
+            existing_pdf.info[k[0].upper() + k[1:]] = v
 
     #
     # make sure image data is available
diff --git a/src/PIL/PdfParser.py b/src/PIL/PdfParser.py
index 971f44514..ad6d9f3fe 100644
--- a/src/PIL/PdfParser.py
+++ b/src/PIL/PdfParser.py
@@ -1,8 +1,10 @@
+import calendar
 import codecs
 import collections
 import mmap
 import os
 import re
+import time
 import zlib
 from ._util import py3
 
@@ -280,9 +282,26 @@ class PdfDict(UserDict):
             except KeyError:
                 raise AttributeError(key)
         if isinstance(value, bytes):
-            return decode_text(value)
-        else:
-            return value
+            value = decode_text(value)
+        if key.endswith("Date"):
+            if value.startswith("D:"):
+                value = value[2:]
+
+            relationship = 'Z'
+            if len(value) > 17:
+                relationship = value[14]
+                offset = int(value[15:17]) * 60
+                if len(value) > 20:
+                    offset += int(value[18:20])
+
+            format = '%Y%m%d%H%M%S'[:len(value) - 2]
+            value = time.strptime(value[:len(format)+2], format)
+            if relationship in ['+', '-']:
+                offset *= 60
+                if relationship == '+':
+                    offset *= -1
+                value = time.gmtime(calendar.timegm(value) + offset)
+        return value
 
     def __bytes__(self):
         out = bytearray(b"<<")
@@ -347,6 +366,8 @@ def pdf_repr(x):
         return bytes(x)
     elif isinstance(x, int):
         return str(x).encode("us-ascii")
+    elif isinstance(x, time.struct_time):
+        return b'(D:'+time.strftime('%Y%m%d%H%M%SZ', x).encode("us-ascii")+b')'
     elif isinstance(x, dict):
         return bytes(PdfDict(x))
     elif isinstance(x, list):