issue #2959: fix PdfDict attribute access, text decoding, tests

This commit is contained in:
Dvořák Václav 2018-01-26 13:42:11 +01:00
parent 4d3b13fb08
commit 53ce9ec4fb
3 changed files with 39 additions and 16 deletions

View File

@ -137,13 +137,13 @@ class TestFilePdf(PillowTestCase):
pdf = pdfParser.PdfParser(pdf_filename)
self.assertEqual(len(pdf.pages), 1)
self.assertEqual(len(pdf.info), 1)
self.assertEqual(pdfParser.decode_text(pdf.info["Producer"]), "pdfParser")
self.assertEqual(pdf.info.Producer, "pdfParser")
# append some info
pdf.info["Title"] = "abc"
pdf.info["Author"] = "def"
pdf.info["Subject"] = "ghi"
pdf.info["Keywords"] = "jkl"
pdf.info["Creator"] = "hopper()"
pdf.info.Title = "abc"
pdf.info.Author = "def"
pdf.info.Subject = u"ghi\uABCD"
pdf.info.Keywords = "qw)e\\r(ty"
pdf.info.Creator = "hopper()"
with open(pdf_filename, "r+b") as f:
f.seek(0, os.SEEK_END)
pdf.write_xref_and_trailer(f)
@ -151,7 +151,7 @@ class TestFilePdf(PillowTestCase):
pdf = pdfParser.PdfParser(pdf_filename)
self.assertEqual(len(pdf.pages), 1)
self.assertEqual(len(pdf.info), 6)
self.assertEqual(pdfParser.decode_text(pdf.info["Title"]), "abc")
self.assertEqual(pdf.info.Title, "abc")
# append two images
mode_CMYK = hopper("CMYK")
mode_P = hopper("P")
@ -160,8 +160,11 @@ class TestFilePdf(PillowTestCase):
pdf = pdfParser.PdfParser(pdf_filename)
self.assertEqual(len(pdf.pages), 3)
self.assertEqual(len(pdf.info), 6)
self.assertEqual(pdfParser.decode_text(pdf.info["Title"]), "abc")
self.assertEqual(pdfParser.decode_text(pdf.info["Producer"]), "pdfParser")
self.assertEqual(pdfParser.decode_text(pdf.info[b"Title"]), "abc")
self.assertEqual(pdf.info.Title, "abc")
self.assertEqual(pdf.info.Producer, "pdfParser")
self.assertEqual(pdf.info.Keywords, "qw)e\\r(ty")
self.assertEqual(pdf.info.Subject, u"ghi\uABCD")
def test_pdf_append_to_bytesio(self):
im = hopper("RGB")

View File

@ -62,17 +62,17 @@ def _save(im, fp, filename, save_all=False):
existing_pdf = pdfParser.PdfParser()
if title:
existing_pdf.info["Title"] = title
existing_pdf.info.Title = title
if author:
existing_pdf.info["Author"] = author
existing_pdf.info.Author = author
if subject:
existing_pdf.info["Subject"] = subject
existing_pdf.info.Subject = subject
if keywords:
existing_pdf.info["Keywords"] = keywords
existing_pdf.info.Keywords = keywords
if creator:
existing_pdf.info["Creator"] = creator
existing_pdf.info.Creator = creator
if producer:
existing_pdf.info["Producer"] = producer
existing_pdf.info.Producer = producer
#
# make sure image data is available

View File

@ -186,7 +186,7 @@ class PdfName():
elif isinstance(name, bytes):
self.name = name
else:
self.name = name.encode("utf-8")
self.name = name.encode("us-ascii")
@classmethod
def from_pdf_stream(klass, data):
@ -224,6 +224,24 @@ class PdfDict(UserDict):
# Build the dictionary by handing every positional and keyword argument
# straight to UserDict.__init__; no extra state is set up here.
def __init__(self, *args, **kwargs):
UserDict.__init__(self, *args, **kwargs)
def __setattr__(self, key, value):
    """Store *value* as a dictionary entry when set through attribute syntax.

    The attribute name ``data`` is the one exception: it is set as a real
    instance attribute via ``UserDict.__setattr__``. Any other name is
    written into the mapping itself; ``str`` names are encoded to
    ``us-ascii`` bytes first, so ``d.Title = x`` stores ``x`` under
    ``b"Title"`` on Python 3.
    """
    if key != "data":
        entry_name = key.encode("us-ascii") if isinstance(key, str) else key
        self[entry_name] = value
        return
    UserDict.__setattr__(self, key, value)
def __getattr__(self, key):
    """Read a dictionary entry through attribute syntax.

    Python calls this only after normal attribute lookup has failed. The
    entry is looked up first under *key* as given and then under *key*
    encoded to ``us-ascii`` bytes. A ``bytes`` value is returned after
    being passed through ``decode_text``; any other value is returned
    unchanged.

    Raises:
        AttributeError: if no entry exists under either form of *key*, if
            *key* cannot be encoded as ``us-ascii``, or if *key* is
            ``data`` (see below).
    """
    if key == "data":
        # Reaching here for ``data`` means the backing mapping has not been
        # created yet (e.g. an instance made with __new__ during copying or
        # unpickling). Indexing self would read self.data again and recurse
        # without bound, so report the attribute as missing instead.
        raise AttributeError(key)
    try:
        value = self[key]
    except KeyError:
        try:
            value = self[key.encode("us-ascii")]
        except (KeyError, UnicodeError):
            # The attribute protocol requires AttributeError here; leaking
            # KeyError breaks hasattr(), getattr(obj, name, default) and
            # copy.deepcopy(), which probe for optional attributes.
            raise AttributeError(key)
    if isinstance(value, bytes):
        return decode_text(value)
    return value
def __bytes__(self):
out = bytearray(b"<<")
for key, value in self.items():
@ -624,6 +642,7 @@ class PdfParser:
b"f": b"\f",
b"(": b"(",
b")": b")",
b"\\": b"\\",
ord(b"n"): b"\n",
ord(b"r"): b"\r",
ord(b"t"): b"\t",
@ -631,6 +650,7 @@ class PdfParser:
ord(b"f"): b"\f",
ord(b"("): b"(",
ord(b")"): b")",
ord(b"\\"): b"\\",
}
@classmethod
def get_literal_string(klass, data, offset):