mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-06-28 17:03:13 +03:00
issue #2959: text string decoding, support for Info dict, updated tests
This commit is contained in:
parent
991f832d91
commit
13fe1a5ba7
|
@ -7,16 +7,13 @@ import tempfile
|
||||||
|
|
||||||
class TestFilePdf(PillowTestCase):
|
class TestFilePdf(PillowTestCase):
|
||||||
|
|
||||||
def helper_save_as_pdf(self, mode, save_all=False):
|
def helper_save_as_pdf(self, mode, **kwargs):
|
||||||
# Arrange
|
# Arrange
|
||||||
im = hopper(mode)
|
im = hopper(mode)
|
||||||
outfile = self.tempfile("temp_" + mode + ".pdf")
|
outfile = self.tempfile("temp_" + mode + ".pdf")
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
if save_all:
|
im.save(outfile, **kwargs)
|
||||||
im.save(outfile, save_all=True)
|
|
||||||
else:
|
|
||||||
im.save(outfile)
|
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
self.assertTrue(os.path.isfile(outfile))
|
self.assertTrue(os.path.isfile(outfile))
|
||||||
|
@ -134,18 +131,18 @@ class TestFilePdf(PillowTestCase):
|
||||||
|
|
||||||
def test_pdf_append(self):
|
def test_pdf_append(self):
|
||||||
# make a PDF file
|
# make a PDF file
|
||||||
pdf_filename = self.helper_save_as_pdf("RGB")
|
pdf_filename = self.helper_save_as_pdf("RGB", producer="pdfParser")
|
||||||
# open it, check pages and info
|
# open it, check pages and info
|
||||||
pdf = pdfParser.PdfParser(pdf_filename)
|
pdf = pdfParser.PdfParser(pdf_filename)
|
||||||
self.assertEqual(len(pdf.pages), 1)
|
self.assertEqual(len(pdf.pages), 1)
|
||||||
self.assertEqual(len(pdf.info), 0)
|
self.assertEqual(len(pdf.info), 1)
|
||||||
|
self.assertEqual(pdfParser.decode_text(pdf.info[b"Producer"]), "pdfParser")
|
||||||
# append some info
|
# append some info
|
||||||
pdf.info[b"Title"] = b"abc"
|
pdf.info[b"Title"] = pdfParser.encode_text("abc")
|
||||||
pdf.info[b"Author"] = b"def"
|
pdf.info[b"Author"] = pdfParser.encode_text("def")
|
||||||
pdf.info[b"Subject"] = pdfParser.encode_text("ghi")
|
pdf.info[b"Subject"] = pdfParser.encode_text("ghi")
|
||||||
pdf.info[b"Keywords"] = b"jkl"
|
pdf.info[b"Keywords"] = pdfParser.encode_text("jkl")
|
||||||
pdf.info[b"Creator"] = b"hopper()"
|
pdf.info[b"Creator"] = pdfParser.encode_text("hopper()")
|
||||||
pdf.info[b"Producer"] = b"pdfParser"
|
|
||||||
with open(pdf_filename, "r+b") as f:
|
with open(pdf_filename, "r+b") as f:
|
||||||
f.seek(0, os.SEEK_END)
|
f.seek(0, os.SEEK_END)
|
||||||
pdf.write_xref_and_trailer(f)
|
pdf.write_xref_and_trailer(f)
|
||||||
|
@ -153,7 +150,7 @@ class TestFilePdf(PillowTestCase):
|
||||||
pdf = pdfParser.PdfParser(pdf_filename)
|
pdf = pdfParser.PdfParser(pdf_filename)
|
||||||
self.assertEqual(len(pdf.pages), 1)
|
self.assertEqual(len(pdf.pages), 1)
|
||||||
self.assertEqual(len(pdf.info), 6)
|
self.assertEqual(len(pdf.info), 6)
|
||||||
self.assertEqual(pdf.info[b"Title"], b"abc")
|
self.assertEqual(pdfParser.decode_text(pdf.info[b"Title"]), "abc")
|
||||||
# append two images
|
# append two images
|
||||||
mode_CMYK = hopper("CMYK")
|
mode_CMYK = hopper("CMYK")
|
||||||
mode_P = hopper("P")
|
mode_P = hopper("P")
|
||||||
|
@ -162,7 +159,8 @@ class TestFilePdf(PillowTestCase):
|
||||||
pdf = pdfParser.PdfParser(pdf_filename)
|
pdf = pdfParser.PdfParser(pdf_filename)
|
||||||
self.assertEqual(len(pdf.pages), 3)
|
self.assertEqual(len(pdf.pages), 3)
|
||||||
self.assertEqual(len(pdf.info), 6)
|
self.assertEqual(len(pdf.info), 6)
|
||||||
self.assertEqual(pdf.info[b"Title"], b"abc")
|
self.assertEqual(pdfParser.decode_text(pdf.info[b"Title"]), "abc")
|
||||||
|
self.assertEqual(pdfParser.decode_text(pdf.info[b"Producer"]), "pdfParser")
|
||||||
|
|
||||||
def test_pdf_parser(self):
|
def test_pdf_parser(self):
|
||||||
pdfParser.selftest()
|
pdfParser.selftest()
|
||||||
|
|
|
@ -48,12 +48,32 @@ def _save_all(im, fp, filename):
|
||||||
def _save(im, fp, filename, save_all=False):
|
def _save(im, fp, filename, save_all=False):
|
||||||
resolution = im.encoderinfo.get("resolution", 72.0)
|
resolution = im.encoderinfo.get("resolution", 72.0)
|
||||||
is_appending = im.encoderinfo.get("append", False)
|
is_appending = im.encoderinfo.get("append", False)
|
||||||
|
title = im.encoderinfo.get("title", None)
|
||||||
|
author = im.encoderinfo.get("author", None)
|
||||||
|
subject = im.encoderinfo.get("subject", None)
|
||||||
|
keywords = im.encoderinfo.get("keywords", None)
|
||||||
|
creator = im.encoderinfo.get("creator", None)
|
||||||
|
producer = im.encoderinfo.get("producer", None)
|
||||||
|
|
||||||
if is_appending:
|
if is_appending:
|
||||||
existing_pdf = pdfParser.PdfParser(f=fp, filename=filename)
|
existing_pdf = pdfParser.PdfParser(f=fp, filename=filename)
|
||||||
fp.seek(0, io.SEEK_END)
|
fp.seek(0, io.SEEK_END)
|
||||||
else:
|
else:
|
||||||
existing_pdf = pdfParser.PdfParser()
|
existing_pdf = pdfParser.PdfParser()
|
||||||
|
|
||||||
|
if title:
|
||||||
|
existing_pdf.info[b"Title"] = pdfParser.encode_text(title)
|
||||||
|
if author:
|
||||||
|
existing_pdf.info[b"Author"] = pdfParser.encode_text(author)
|
||||||
|
if subject:
|
||||||
|
existing_pdf.info[b"Subject"] = pdfParser.encode_text(subject)
|
||||||
|
if keywords:
|
||||||
|
existing_pdf.info[b"Keywords"] = pdfParser.encode_text(keywords)
|
||||||
|
if creator:
|
||||||
|
existing_pdf.info[b"Creator"] = pdfParser.encode_text(creator)
|
||||||
|
if producer:
|
||||||
|
existing_pdf.info[b"Producer"] = pdfParser.encode_text(producer)
|
||||||
|
|
||||||
#
|
#
|
||||||
# make sure image data is available
|
# make sure image data is available
|
||||||
im.load()
|
im.load()
|
||||||
|
|
|
@ -18,10 +18,63 @@ else: # Python 3.x
|
||||||
return s.encode("us-ascii")
|
return s.encode("us-ascii")
|
||||||
|
|
||||||
|
|
||||||
|
# see 7.9.2.2 Text String Type on page 86 and D.3 PDFDocEncoding Character Set on page 656
|
||||||
def encode_text(s):
|
def encode_text(s):
|
||||||
return codecs.BOM_UTF16_BE + s.encode("utf_16_be")
|
return codecs.BOM_UTF16_BE + s.encode("utf_16_be")
|
||||||
|
|
||||||
|
|
||||||
|
# Byte values that decode_text() maps to something other than the
# same-numbered Unicode code point. Every byte not listed here is decoded
# as the code point with the same number (i.e. as Latin-1).
PDFDocEncoding = {
    # NOTE(review): 0x16 is mapped to U+0017, not U+0016 -- confirm against
    # the PDFDocEncoding table in the PDF specification before changing.
    0x16: u"\u0017",
    0x18: u"\u02D8",
    0x19: u"\u02C7",
    0x1A: u"\u02C6",
    0x1B: u"\u02D9",
    0x1C: u"\u02DD",
    0x1D: u"\u02DB",
    0x1E: u"\u02DA",
    0x1F: u"\u02DC",
    0x80: u"\u2022",
    0x81: u"\u2020",
    0x82: u"\u2021",
    0x83: u"\u2026",
    0x84: u"\u2014",
    0x85: u"\u2013",
    0x86: u"\u0192",
    0x87: u"\u2044",
    0x88: u"\u2039",
    0x89: u"\u203A",
    0x8A: u"\u2212",
    0x8B: u"\u2030",
    0x8C: u"\u201E",
    0x8D: u"\u201C",
    0x8E: u"\u201D",
    0x8F: u"\u2018",
    0x90: u"\u2019",
    0x91: u"\u201A",
    0x92: u"\u2122",
    0x93: u"\uFB01",
    0x94: u"\uFB02",
    0x95: u"\u0141",
    0x96: u"\u0152",
    0x97: u"\u0160",
    0x98: u"\u0178",
    0x99: u"\u017D",
    0x9A: u"\u0131",
    0x9B: u"\u0142",
    0x9C: u"\u0153",
    0x9D: u"\u0161",
    0x9E: u"\u017E",
    0xA0: u"\u20AC",
}


def decode_text(b):
    """Decode a PDF text string to a unicode string.

    If ``b`` starts with the UTF-16BE byte order mark, the remainder is
    decoded as UTF-16BE. Otherwise each byte is looked up in
    ``PDFDocEncoding`` and, when absent from that table, decoded as the
    Unicode code point with the same number.

    :param b: the raw bytes of the text string.
    :returns: the decoded text (``unicode`` on Python 2, ``str`` on 3).
    """
    bom = codecs.BOM_UTF16_BE
    if b[:len(bom)] == bom:
        return b[len(bom):].decode("utf_16_be")
    # Latin-1 turns every byte into the same-numbered code point, which is
    # exactly the fallback wanted for bytes missing from the table. Going
    # through it avoids a Python 2/3 switch and never feeds non-ASCII byte
    # strings to a unicode join (which raises UnicodeDecodeError on 2.x).
    return u"".join(
        PDFDocEncoding.get(ord(char), char) for char in b.decode("latin_1")
    )
|
||||||
|
|
||||||
|
|
||||||
class PdfFormatError(RuntimeError):
|
class PdfFormatError(RuntimeError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -667,6 +720,10 @@ class PdfParser:
|
||||||
|
|
||||||
|
|
||||||
def selftest():
|
def selftest():
|
||||||
|
assert encode_text("abc") == b"\xFE\xFF\x00a\x00b\x00c"
|
||||||
|
assert decode_text(b"\xFE\xFF\x00a\x00b\x00c") == "abc"
|
||||||
|
assert decode_text(b"abc") == "abc"
|
||||||
|
assert decode_text(b"\x1B a \x1C") == u"\u02D9 a \u02DD"
|
||||||
assert PdfParser.interpret_name(b"Name#23Hash") == b"Name#Hash"
|
assert PdfParser.interpret_name(b"Name#23Hash") == b"Name#Hash"
|
||||||
assert PdfParser.interpret_name(b"Name#23Hash", as_text=True) == "Name#Hash"
|
assert PdfParser.interpret_name(b"Name#23Hash", as_text=True) == "Name#Hash"
|
||||||
assert IndirectReference(1,2) == IndirectReference(1,2)
|
assert IndirectReference(1,2) == IndirectReference(1,2)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user