From c9147c9c85818c526ed2d8def7d39dcfc48a8cf0 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Sun, 6 Aug 2023 22:14:32 +1000 Subject: [PATCH 1/4] Moved writing of object into separate function --- src/PIL/PdfImagePlugin.py | 248 +++++++++++++++++++------------------- 1 file changed, 127 insertions(+), 121 deletions(-) diff --git a/src/PIL/PdfImagePlugin.py b/src/PIL/PdfImagePlugin.py index 07f67d465..be39f4d16 100644 --- a/src/PIL/PdfImagePlugin.py +++ b/src/PIL/PdfImagePlugin.py @@ -46,6 +46,128 @@ def _save_all(im, fp, filename): # (Internal) Image save plugin for the PDF format. +def _write_image(im, filename, existing_pdf, image_refs): + # FIXME: Should replace ASCIIHexDecode with RunLengthDecode + # (packbits) or LZWDecode (tiff/lzw compression). Note that + # PDF 1.2 also supports Flatedecode (zip compression). + + params = None + decode = None + + # + # Get image characteristics + + width, height = im.size + + dict_obj = {"BitsPerComponent": 8} + if im.mode == "1": + if features.check("libtiff"): + filter = "CCITTFaxDecode" + dict_obj["BitsPerComponent"] = 1 + params = PdfParser.PdfArray( + [ + PdfParser.PdfDict( + { + "K": -1, + "BlackIs1": True, + "Columns": width, + "Rows": height, + } + ) + ] + ) + else: + filter = "DCTDecode" + dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceGray") + procset = "ImageB" # grayscale + elif im.mode == "L": + filter = "DCTDecode" + # params = f"<< /Predictor 15 /Columns {width-2} >>" + dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceGray") + procset = "ImageB" # grayscale + elif im.mode == "LA": + filter = "JPXDecode" + # params = f"<< /Predictor 15 /Columns {width-2} >>" + procset = "ImageB" # grayscale + dict_obj["SMaskInData"] = 1 + elif im.mode == "P": + filter = "ASCIIHexDecode" + palette = im.getpalette() + dict_obj["ColorSpace"] = [ + PdfParser.PdfName("Indexed"), + PdfParser.PdfName("DeviceRGB"), + 255, + PdfParser.PdfBinary(palette), + ] + procset = "ImageI" # indexed color + elif im.mode == "RGB": + filter = "DCTDecode" + dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceRGB") + procset = "ImageC" # color images + elif im.mode == "RGBA": + filter = "JPXDecode" + procset = "ImageC" # color images + dict_obj["SMaskInData"] = 1 + elif im.mode == "CMYK": + filter = "DCTDecode" + dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceCMYK") + procset = "ImageC" # color images + decode = [1, 0, 1, 0, 1, 0, 1, 0] + else: + msg = f"cannot save mode {im.mode}" + raise ValueError(msg) + + # + # image + + op = io.BytesIO() + + if filter == "ASCIIHexDecode": + ImageFile._save(im, op, [("hex", (0, 0) + im.size, 0, im.mode)]) + elif filter == "CCITTFaxDecode": + im.save( + op, + "TIFF", + compression="group4", + # use a single strip + strip_size=math.ceil(im.width / 8) * im.height, + ) + elif filter == "DCTDecode": + Image.SAVE["JPEG"](im, op, filename) + elif filter == "JPXDecode": + del dict_obj["BitsPerComponent"] + Image.SAVE["JPEG2000"](im, op, filename) + elif filter == "FlateDecode": + ImageFile._save(im, op, [("zip", (0, 0) + im.size, 0, im.mode)]) + elif filter == "RunLengthDecode": + ImageFile._save(im, op, [("packbits", (0, 0) + im.size, 0, im.mode)]) + else: + msg = f"unsupported PDF filter ({filter})" + raise ValueError(msg) + + stream = op.getvalue() + if filter == "CCITTFaxDecode": + stream = stream[8:] + filter = PdfParser.PdfArray([PdfParser.PdfName(filter)]) + else: + filter = PdfParser.PdfName(filter) + + existing_pdf.write_obj( + image_refs[page_number], + stream=stream, + Type=PdfParser.PdfName("XObject"), + Subtype=PdfParser.PdfName("Image"), + Width=width, # * 72.0 / x_resolution, + Height=height, # * 72.0 / y_resolution, + Filter=filter, + Decode=decode, + DecodeParms=params, + **dict_obj, + ) + + return procset + + def _save(im, fp, filename, save_all=False): is_appending = im.encoderinfo.get("append", False) if is_appending: @@ -121,123 +243,7 @@ def _save(im, fp, filename, save_all=False): for im_sequence in ims: im_pages = ImageSequence.Iterator(im_sequence) if save_all else [im_sequence] for im in im_pages: - # FIXME: Should replace ASCIIHexDecode with RunLengthDecode - # (packbits) or LZWDecode (tiff/lzw compression). Note that - # PDF 1.2 also supports Flatedecode (zip compression). - - params = None - decode = None - - # - # Get image characteristics - - width, height = im.size - - dict_obj = {"BitsPerComponent": 8} - if im.mode == "1": - if features.check("libtiff"): - filter = "CCITTFaxDecode" - dict_obj["BitsPerComponent"] = 1 - params = PdfParser.PdfArray( - [ - PdfParser.PdfDict( - { - "K": -1, - "BlackIs1": True, - "Columns": width, - "Rows": height, - } - ) - ] - ) - else: - filter = "DCTDecode" - dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceGray") - procset = "ImageB" # grayscale - elif im.mode == "L": - filter = "DCTDecode" - # params = f"<< /Predictor 15 /Columns {width-2} >>" - dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceGray") - procset = "ImageB" # grayscale - elif im.mode == "LA": - filter = "JPXDecode" - # params = f"<< /Predictor 15 /Columns {width-2} >>" - procset = "ImageB" # grayscale - dict_obj["SMaskInData"] = 1 - elif im.mode == "P": - filter = "ASCIIHexDecode" - palette = im.getpalette() - dict_obj["ColorSpace"] = [ - PdfParser.PdfName("Indexed"), - PdfParser.PdfName("DeviceRGB"), - 255, - PdfParser.PdfBinary(palette), - ] - procset = "ImageI" # indexed color - elif im.mode == "RGB": - filter = "DCTDecode" - dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceRGB") - procset = "ImageC" # color images - elif im.mode == "RGBA": - filter = "JPXDecode" - procset = "ImageC" # color images - dict_obj["SMaskInData"] = 1 - elif im.mode == "CMYK": - filter = "DCTDecode" - dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceCMYK") - procset = "ImageC" # color images - decode = [1, 0, 1, 0, 1, 0, 1, 0] - else: - msg = f"cannot save mode {im.mode}" - raise ValueError(msg) - - # - # image - - op = io.BytesIO() - - if filter == "ASCIIHexDecode": - ImageFile._save(im, op, [("hex", (0, 0) + im.size, 0, im.mode)]) - elif filter == "CCITTFaxDecode": - im.save( - op, - "TIFF", - compression="group4", - # use a single strip - strip_size=math.ceil(im.width / 8) * im.height, - ) - elif filter == "DCTDecode": - Image.SAVE["JPEG"](im, op, filename) - elif filter == "JPXDecode": - del dict_obj["BitsPerComponent"] - Image.SAVE["JPEG2000"](im, op, filename) - elif filter == "FlateDecode": - ImageFile._save(im, op, [("zip", (0, 0) + im.size, 0, im.mode)]) - elif filter == "RunLengthDecode": - ImageFile._save(im, op, [("packbits", (0, 0) + im.size, 0, im.mode)]) - else: - msg = f"unsupported PDF filter ({filter})" - raise ValueError(msg) - - stream = op.getvalue() - if filter == "CCITTFaxDecode": - stream = stream[8:] - filter = PdfParser.PdfArray([PdfParser.PdfName(filter)]) - else: - filter = PdfParser.PdfName(filter) - - existing_pdf.write_obj( - image_refs[page_number], - stream=stream, - Type=PdfParser.PdfName("XObject"), - Subtype=PdfParser.PdfName("Image"), - Width=width, # * 72.0 / x_resolution, - Height=height, # * 72.0 / y_resolution, - Filter=filter, - Decode=decode, - DecodeParms=params, - **dict_obj, - ) + procset = _write_image(im, filename, existing_pdfs, image_refs) # # page @@ -251,8 +257,8 @@ def _save(im, fp, filename, save_all=False): MediaBox=[ 0, 0, - width * 72.0 / x_resolution, - height * 72.0 / y_resolution, + im.width * 72.0 / x_resolution, + im.height * 72.0 / y_resolution, ], Contents=contents_refs[page_number], ) @@ -261,8 +267,8 @@ def _save(im, fp, filename, save_all=False): # page contents page_contents = b"q %f 0 0 %f 0 0 cm /image Do Q\n" % ( - width * 72.0 / x_resolution, - height * 72.0 / y_resolution, + im.width * 72.0 / x_resolution, + im.height * 72.0 / y_resolution, ) existing_pdf.write_obj(contents_refs[page_number], stream=page_contents) From a70ea82eb5c2cd073e1fe7cab0dca32b93fdcb9f Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 7 Aug 2023 13:53:19 +1000 Subject: [PATCH 2/4] Write P transparency as SMask --- Tests/test_file_pdf.py | 16 ++++++++++++++++ src/PIL/PdfImagePlugin.py | 21 ++++++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/Tests/test_file_pdf.py b/Tests/test_file_pdf.py index 9c8e90b7e..4f7b09af2 100644 --- a/Tests/test_file_pdf.py +++ b/Tests/test_file_pdf.py @@ -48,6 +48,22 @@ def test_save_alpha(tmp_path, mode): helper_save_as_pdf(tmp_path, mode) +def test_p_alpha(tmp_path): + # Arrange + outfile = str(tmp_path / "temp.pdf") + with Image.open("Tests/images/pil123p.png") as im: + assert im.mode == "P" + assert isinstance(im.info["transparency"], bytes) + + # Act + im.save(outfile) + + # Assert + with open(outfile, "rb") as fp: + contents = fp.read() + assert b"SMask" in contents + + def test_monochrome(tmp_path): # Arrange mode = "1" diff --git a/src/PIL/PdfImagePlugin.py b/src/PIL/PdfImagePlugin.py index be39f4d16..e3af1b452 100644 --- a/src/PIL/PdfImagePlugin.py +++ b/src/PIL/PdfImagePlugin.py @@ -100,6 +100,13 @@ def _write_image(im, filename, existing_pdf, image_refs): PdfParser.PdfBinary(palette), ] procset = "ImageI" # indexed color + + if "transparency" in im.info: + smask = im.convert("LA").getchannel("A") + smask.encoderinfo = {} + + image_ref = _write_image(smask, filename, existing_pdf, image_refs)[0] + dict_obj["SMask"] = image_ref elif im.mode == "RGB": filter = "DCTDecode" dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceRGB") @@ -130,7 +137,7 @@ def _write_image(im, filename, existing_pdf, image_refs): "TIFF", compression="group4", # use a single strip - strip_size=math.ceil(im.width / 8) * im.height, + strip_size=math.ceil(width / 8) * height, ) elif filter == "DCTDecode": Image.SAVE["JPEG"](im, op, filename) @@ -152,8 +159,9 @@ def _write_image(im, filename, existing_pdf, image_refs): else: filter = PdfParser.PdfName(filter) + image_ref = image_refs.pop(0) existing_pdf.write_obj( - image_refs[page_number], + image_ref, stream=stream, Type=PdfParser.PdfName("XObject"), Subtype=PdfParser.PdfName("Image"), @@ -165,7 +173,7 @@ def _write_image(im, filename, existing_pdf, image_refs): **dict_obj, ) - return procset + return image_ref, procset def _save(im, fp, filename, save_all=False): @@ -231,6 +239,9 @@ def _save(im, fp, filename, save_all=False): number_of_pages += im_number_of_pages for i in range(im_number_of_pages): image_refs.append(existing_pdf.next_object_id(0)) + if im.mode == "P" and "transparency" in im.info: + image_refs.append(existing_pdf.next_object_id(0)) + page_refs.append(existing_pdf.next_object_id(0)) contents_refs.append(existing_pdf.next_object_id(0)) existing_pdf.pages.append(page_refs[-1]) @@ -243,7 +254,7 @@ def _save(im, fp, filename, save_all=False): for im_sequence in ims: im_pages = ImageSequence.Iterator(im_sequence) if save_all else [im_sequence] for im in im_pages: - procset = _write_image(im, filename, existing_pdfs, image_refs) + image_ref, procset = _write_image(im, filename, existing_pdf, image_refs) # # page @@ -252,7 +263,7 @@ def _save(im, fp, filename, save_all=False): page_refs[page_number], Resources=PdfParser.PdfDict( ProcSet=[PdfParser.PdfName("PDF"), PdfParser.PdfName(procset)], - XObject=PdfParser.PdfDict(image=image_refs[page_number]), + XObject=PdfParser.PdfDict(image=image_ref), ), MediaBox=[ 0, From 5c5980721665a6a5ee64a5bf24efd26514b0b7eb Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 7 Aug 2023 13:54:11 +1000 Subject: [PATCH 3/4] Removed unused decoders --- src/PIL/PdfImagePlugin.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/PIL/PdfImagePlugin.py b/src/PIL/PdfImagePlugin.py index e3af1b452..09fc0c7e6 100644 --- a/src/PIL/PdfImagePlugin.py +++ b/src/PIL/PdfImagePlugin.py @@ -144,10 +144,6 @@ def _write_image(im, filename, existing_pdf, image_refs): elif filter == "JPXDecode": del dict_obj["BitsPerComponent"] Image.SAVE["JPEG2000"](im, op, filename) - elif filter == "FlateDecode": - ImageFile._save(im, op, [("zip", (0, 0) + im.size, 0, im.mode)]) - elif filter == "RunLengthDecode": - ImageFile._save(im, op, [("packbits", (0, 0) + im.size, 0, im.mode)]) else: msg = f"unsupported PDF filter ({filter})" raise ValueError(msg) From 73bd40babe644fcd402f0b7d3ae8be4894ca66f2 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 7 Aug 2023 20:49:29 +1000 Subject: [PATCH 4/4] Test for relevant characters before and after "SMask" --- Tests/test_file_pdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/test_file_pdf.py b/Tests/test_file_pdf.py index 4f7b09af2..ffc392d6b 100644 --- a/Tests/test_file_pdf.py +++ b/Tests/test_file_pdf.py @@ -61,7 +61,7 @@ def test_p_alpha(tmp_path): # Assert with open(outfile, "rb") as fp: contents = fp.read() - assert b"SMask" in contents + assert b"\n/SMask " in contents def test_monochrome(tmp_path):