Merge pull request #6470 from radarhere/pdf_ccittfaxdecode

Save mode "1" (1-bit) PDF images using the CCITTFaxDecode filter
This commit is contained in:
Hugo van Kemenade 2022-08-07 19:11:37 +03:00 committed by GitHub
commit ae14255c34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 48 additions and 14 deletions

View File

@ -1011,14 +1011,18 @@ class TestFileLibTiff(LibTiffTestCase):
# Assert that there are multiple strips # Assert that there are multiple strips
assert len(im.tag_v2[STRIPOFFSETS]) > 1 assert len(im.tag_v2[STRIPOFFSETS]) > 1
def test_save_single_strip(self, tmp_path): @pytest.mark.parametrize("argument", (True, False))
def test_save_single_strip(self, argument, tmp_path):
im = hopper("RGB").resize((256, 256)) im = hopper("RGB").resize((256, 256))
out = str(tmp_path / "temp.tif") out = str(tmp_path / "temp.tif")
TiffImagePlugin.STRIP_SIZE = 2**18 if not argument:
TiffImagePlugin.STRIP_SIZE = 2**18
try: try:
arguments = {"compression": "tiff_adobe_deflate"}
im.save(out, compression="tiff_adobe_deflate") if argument:
arguments["strip_size"] = 2**18
im.save(out, **arguments)
with Image.open(out) as im: with Image.open(out) as im:
assert len(im.tag_v2[STRIPOFFSETS]) == 1 assert len(im.tag_v2[STRIPOFFSETS]) == 1

View File

@ -43,7 +43,7 @@ def test_monochrome(tmp_path):
# Act / Assert # Act / Assert
outfile = helper_save_as_pdf(tmp_path, mode) outfile = helper_save_as_pdf(tmp_path, mode)
assert os.path.getsize(outfile) < 15000 assert os.path.getsize(outfile) < 5000
def test_greyscale(tmp_path): def test_greyscale(tmp_path):

View File

@ -21,6 +21,7 @@
## ##
import io import io
import math
import os import os
import time import time
@ -123,8 +124,26 @@ def _save(im, fp, filename, save_all=False):
params = None params = None
decode = None decode = None
#
# Get image characteristics
width, height = im.size
if im.mode == "1": if im.mode == "1":
filter = "DCTDecode" filter = "CCITTFaxDecode"
bits = 1
params = PdfParser.PdfArray(
[
PdfParser.PdfDict(
{
"K": -1,
"BlackIs1": True,
"Columns": width,
"Rows": height,
}
)
]
)
colorspace = PdfParser.PdfName("DeviceGray") colorspace = PdfParser.PdfName("DeviceGray")
procset = "ImageB" # grayscale procset = "ImageB" # grayscale
elif im.mode == "L": elif im.mode == "L":
@ -161,6 +180,14 @@ def _save(im, fp, filename, save_all=False):
if filter == "ASCIIHexDecode": if filter == "ASCIIHexDecode":
ImageFile._save(im, op, [("hex", (0, 0) + im.size, 0, im.mode)]) ImageFile._save(im, op, [("hex", (0, 0) + im.size, 0, im.mode)])
elif filter == "CCITTFaxDecode":
im.save(
op,
"TIFF",
compression="group4",
# use a single strip
strip_size=math.ceil(im.width / 8) * im.height,
)
elif filter == "DCTDecode": elif filter == "DCTDecode":
Image.SAVE["JPEG"](im, op, filename) Image.SAVE["JPEG"](im, op, filename)
elif filter == "FlateDecode": elif filter == "FlateDecode":
@ -170,22 +197,24 @@ def _save(im, fp, filename, save_all=False):
else: else:
raise ValueError(f"unsupported PDF filter ({filter})") raise ValueError(f"unsupported PDF filter ({filter})")
# stream = op.getvalue()
# Get image characteristics if filter == "CCITTFaxDecode":
stream = stream[8:]
width, height = im.size filter = PdfParser.PdfArray([PdfParser.PdfName(filter)])
else:
filter = PdfParser.PdfName(filter)
existing_pdf.write_obj( existing_pdf.write_obj(
image_refs[page_number], image_refs[page_number],
stream=op.getvalue(), stream=stream,
Type=PdfParser.PdfName("XObject"), Type=PdfParser.PdfName("XObject"),
Subtype=PdfParser.PdfName("Image"), Subtype=PdfParser.PdfName("Image"),
Width=width, # * 72.0 / resolution, Width=width, # * 72.0 / resolution,
Height=height, # * 72.0 / resolution, Height=height, # * 72.0 / resolution,
Filter=PdfParser.PdfName(filter), Filter=filter,
BitsPerComponent=bits, BitsPerComponent=bits,
Decode=decode, Decode=decode,
DecodeParams=params, DecodeParms=params,
ColorSpace=colorspace, ColorSpace=colorspace,
) )

View File

@ -1684,7 +1684,8 @@ def _save(im, fp, filename):
stride = len(bits) * ((im.size[0] * bits[0] + 7) // 8) stride = len(bits) * ((im.size[0] * bits[0] + 7) // 8)
# aim for given strip size (64 KB by default) when using libtiff writer # aim for given strip size (64 KB by default) when using libtiff writer
if libtiff: if libtiff:
rows_per_strip = 1 if stride == 0 else min(STRIP_SIZE // stride, im.size[1]) im_strip_size = encoderinfo.get("strip_size", STRIP_SIZE)
rows_per_strip = 1 if stride == 0 else min(im_strip_size // stride, im.size[1])
# JPEG encoder expects multiple of 8 rows # JPEG encoder expects multiple of 8 rows
if compression == "jpeg": if compression == "jpeg":
rows_per_strip = min(((rows_per_strip + 7) // 8) * 8, im.size[1]) rows_per_strip = min(((rows_per_strip + 7) // 8) * 8, im.size[1])