mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-07-10 16:22:22 +03:00
issue #2959: fix wrong Parent of pre-existing Page objects when appending
This commit is contained in:
parent
113d67214c
commit
24ecfe315a
|
@ -147,6 +147,23 @@ class TestFilePdf(PillowTestCase):
|
||||||
finally:
|
finally:
|
||||||
os.rmdir(temp_dir)
|
os.rmdir(temp_dir)
|
||||||
|
|
||||||
|
def check_pdf_pages_consistency(self, pdf):
|
||||||
|
pages_info = pdf.read_indirect(pdf.pages_ref)
|
||||||
|
self.assertNotIn(b"Parent", pages_info)
|
||||||
|
self.assertIn(b"Kids", pages_info)
|
||||||
|
kids_not_used = pages_info[b"Kids"]
|
||||||
|
for page_ref in pdf.pages:
|
||||||
|
while True:
|
||||||
|
if page_ref in kids_not_used:
|
||||||
|
kids_not_used.remove(page_ref)
|
||||||
|
page_info = pdf.read_indirect(page_ref)
|
||||||
|
self.assertIn(b"Parent", page_info)
|
||||||
|
page_ref = page_info[b"Parent"]
|
||||||
|
if page_ref == pdf.pages_ref:
|
||||||
|
break
|
||||||
|
self.assertEqual(pdf.pages_ref, page_info[b"Parent"])
|
||||||
|
self.assertEqual(kids_not_used, [])
|
||||||
|
|
||||||
def test_pdf_append(self):
|
def test_pdf_append(self):
|
||||||
# make a PDF file
|
# make a PDF file
|
||||||
pdf_filename = self.helper_save_as_pdf("RGB", producer="PdfParser")
|
pdf_filename = self.helper_save_as_pdf("RGB", producer="PdfParser")
|
||||||
|
@ -156,6 +173,7 @@ class TestFilePdf(PillowTestCase):
|
||||||
self.assertEqual(len(pdf.pages), 1)
|
self.assertEqual(len(pdf.pages), 1)
|
||||||
self.assertEqual(len(pdf.info), 1)
|
self.assertEqual(len(pdf.info), 1)
|
||||||
self.assertEqual(pdf.info.Producer, "PdfParser")
|
self.assertEqual(pdf.info.Producer, "PdfParser")
|
||||||
|
self.check_pdf_pages_consistency(pdf)
|
||||||
|
|
||||||
# append some info
|
# append some info
|
||||||
pdf.info.Title = "abc"
|
pdf.info.Title = "abc"
|
||||||
|
@ -171,6 +189,7 @@ class TestFilePdf(PillowTestCase):
|
||||||
self.assertEqual(len(pdf.pages), 1)
|
self.assertEqual(len(pdf.pages), 1)
|
||||||
self.assertEqual(len(pdf.info), 6)
|
self.assertEqual(len(pdf.info), 6)
|
||||||
self.assertEqual(pdf.info.Title, "abc")
|
self.assertEqual(pdf.info.Title, "abc")
|
||||||
|
self.check_pdf_pages_consistency(pdf)
|
||||||
|
|
||||||
# append two images
|
# append two images
|
||||||
mode_CMYK = hopper("CMYK")
|
mode_CMYK = hopper("CMYK")
|
||||||
|
@ -186,6 +205,7 @@ class TestFilePdf(PillowTestCase):
|
||||||
self.assertEqual(pdf.info.Producer, "PdfParser")
|
self.assertEqual(pdf.info.Producer, "PdfParser")
|
||||||
self.assertEqual(pdf.info.Keywords, "qw)e\\r(ty")
|
self.assertEqual(pdf.info.Keywords, "qw)e\\r(ty")
|
||||||
self.assertEqual(pdf.info.Subject, u"ghi\uABCD")
|
self.assertEqual(pdf.info.Subject, u"ghi\uABCD")
|
||||||
|
self.check_pdf_pages_consistency(pdf)
|
||||||
|
|
||||||
def test_pdf_info(self):
|
def test_pdf_info(self):
|
||||||
# make a PDF file
|
# make a PDF file
|
||||||
|
|
|
@ -101,6 +101,9 @@ class IndirectReference(collections.namedtuple("IndirectReferenceTuple", ["objec
|
||||||
def __ne__(self, other):
|
def __ne__(self, other):
|
||||||
return not (self == other)
|
return not (self == other)
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash((self.object_id, self.generation))
|
||||||
|
|
||||||
|
|
||||||
class IndirectObjectDef(IndirectReference):
|
class IndirectObjectDef(IndirectReference):
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
@ -192,6 +195,9 @@ class PdfName:
|
||||||
else:
|
else:
|
||||||
self.name = name.encode("us-ascii")
|
self.name = name.encode("us-ascii")
|
||||||
|
|
||||||
|
def name_as_str(self):
|
||||||
|
return self.name.decode("us-ascii")
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return (isinstance(other, PdfName) and other.name == self.name) or other == self.name
|
return (isinstance(other, PdfName) and other.name == self.name) or other == self.name
|
||||||
|
|
||||||
|
@ -358,6 +364,7 @@ class PdfParser:
|
||||||
self.should_close_buf = True
|
self.should_close_buf = True
|
||||||
if not filename and hasattr(f, "name"):
|
if not filename and hasattr(f, "name"):
|
||||||
self.filename = f.name
|
self.filename = f.name
|
||||||
|
self.cached_objects = {}
|
||||||
if buf:
|
if buf:
|
||||||
self.read_pdf_info()
|
self.read_pdf_info()
|
||||||
else:
|
else:
|
||||||
|
@ -368,6 +375,7 @@ class PdfParser:
|
||||||
self.info_ref = None
|
self.info_ref = None
|
||||||
self.page_tree_root = {}
|
self.page_tree_root = {}
|
||||||
self.pages = []
|
self.pages = []
|
||||||
|
self.orig_pages = []
|
||||||
self.pages_ref = None
|
self.pages_ref = None
|
||||||
self.last_xref_section_offset = None
|
self.last_xref_section_offset = None
|
||||||
self.trailer_dict = {}
|
self.trailer_dict = {}
|
||||||
|
@ -414,15 +422,45 @@ class PdfParser:
|
||||||
self.del_root()
|
self.del_root()
|
||||||
self.root_ref = self.next_object_id(self.f.tell())
|
self.root_ref = self.next_object_id(self.f.tell())
|
||||||
self.pages_ref = self.next_object_id(0)
|
self.pages_ref = self.next_object_id(0)
|
||||||
|
self.rewrite_pages()
|
||||||
self.write_obj(self.root_ref,
|
self.write_obj(self.root_ref,
|
||||||
Type=PdfName(b"Catalog"),
|
Type=PdfName(b"Catalog"),
|
||||||
Pages=self.pages_ref)
|
Pages=self.pages_ref)
|
||||||
self.write_obj(self.pages_ref,
|
self.write_obj(self.pages_ref,
|
||||||
Type=PdfName("Pages"),
|
Type=PdfName(b"Pages"),
|
||||||
Count=len(self.pages),
|
Count=len(self.pages),
|
||||||
Kids=self.pages)
|
Kids=self.pages)
|
||||||
return self.root_ref
|
return self.root_ref
|
||||||
|
|
||||||
|
def rewrite_pages(self):
|
||||||
|
pages_tree_nodes_to_delete = []
|
||||||
|
for i, page_ref in enumerate(self.orig_pages):
|
||||||
|
page_info = self.cached_objects[page_ref]
|
||||||
|
del self.xref_table[page_ref.object_id]
|
||||||
|
pages_tree_nodes_to_delete.append(page_info[PdfName(b"Parent")])
|
||||||
|
if page_ref not in self.pages:
|
||||||
|
# the page has been deleted
|
||||||
|
continue
|
||||||
|
# make dict keys into strings for passing to write_page
|
||||||
|
stringified_page_info = {}
|
||||||
|
for key, value in page_info.items():
|
||||||
|
# key should be a PdfName
|
||||||
|
stringified_page_info[key.name_as_str()] = value
|
||||||
|
stringified_page_info["Parent"] = self.pages_ref
|
||||||
|
new_page_ref = self.write_page(None, **stringified_page_info)
|
||||||
|
for j, cur_page_ref in enumerate(self.pages):
|
||||||
|
if cur_page_ref == page_ref:
|
||||||
|
# replace the page reference with the new one
|
||||||
|
self.pages[j] = new_page_ref
|
||||||
|
# delete redundant Pages tree nodes from xref table
|
||||||
|
for pages_tree_node_ref in pages_tree_nodes_to_delete:
|
||||||
|
while pages_tree_node_ref:
|
||||||
|
pages_tree_node = self.cached_objects[pages_tree_node_ref]
|
||||||
|
if pages_tree_node_ref.object_id in self.xref_table:
|
||||||
|
del self.xref_table[pages_tree_node_ref.object_id]
|
||||||
|
pages_tree_node_ref = pages_tree_node.get(b"Parent", None)
|
||||||
|
self.orig_pages = []
|
||||||
|
|
||||||
def write_xref_and_trailer(self, new_root_ref=None):
|
def write_xref_and_trailer(self, new_root_ref=None):
|
||||||
if new_root_ref:
|
if new_root_ref:
|
||||||
self.del_root()
|
self.del_root()
|
||||||
|
@ -443,7 +481,7 @@ class PdfParser:
|
||||||
if isinstance(ref, int):
|
if isinstance(ref, int):
|
||||||
ref = self.pages[ref]
|
ref = self.pages[ref]
|
||||||
if "Type" not in dict_obj:
|
if "Type" not in dict_obj:
|
||||||
dict_obj["Type"] = PdfName("Page")
|
dict_obj["Type"] = PdfName(b"Page")
|
||||||
if "Parent" not in dict_obj:
|
if "Parent" not in dict_obj:
|
||||||
dict_obj["Parent"] = self.pages_ref
|
dict_obj["Parent"] = self.pages_ref
|
||||||
return self.write_obj(ref, *objs, **dict_obj)
|
return self.write_obj(ref, *objs, **dict_obj)
|
||||||
|
@ -474,7 +512,6 @@ class PdfParser:
|
||||||
return
|
return
|
||||||
del self.xref_table[self.root_ref.object_id]
|
del self.xref_table[self.root_ref.object_id]
|
||||||
del self.xref_table[self.root[b"Pages"].object_id]
|
del self.xref_table[self.root[b"Pages"].object_id]
|
||||||
# XXX TODO delete Pages tree recursively
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_buf_from_file(f):
|
def get_buf_from_file(f):
|
||||||
|
@ -506,6 +543,8 @@ class PdfParser:
|
||||||
self.pages_ref = self.root[b"Pages"]
|
self.pages_ref = self.root[b"Pages"]
|
||||||
self.page_tree_root = self.read_indirect(self.pages_ref)
|
self.page_tree_root = self.read_indirect(self.pages_ref)
|
||||||
self.pages = self.linearize_page_tree(self.page_tree_root)
|
self.pages = self.linearize_page_tree(self.page_tree_root)
|
||||||
|
# save the original list of page references in case the user modifies, adds or deletes some pages and we need to rewrite the pages and their list
|
||||||
|
self.orig_pages = self.pages[:]
|
||||||
|
|
||||||
def next_object_id(self, offset=None):
|
def next_object_id(self, offset=None):
|
||||||
try:
|
try:
|
||||||
|
@ -789,7 +828,9 @@ class PdfParser:
|
||||||
offset, generation = self.xref_table[ref[0]]
|
offset, generation = self.xref_table[ref[0]]
|
||||||
check_format_condition(generation == ref[1], "expected to find generation %s for object ID %s in xref table, instead found generation %s at offset %s" \
|
check_format_condition(generation == ref[1], "expected to find generation %s for object ID %s in xref table, instead found generation %s at offset %s" \
|
||||||
% (ref[1], ref[0], generation, offset))
|
% (ref[1], ref[0], generation, offset))
|
||||||
return self.get_value(self.buf, offset + self.start_offset, expect_indirect=IndirectReference(*ref), max_nesting=max_nesting)[0]
|
value = self.get_value(self.buf, offset + self.start_offset, expect_indirect=IndirectReference(*ref), max_nesting=max_nesting)[0]
|
||||||
|
self.cached_objects[ref] = value
|
||||||
|
return value
|
||||||
|
|
||||||
def linearize_page_tree(self, node=None):
|
def linearize_page_tree(self, node=None):
|
||||||
if node is None:
|
if node is None:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user