mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-01-12 18:26:17 +03:00
issue #2959: fix wrong Parent of pre-existing Page objects when appending
This commit is contained in:
parent
113d67214c
commit
24ecfe315a
|
@ -147,6 +147,23 @@ class TestFilePdf(PillowTestCase):
|
|||
finally:
|
||||
os.rmdir(temp_dir)
|
||||
|
||||
def check_pdf_pages_consistency(self, pdf):
|
||||
pages_info = pdf.read_indirect(pdf.pages_ref)
|
||||
self.assertNotIn(b"Parent", pages_info)
|
||||
self.assertIn(b"Kids", pages_info)
|
||||
kids_not_used = pages_info[b"Kids"]
|
||||
for page_ref in pdf.pages:
|
||||
while True:
|
||||
if page_ref in kids_not_used:
|
||||
kids_not_used.remove(page_ref)
|
||||
page_info = pdf.read_indirect(page_ref)
|
||||
self.assertIn(b"Parent", page_info)
|
||||
page_ref = page_info[b"Parent"]
|
||||
if page_ref == pdf.pages_ref:
|
||||
break
|
||||
self.assertEqual(pdf.pages_ref, page_info[b"Parent"])
|
||||
self.assertEqual(kids_not_used, [])
|
||||
|
||||
def test_pdf_append(self):
|
||||
# make a PDF file
|
||||
pdf_filename = self.helper_save_as_pdf("RGB", producer="PdfParser")
|
||||
|
@ -156,6 +173,7 @@ class TestFilePdf(PillowTestCase):
|
|||
self.assertEqual(len(pdf.pages), 1)
|
||||
self.assertEqual(len(pdf.info), 1)
|
||||
self.assertEqual(pdf.info.Producer, "PdfParser")
|
||||
self.check_pdf_pages_consistency(pdf)
|
||||
|
||||
# append some info
|
||||
pdf.info.Title = "abc"
|
||||
|
@ -171,6 +189,7 @@ class TestFilePdf(PillowTestCase):
|
|||
self.assertEqual(len(pdf.pages), 1)
|
||||
self.assertEqual(len(pdf.info), 6)
|
||||
self.assertEqual(pdf.info.Title, "abc")
|
||||
self.check_pdf_pages_consistency(pdf)
|
||||
|
||||
# append two images
|
||||
mode_CMYK = hopper("CMYK")
|
||||
|
@ -186,6 +205,7 @@ class TestFilePdf(PillowTestCase):
|
|||
self.assertEqual(pdf.info.Producer, "PdfParser")
|
||||
self.assertEqual(pdf.info.Keywords, "qw)e\\r(ty")
|
||||
self.assertEqual(pdf.info.Subject, u"ghi\uABCD")
|
||||
self.check_pdf_pages_consistency(pdf)
|
||||
|
||||
def test_pdf_info(self):
|
||||
# make a PDF file
|
||||
|
|
|
@ -101,6 +101,9 @@ class IndirectReference(collections.namedtuple("IndirectReferenceTuple", ["objec
|
|||
def __ne__(self, other):
|
||||
return not (self == other)
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.object_id, self.generation))
|
||||
|
||||
|
||||
class IndirectObjectDef(IndirectReference):
|
||||
def __str__(self):
|
||||
|
@ -192,6 +195,9 @@ class PdfName:
|
|||
else:
|
||||
self.name = name.encode("us-ascii")
|
||||
|
||||
def name_as_str(self):
|
||||
return self.name.decode("us-ascii")
|
||||
|
||||
def __eq__(self, other):
|
||||
return (isinstance(other, PdfName) and other.name == self.name) or other == self.name
|
||||
|
||||
|
@ -358,6 +364,7 @@ class PdfParser:
|
|||
self.should_close_buf = True
|
||||
if not filename and hasattr(f, "name"):
|
||||
self.filename = f.name
|
||||
self.cached_objects = {}
|
||||
if buf:
|
||||
self.read_pdf_info()
|
||||
else:
|
||||
|
@ -368,6 +375,7 @@ class PdfParser:
|
|||
self.info_ref = None
|
||||
self.page_tree_root = {}
|
||||
self.pages = []
|
||||
self.orig_pages = []
|
||||
self.pages_ref = None
|
||||
self.last_xref_section_offset = None
|
||||
self.trailer_dict = {}
|
||||
|
@ -414,15 +422,45 @@ class PdfParser:
|
|||
self.del_root()
|
||||
self.root_ref = self.next_object_id(self.f.tell())
|
||||
self.pages_ref = self.next_object_id(0)
|
||||
self.rewrite_pages()
|
||||
self.write_obj(self.root_ref,
|
||||
Type=PdfName(b"Catalog"),
|
||||
Pages=self.pages_ref)
|
||||
self.write_obj(self.pages_ref,
|
||||
Type=PdfName("Pages"),
|
||||
Type=PdfName(b"Pages"),
|
||||
Count=len(self.pages),
|
||||
Kids=self.pages)
|
||||
return self.root_ref
|
||||
|
||||
def rewrite_pages(self):
|
||||
pages_tree_nodes_to_delete = []
|
||||
for i, page_ref in enumerate(self.orig_pages):
|
||||
page_info = self.cached_objects[page_ref]
|
||||
del self.xref_table[page_ref.object_id]
|
||||
pages_tree_nodes_to_delete.append(page_info[PdfName(b"Parent")])
|
||||
if page_ref not in self.pages:
|
||||
# the page has been deleted
|
||||
continue
|
||||
# make dict keys into strings for passing to write_page
|
||||
stringified_page_info = {}
|
||||
for key, value in page_info.items():
|
||||
# key should be a PdfName
|
||||
stringified_page_info[key.name_as_str()] = value
|
||||
stringified_page_info["Parent"] = self.pages_ref
|
||||
new_page_ref = self.write_page(None, **stringified_page_info)
|
||||
for j, cur_page_ref in enumerate(self.pages):
|
||||
if cur_page_ref == page_ref:
|
||||
# replace the page reference with the new one
|
||||
self.pages[j] = new_page_ref
|
||||
# delete redundant Pages tree nodes from xref table
|
||||
for pages_tree_node_ref in pages_tree_nodes_to_delete:
|
||||
while pages_tree_node_ref:
|
||||
pages_tree_node = self.cached_objects[pages_tree_node_ref]
|
||||
if pages_tree_node_ref.object_id in self.xref_table:
|
||||
del self.xref_table[pages_tree_node_ref.object_id]
|
||||
pages_tree_node_ref = pages_tree_node.get(b"Parent", None)
|
||||
self.orig_pages = []
|
||||
|
||||
def write_xref_and_trailer(self, new_root_ref=None):
|
||||
if new_root_ref:
|
||||
self.del_root()
|
||||
|
@ -443,7 +481,7 @@ class PdfParser:
|
|||
if isinstance(ref, int):
|
||||
ref = self.pages[ref]
|
||||
if "Type" not in dict_obj:
|
||||
dict_obj["Type"] = PdfName("Page")
|
||||
dict_obj["Type"] = PdfName(b"Page")
|
||||
if "Parent" not in dict_obj:
|
||||
dict_obj["Parent"] = self.pages_ref
|
||||
return self.write_obj(ref, *objs, **dict_obj)
|
||||
|
@ -474,7 +512,6 @@ class PdfParser:
|
|||
return
|
||||
del self.xref_table[self.root_ref.object_id]
|
||||
del self.xref_table[self.root[b"Pages"].object_id]
|
||||
# XXX TODO delete Pages tree recursively
|
||||
|
||||
@staticmethod
|
||||
def get_buf_from_file(f):
|
||||
|
@ -506,6 +543,8 @@ class PdfParser:
|
|||
self.pages_ref = self.root[b"Pages"]
|
||||
self.page_tree_root = self.read_indirect(self.pages_ref)
|
||||
self.pages = self.linearize_page_tree(self.page_tree_root)
|
||||
# save the original list of page references in case the user modifies, adds or deletes some pages and we need to rewrite the pages and their list
|
||||
self.orig_pages = self.pages[:]
|
||||
|
||||
def next_object_id(self, offset=None):
|
||||
try:
|
||||
|
@ -789,7 +828,9 @@ class PdfParser:
|
|||
offset, generation = self.xref_table[ref[0]]
|
||||
check_format_condition(generation == ref[1], "expected to find generation %s for object ID %s in xref table, instead found generation %s at offset %s" \
|
||||
% (ref[1], ref[0], generation, offset))
|
||||
return self.get_value(self.buf, offset + self.start_offset, expect_indirect=IndirectReference(*ref), max_nesting=max_nesting)[0]
|
||||
value = self.get_value(self.buf, offset + self.start_offset, expect_indirect=IndirectReference(*ref), max_nesting=max_nesting)[0]
|
||||
self.cached_objects[ref] = value
|
||||
return value
|
||||
|
||||
def linearize_page_tree(self, node=None):
|
||||
if node is None:
|
||||
|
|
Loading…
Reference in New Issue
Block a user