mirror of
				https://github.com/python-pillow/Pillow.git
				synced 2025-11-04 01:47:47 +03:00 
			
		
		
		
	issue #2959: fix wrong Parent of pre-existing Page objects when appending
This commit is contained in:
		
							parent
							
								
									113d67214c
								
							
						
					
					
						commit
						24ecfe315a
					
				| 
						 | 
					@ -147,6 +147,23 @@ class TestFilePdf(PillowTestCase):
 | 
				
			||||||
        finally:
 | 
					        finally:
 | 
				
			||||||
            os.rmdir(temp_dir)
 | 
					            os.rmdir(temp_dir)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def check_pdf_pages_consistency(self, pdf):
					        """Assert that the page tree of ``pdf`` is internally consistent.

					        The root Pages node must have ``Kids`` and no ``Parent``. Following
					        the ``Parent`` links from every entry of ``pdf.pages`` must reach
					        ``pdf.pages_ref``, and every entry of the root's ``Kids`` must be
					        encountered on one of those walks.

					        :param pdf: parser object exposing ``pages``, ``pages_ref`` and
					            ``read_indirect()``.
					        """
					        pages_info = pdf.read_indirect(pdf.pages_ref)
					        self.assertNotIn(b"Parent", pages_info)
					        self.assertIn(b"Kids", pages_info)
					        # Work on a copy: the Kids list belongs to the object handed back by
					        # the parser, and removing entries from it in place would corrupt
					        # that object for any later use.
					        kids_not_used = list(pages_info[b"Kids"])
					        for page_ref in pdf.pages:
					            ancestors_seen = []
					            while True:
					                # A Parent chain that loops back on itself would otherwise
					                # keep this walk running forever; fail the test instead.
					                self.assertNotIn(page_ref, ancestors_seen)
					                ancestors_seen.append(page_ref)
					                if page_ref in kids_not_used:
					                    kids_not_used.remove(page_ref)
					                page_info = pdf.read_indirect(page_ref)
					                self.assertIn(b"Parent", page_info)
					                page_ref = page_info[b"Parent"]
					                if page_ref == pdf.pages_ref:
					                    break
					            self.assertEqual(pdf.pages_ref, page_info[b"Parent"])
					        # every direct child of the root must have been reached from a page
					        self.assertEqual(kids_not_used, [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_pdf_append(self):
 | 
					    def test_pdf_append(self):
 | 
				
			||||||
        # make a PDF file
 | 
					        # make a PDF file
 | 
				
			||||||
        pdf_filename = self.helper_save_as_pdf("RGB", producer="PdfParser")
 | 
					        pdf_filename = self.helper_save_as_pdf("RGB", producer="PdfParser")
 | 
				
			||||||
| 
						 | 
					@ -156,6 +173,7 @@ class TestFilePdf(PillowTestCase):
 | 
				
			||||||
            self.assertEqual(len(pdf.pages), 1)
 | 
					            self.assertEqual(len(pdf.pages), 1)
 | 
				
			||||||
            self.assertEqual(len(pdf.info), 1)
 | 
					            self.assertEqual(len(pdf.info), 1)
 | 
				
			||||||
            self.assertEqual(pdf.info.Producer, "PdfParser")
 | 
					            self.assertEqual(pdf.info.Producer, "PdfParser")
 | 
				
			||||||
 | 
					            self.check_pdf_pages_consistency(pdf)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # append some info
 | 
					            # append some info
 | 
				
			||||||
            pdf.info.Title = "abc"
 | 
					            pdf.info.Title = "abc"
 | 
				
			||||||
| 
						 | 
					@ -171,6 +189,7 @@ class TestFilePdf(PillowTestCase):
 | 
				
			||||||
            self.assertEqual(len(pdf.pages), 1)
 | 
					            self.assertEqual(len(pdf.pages), 1)
 | 
				
			||||||
            self.assertEqual(len(pdf.info), 6)
 | 
					            self.assertEqual(len(pdf.info), 6)
 | 
				
			||||||
            self.assertEqual(pdf.info.Title, "abc")
 | 
					            self.assertEqual(pdf.info.Title, "abc")
 | 
				
			||||||
 | 
					            self.check_pdf_pages_consistency(pdf)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # append two images
 | 
					        # append two images
 | 
				
			||||||
        mode_CMYK = hopper("CMYK")
 | 
					        mode_CMYK = hopper("CMYK")
 | 
				
			||||||
| 
						 | 
					@ -186,6 +205,7 @@ class TestFilePdf(PillowTestCase):
 | 
				
			||||||
            self.assertEqual(pdf.info.Producer, "PdfParser")
 | 
					            self.assertEqual(pdf.info.Producer, "PdfParser")
 | 
				
			||||||
            self.assertEqual(pdf.info.Keywords, "qw)e\\r(ty")
 | 
					            self.assertEqual(pdf.info.Keywords, "qw)e\\r(ty")
 | 
				
			||||||
            self.assertEqual(pdf.info.Subject, u"ghi\uABCD")
 | 
					            self.assertEqual(pdf.info.Subject, u"ghi\uABCD")
 | 
				
			||||||
 | 
					            self.check_pdf_pages_consistency(pdf)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_pdf_info(self):
 | 
					    def test_pdf_info(self):
 | 
				
			||||||
        # make a PDF file
 | 
					        # make a PDF file
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -101,6 +101,9 @@ class IndirectReference(collections.namedtuple("IndirectReferenceTuple", ["objec
 | 
				
			||||||
    def __ne__(self, other):
 | 
					    def __ne__(self, other):
 | 
				
			||||||
        return not (self == other)
 | 
					        return not (self == other)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __hash__(self):
					        """Return a hash derived from the object ID and generation number."""
					        # NOTE(review): __eq__ is defined outside this view; presumably it
					        # compares these same two fields -- confirm they stay in step.
					        identity = (self.object_id, self.generation)
					        return hash(identity)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class IndirectObjectDef(IndirectReference):
 | 
					class IndirectObjectDef(IndirectReference):
 | 
				
			||||||
    def __str__(self):
 | 
					    def __str__(self):
 | 
				
			||||||
| 
						 | 
					@ -192,6 +195,9 @@ class PdfName:
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.name = name.encode("us-ascii")
 | 
					            self.name = name.encode("us-ascii")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def name_as_str(self):
					        """Return the name, which is stored as bytes, decoded as US-ASCII text."""
					        raw_name = self.name
					        return raw_name.decode("us-ascii")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __eq__(self, other):
 | 
					    def __eq__(self, other):
 | 
				
			||||||
        return (isinstance(other, PdfName) and other.name == self.name) or other == self.name
 | 
					        return (isinstance(other, PdfName) and other.name == self.name) or other == self.name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -358,6 +364,7 @@ class PdfParser:
 | 
				
			||||||
            self.should_close_buf = True
 | 
					            self.should_close_buf = True
 | 
				
			||||||
            if not filename and hasattr(f, "name"):
 | 
					            if not filename and hasattr(f, "name"):
 | 
				
			||||||
                self.filename = f.name
 | 
					                self.filename = f.name
 | 
				
			||||||
 | 
					        self.cached_objects = {}
 | 
				
			||||||
        if buf:
 | 
					        if buf:
 | 
				
			||||||
            self.read_pdf_info()
 | 
					            self.read_pdf_info()
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
| 
						 | 
					@ -368,6 +375,7 @@ class PdfParser:
 | 
				
			||||||
            self.info_ref = None
 | 
					            self.info_ref = None
 | 
				
			||||||
            self.page_tree_root = {}
 | 
					            self.page_tree_root = {}
 | 
				
			||||||
            self.pages = []
 | 
					            self.pages = []
 | 
				
			||||||
 | 
					            self.orig_pages = []
 | 
				
			||||||
            self.pages_ref = None
 | 
					            self.pages_ref = None
 | 
				
			||||||
            self.last_xref_section_offset = None
 | 
					            self.last_xref_section_offset = None
 | 
				
			||||||
            self.trailer_dict = {}
 | 
					            self.trailer_dict = {}
 | 
				
			||||||
| 
						 | 
					@ -414,15 +422,45 @@ class PdfParser:
 | 
				
			||||||
        self.del_root()
 | 
					        self.del_root()
 | 
				
			||||||
        self.root_ref = self.next_object_id(self.f.tell())
 | 
					        self.root_ref = self.next_object_id(self.f.tell())
 | 
				
			||||||
        self.pages_ref = self.next_object_id(0)
 | 
					        self.pages_ref = self.next_object_id(0)
 | 
				
			||||||
 | 
					        self.rewrite_pages()
 | 
				
			||||||
        self.write_obj(self.root_ref,
 | 
					        self.write_obj(self.root_ref,
 | 
				
			||||||
            Type=PdfName(b"Catalog"),
 | 
					            Type=PdfName(b"Catalog"),
 | 
				
			||||||
            Pages=self.pages_ref)
 | 
					            Pages=self.pages_ref)
 | 
				
			||||||
        self.write_obj(self.pages_ref,
 | 
					        self.write_obj(self.pages_ref,
 | 
				
			||||||
            Type=PdfName("Pages"),
 | 
					            Type=PdfName(b"Pages"),
 | 
				
			||||||
            Count=len(self.pages),
 | 
					            Count=len(self.pages),
 | 
				
			||||||
            Kids=self.pages)
 | 
					            Kids=self.pages)
 | 
				
			||||||
        return self.root_ref
 | 
					        return self.root_ref
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def rewrite_pages(self):
					        """Re-emit the pre-existing pages under the current ``pages_ref``.

					        For every reference in ``self.orig_pages`` the old xref entry is
					        dropped. If the page is still listed in ``self.pages`` it is written
					        again through ``write_page`` with ``Parent`` set to
					        ``self.pages_ref``, and its slot(s) in ``self.pages`` are pointed at
					        the reference ``write_page`` returns. Afterwards the xref entries of
					        all former Pages tree ancestors are dropped and ``self.orig_pages``
					        is emptied.

					        Page and tree-node dictionaries are taken from
					        ``self.cached_objects``; a reference that is missing there raises
					        ``KeyError``.
					        """
					        pages_tree_nodes_to_delete = []
					        for page_ref in self.orig_pages:
					            page_info = self.cached_objects[page_ref]
					            del self.xref_table[page_ref.object_id]
					            # same bytes-key lookup as used for the ancestor walk below
					            pages_tree_nodes_to_delete.append(page_info[b"Parent"])
					            if page_ref not in self.pages:
					                # the page has been deleted
					                continue
					            # write_page takes the entries as keyword arguments, so the
					            # keys (expected to be PdfName objects) must become strings
					            stringified_page_info = {
					                key.name_as_str(): value for key, value in page_info.items()
					            }
					            stringified_page_info["Parent"] = self.pages_ref
					            # NOTE(review): passing None presumably makes write_page
					            # allocate a fresh object -- confirm against write_obj
					            new_page_ref = self.write_page(None, **stringified_page_info)
					            for j, cur_page_ref in enumerate(self.pages):
					                if cur_page_ref == page_ref:
					                    # replace the page reference with the new one
					                    self.pages[j] = new_page_ref
					        # delete redundant Pages tree nodes from xref table
					        visited_nodes = set()
					        for pages_tree_node_ref in pages_tree_nodes_to_delete:
					            # Stop at an already visited node: its ancestors were handled
					            # on an earlier walk, and a malformed file whose Parent links
					            # form a cycle must not hang the writer.
					            while pages_tree_node_ref and pages_tree_node_ref not in visited_nodes:
					                visited_nodes.add(pages_tree_node_ref)
					                pages_tree_node = self.cached_objects[pages_tree_node_ref]
					                if pages_tree_node_ref.object_id in self.xref_table:
					                    del self.xref_table[pages_tree_node_ref.object_id]
					                pages_tree_node_ref = pages_tree_node.get(b"Parent", None)
					        self.orig_pages = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def write_xref_and_trailer(self, new_root_ref=None):
 | 
					    def write_xref_and_trailer(self, new_root_ref=None):
 | 
				
			||||||
        if new_root_ref:
 | 
					        if new_root_ref:
 | 
				
			||||||
            self.del_root()
 | 
					            self.del_root()
 | 
				
			||||||
| 
						 | 
					@ -443,7 +481,7 @@ class PdfParser:
 | 
				
			||||||
        if isinstance(ref, int):
 | 
					        if isinstance(ref, int):
 | 
				
			||||||
            ref = self.pages[ref]
 | 
					            ref = self.pages[ref]
 | 
				
			||||||
        if "Type" not in dict_obj:
 | 
					        if "Type" not in dict_obj:
 | 
				
			||||||
            dict_obj["Type"] = PdfName("Page")
 | 
					            dict_obj["Type"] = PdfName(b"Page")
 | 
				
			||||||
        if "Parent" not in dict_obj:
 | 
					        if "Parent" not in dict_obj:
 | 
				
			||||||
            dict_obj["Parent"] = self.pages_ref
 | 
					            dict_obj["Parent"] = self.pages_ref
 | 
				
			||||||
        return self.write_obj(ref, *objs, **dict_obj)
 | 
					        return self.write_obj(ref, *objs, **dict_obj)
 | 
				
			||||||
| 
						 | 
					@ -474,7 +512,6 @@ class PdfParser:
 | 
				
			||||||
            return
 | 
					            return
 | 
				
			||||||
        del self.xref_table[self.root_ref.object_id]
 | 
					        del self.xref_table[self.root_ref.object_id]
 | 
				
			||||||
        del self.xref_table[self.root[b"Pages"].object_id]
 | 
					        del self.xref_table[self.root[b"Pages"].object_id]
 | 
				
			||||||
        # XXX TODO delete Pages tree recursively
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def get_buf_from_file(f):
 | 
					    def get_buf_from_file(f):
 | 
				
			||||||
| 
						 | 
					@ -506,6 +543,8 @@ class PdfParser:
 | 
				
			||||||
        self.pages_ref = self.root[b"Pages"]
 | 
					        self.pages_ref = self.root[b"Pages"]
 | 
				
			||||||
        self.page_tree_root = self.read_indirect(self.pages_ref)
 | 
					        self.page_tree_root = self.read_indirect(self.pages_ref)
 | 
				
			||||||
        self.pages = self.linearize_page_tree(self.page_tree_root)
 | 
					        self.pages = self.linearize_page_tree(self.page_tree_root)
 | 
				
			||||||
 | 
					        # save the original list of page references in case the user modifies, adds or deletes some pages and we need to rewrite the pages and their list
 | 
				
			||||||
 | 
					        self.orig_pages = self.pages[:]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def next_object_id(self, offset=None):
 | 
					    def next_object_id(self, offset=None):
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
| 
						 | 
					@ -789,7 +828,9 @@ class PdfParser:
 | 
				
			||||||
        offset, generation = self.xref_table[ref[0]]
 | 
					        offset, generation = self.xref_table[ref[0]]
 | 
				
			||||||
        check_format_condition(generation == ref[1], "expected to find generation %s for object ID %s in xref table, instead found generation %s at offset %s" \
 | 
					        check_format_condition(generation == ref[1], "expected to find generation %s for object ID %s in xref table, instead found generation %s at offset %s" \
 | 
				
			||||||
            % (ref[1], ref[0], generation, offset))
 | 
					            % (ref[1], ref[0], generation, offset))
 | 
				
			||||||
        return self.get_value(self.buf, offset + self.start_offset, expect_indirect=IndirectReference(*ref), max_nesting=max_nesting)[0]
 | 
					        value = self.get_value(self.buf, offset + self.start_offset, expect_indirect=IndirectReference(*ref), max_nesting=max_nesting)[0]
 | 
				
			||||||
 | 
					        self.cached_objects[ref] = value
 | 
				
			||||||
 | 
					        return value
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def linearize_page_tree(self, node=None):
 | 
					    def linearize_page_tree(self, node=None):
 | 
				
			||||||
        if node is None:
 | 
					        if node is None:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user