diff --git a/Tests/images/duplicate_xref_entry.pdf b/Tests/images/duplicate_xref_entry.pdf
new file mode 100644
index 000000000..f57a57d61
Binary files /dev/null and b/Tests/images/duplicate_xref_entry.pdf differ
diff --git a/Tests/test_pdfparser.py b/Tests/test_pdfparser.py
index 43e244c7b..105a838d9 100644
--- a/Tests/test_pdfparser.py
+++ b/Tests/test_pdfparser.py
@@ -117,3 +117,13 @@ def test_pdf_repr():
     assert pdf_repr(b"a)/b\\(c") == rb"(a\)/b\\\(c)"
     assert pdf_repr([123, True, {"a": PdfName(b"b")}]) == b"[ 123 true <<\n/a /b\n>> ]"
     assert pdf_repr(PdfBinary(b"\x90\x1F\xA0")) == b"<901FA0>"
+
+
+def test_duplicate_xref_entry():
+    """Check the xref entry kept for object 6 when the file duplicates an entry."""
+    pdf = PdfParser("Tests/images/duplicate_xref_entry.pdf")
+    try:
+        assert pdf.xref_table.existing_entries[6][0] == 1197
+    finally:
+        # Close the parser even if the assertion fails.
+        pdf.close()
diff --git a/src/PIL/PdfParser.py b/src/PIL/PdfParser.py
index 1b3cb52a2..dc1012f54 100644
--- a/src/PIL/PdfParser.py
+++ b/src/PIL/PdfParser.py
@@ -957,14 +957,13 @@ class PdfParser:
                 check_format_condition(m, "xref entry not found")
                 offset = m.end()
                 is_free = m.group(3) == b"f"
-                generation = int(m.group(2))
                 if not is_free:
+                    generation = int(m.group(2))
                     new_entry = (int(m.group(1)), generation)
-                    check_format_condition(
-                        i not in self.xref_table or self.xref_table[i] == new_entry,
-                        "xref entry duplicated (and not identical)",
-                    )
-                    self.xref_table[i] = new_entry
+                    # Keep the first entry seen for an object number; a later
+                    # duplicate is ignored instead of being treated as an error.
+                    if i not in self.xref_table:
+                        self.xref_table[i] = new_entry
         return offset
 
     def read_indirect(self, ref, max_nesting=-1):