Use later value for duplicate xref entries

This commit is contained in:
Andrew Murray 2023-04-20 23:15:20 +10:00
parent 625b9fcf02
commit d2256338b8
3 changed files with 9 additions and 6 deletions

Binary file not shown.

View File

@ -117,3 +117,9 @@ def test_pdf_repr():
assert pdf_repr(b"a)/b\\(c") == rb"(a\)/b\\\(c)" assert pdf_repr(b"a)/b\\(c") == rb"(a\)/b\\\(c)"
assert pdf_repr([123, True, {"a": PdfName(b"b")}]) == b"[ 123 true <<\n/a /b\n>> ]" assert pdf_repr([123, True, {"a": PdfName(b"b")}]) == b"[ 123 true <<\n/a /b\n>> ]"
assert pdf_repr(PdfBinary(b"\x90\x1F\xA0")) == b"<901FA0>" assert pdf_repr(PdfBinary(b"\x90\x1F\xA0")) == b"<901FA0>"
def test_duplicate_xref_entry():
    """Check that a PDF with a duplicated xref entry is parsed, not rejected.

    Opens the ``duplicate_xref_entry.pdf`` fixture and verifies that the first
    field of the xref entry stored for object 6 (the value parsed ahead of the
    generation number) is 1197.
    """
    pdf = PdfParser("Tests/images/duplicate_xref_entry.pdf")
    try:
        assert pdf.xref_table.existing_entries[6][0] == 1197
    finally:
        # Release the parser's file handle even when the assertion fails, so
        # a failing run does not leak the open fixture file.
        pdf.close()

View File

@ -957,13 +957,10 @@ class PdfParser:
check_format_condition(m, "xref entry not found") check_format_condition(m, "xref entry not found")
offset = m.end() offset = m.end()
is_free = m.group(3) == b"f" is_free = m.group(3) == b"f"
generation = int(m.group(2))
if not is_free: if not is_free:
generation = int(m.group(2))
new_entry = (int(m.group(1)), generation) new_entry = (int(m.group(1)), generation)
check_format_condition( if i not in self.xref_table:
i not in self.xref_table or self.xref_table[i] == new_entry,
"xref entry duplicated (and not identical)",
)
self.xref_table[i] = new_entry self.xref_table[i] = new_entry
return offset return offset