Merge pull request #8663 from radarhere/bigtiff

This commit is contained in:
Hugo van Kemenade 2025-01-08 19:15:31 +02:00 committed by GitHub
commit af3b904233
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 108 additions and 45 deletions

View File

@ -117,10 +117,16 @@ class TestFileTiff:
def test_bigtiff_save(self, tmp_path: Path) -> None:
    """Saving with ``big_tiff=True`` must produce a BigTIFF file.

    Covers both a single-frame save and a multi-frame save
    (``save_all=True`` with ``append_images``) to the same path.
    """
    outfile = str(tmp_path / "temp.tif")
    im = hopper()

    # Single frame.
    im.save(outfile, big_tiff=True)
    with Image.open(outfile) as reloaded:
        assert reloaded.tag_v2._bigtiff is True

    # Two frames: the image plus one appended copy of itself.
    im.save(outfile, save_all=True, append_images=[im], big_tiff=True)
    with Image.open(outfile) as reloaded:
        assert reloaded.tag_v2._bigtiff is True
def test_seek_too_large(self) -> None:
with pytest.raises(ValueError, match="Unable to seek to frame"):
@ -740,7 +746,7 @@ class TestFileTiff:
assert reread.n_frames == 3
def test_fixoffsets(self) -> None:
b = BytesIO(b"II\x2a\x00\x00\x00\x00\x00")
b = BytesIO(b"II\x2A\x00\x00\x00\x00\x00")
with TiffImagePlugin.AppendingTiffWriter(b) as a:
b.seek(0)
a.fixOffsets(1, isShort=True)
@ -753,6 +759,37 @@ class TestFileTiff:
with pytest.raises(RuntimeError):
a.fixOffsets(1)
b = BytesIO(b"II\x2A\x00\x00\x00\x00\x00")
with TiffImagePlugin.AppendingTiffWriter(b) as a:
a.offsetOfNewPage = 2**16
b.seek(0)
a.fixOffsets(1, isShort=True)
b = BytesIO(b"II\x2B\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")
with TiffImagePlugin.AppendingTiffWriter(b) as a:
a.offsetOfNewPage = 2**32
b.seek(0)
a.fixOffsets(1, isShort=True)
b.seek(0)
a.fixOffsets(1, isLong=True)
def test_appending_tiff_writer_writelong(self) -> None:
    """``writeLong`` appends the value as four bytes after the existing data."""
    # 16-byte buffer that starts with the little-endian TIFF magic.
    header = b"II\x2A\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    stream = BytesIO(header)
    expected = header + b"\xff\xff\xff\xff"

    with TiffImagePlugin.AppendingTiffWriter(stream) as writer:
        writer.writeLong(2**32 - 1)

        assert stream.getvalue() == expected
def test_appending_tiff_writer_rewritelastshorttolong(self) -> None:
    """``rewriteLastShortToLong`` replaces the last 2 bytes with a 4-byte value."""
    # 16-byte buffer that starts with the little-endian TIFF magic.
    header = b"II\x2A\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    stream = BytesIO(header)
    # The final two bytes are overwritten and the buffer grows by two.
    expected = header[:-2] + b"\xff\xff\xff\xff"

    with TiffImagePlugin.AppendingTiffWriter(stream) as writer:
        writer.rewriteLastShortToLong(2**32 - 1)

        assert stream.getvalue() == expected
def test_saving_icc_profile(self, tmp_path: Path) -> None:
# Tests saving TIFF with icc_profile set.
# At the time of writing this will only work for non-compressed tiffs

View File

@ -962,13 +962,16 @@ class ImageFileDirectory_v2(_IFDv2Base):
result = self._pack("Q" if self._bigtiff else "H", len(self._tags_v2))
entries: list[tuple[int, int, int, bytes, bytes]] = []
offset += len(result) + len(self._tags_v2) * (20 if self._bigtiff else 12) + 4
fmt = "Q" if self._bigtiff else "L"
fmt_size = 8 if self._bigtiff else 4
offset += (
len(result) + len(self._tags_v2) * (20 if self._bigtiff else 12) + fmt_size
)
stripoffsets = None
# pass 1: convert tags to binary format
# always write tags in ascending order
fmt = "Q" if self._bigtiff else "L"
fmt_size = 8 if self._bigtiff else 4
for tag, value in sorted(self._tags_v2.items()):
if tag == STRIPOFFSETS:
stripoffsets = len(entries)
@ -1024,7 +1027,7 @@ class ImageFileDirectory_v2(_IFDv2Base):
)
# -- overwrite here for multi-page --
result += b"\0\0\0\0" # end of entries
result += self._pack(fmt, 0) # end of entries
# pass 3: write auxiliary data to file
for tag, typ, count, value, data in entries:
@ -2044,20 +2047,21 @@ class AppendingTiffWriter(io.BytesIO):
self.offsetOfNewPage = 0
self.IIMM = iimm = self.f.read(4)
self._bigtiff = b"\x2B" in iimm
if not iimm:
# empty file - first page
self.isFirst = True
return
self.isFirst = False
if iimm == b"II\x2a\x00":
self.setEndian("<")
elif iimm == b"MM\x00\x2a":
self.setEndian(">")
else:
if iimm not in PREFIXES:
msg = "Invalid TIFF file header"
raise RuntimeError(msg)
self.setEndian("<" if iimm.startswith(II) else ">")
if self._bigtiff:
self.f.seek(4, os.SEEK_CUR)
self.skipIFDs()
self.goToEnd()
@ -2077,11 +2081,13 @@ class AppendingTiffWriter(io.BytesIO):
msg = "IIMM of new page doesn't match IIMM of first page"
raise RuntimeError(msg)
ifd_offset = self.readLong()
if self._bigtiff:
self.f.seek(4, os.SEEK_CUR)
ifd_offset = self._read(8 if self._bigtiff else 4)
ifd_offset += self.offsetOfNewPage
assert self.whereToWriteNewIFDOffset is not None
self.f.seek(self.whereToWriteNewIFDOffset)
self.writeLong(ifd_offset)
self._write(ifd_offset, 8 if self._bigtiff else 4)
self.f.seek(ifd_offset)
self.fixIFD()
@ -2127,18 +2133,20 @@ class AppendingTiffWriter(io.BytesIO):
self.endian = endian
self.longFmt = f"{self.endian}L"
self.shortFmt = f"{self.endian}H"
self.tagFormat = f"{self.endian}HHL"
self.tagFormat = f"{self.endian}HH" + ("Q" if self._bigtiff else "L")
def skipIFDs(self) -> None:
    """Follow the chain of IFDs until the terminating zero offset.

    Starting at the current file position, repeatedly read a "next IFD"
    offset, jump to that IFD and skip over its tag entries.  When an offset
    of ``0`` is read, the position of that offset field is stored in
    ``self.whereToWriteNewIFDOffset`` and the walk stops.

    Field widths depend on ``self._bigtiff``: offsets are 8 bytes, the tag
    count is 8 bytes and each entry is 20 bytes for BigTIFF, versus 4, 2 and
    12 bytes otherwise.
    """
    if self._bigtiff:
        offset_size, count_size, entry_size = 8, 8, 20
    else:
        offset_size, count_size, entry_size = 4, 2, 12

    while True:
        ifd_offset = self._read(offset_size)
        if ifd_offset == 0:
            # Step back over the zero we just consumed: this is the slot
            # that must later be overwritten with the new page's offset.
            self.whereToWriteNewIFDOffset = self.f.tell() - offset_size
            break

        self.f.seek(ifd_offset)
        num_tags = self._read(count_size)
        # Skip all entries; the next read is this IFD's "next IFD" offset.
        self.f.seek(num_tags * entry_size, os.SEEK_CUR)
def write(self, data: Buffer, /) -> int:
    """Write ``data`` to the wrapped file object and return what it returns."""
    written = self.f.write(data)
    return written
@ -2168,17 +2176,19 @@ class AppendingTiffWriter(io.BytesIO):
msg = f"wrote only {bytes_written} bytes but wanted {expected}"
raise RuntimeError(msg)
def _rewriteLast(
    self, value: int, field_size: int, new_field_size: int = 0
) -> None:
    """Overwrite the field that ends at the current file position.

    Seeks back ``field_size`` bytes and writes ``value`` there, packed with
    the writer's endianness.

    :param value: The integer to write.
    :param field_size: Width in bytes of the field being replaced.
    :param new_field_size: Width in bytes to write.  ``0`` (the default)
        means "same as ``field_size``"; a larger value widens the field,
        overwriting whatever followed it.
    """
    self.f.seek(-field_size, os.SEEK_CUR)
    new_field_size = new_field_size or field_size
    bytes_written = self.f.write(
        struct.pack(self.endian + self._fmt(new_field_size), value)
    )
    self._verify_bytes_written(bytes_written, new_field_size)

def rewriteLastShortToLong(self, value: int) -> None:
    """Replace the 2-byte field ending at the current position by 4 bytes."""
    self._rewriteLast(value, 2, 4)
def rewriteLastShort(self, value: int) -> None:
    """Overwrite the 2-byte field ending at the current position with ``value``."""
    short_size = 2
    return self._rewriteLast(value, short_size)
@ -2186,13 +2196,17 @@ class AppendingTiffWriter(io.BytesIO):
def rewriteLastLong(self, value: int) -> None:
    """Overwrite the 4-byte field ending at the current position with ``value``."""
    long_size = 4
    return self._rewriteLast(value, long_size)
def _write(self, value: int, field_size: int) -> None:
    """Pack ``value`` into ``field_size`` bytes and write it at the cursor.

    The byte order comes from ``self.endian``; the number of bytes actually
    written is passed to ``self._verify_bytes_written`` for checking.
    """
    packed = struct.pack(self.endian + self._fmt(field_size), value)
    written = self.f.write(packed)
    self._verify_bytes_written(written, field_size)
def writeShort(self, value: int) -> None:
    """Write ``value`` as a 2-byte field at the current file position."""
    self._write(value, 2)
def writeLong(self, value: int) -> None:
    """Write ``value`` as a 4-byte field at the current file position."""
    self._write(value, 4)
def close(self) -> None:
self.finalize()
@ -2200,24 +2214,27 @@ class AppendingTiffWriter(io.BytesIO):
self.f.close()
def fixIFD(self) -> None:
num_tags = self.readShort()
num_tags = self._read(8 if self._bigtiff else 2)
for i in range(num_tags):
tag, field_type, count = struct.unpack(self.tagFormat, self.f.read(8))
tag, field_type, count = struct.unpack(
self.tagFormat, self.f.read(12 if self._bigtiff else 8)
)
field_size = self.fieldSizes[field_type]
total_size = field_size * count
is_local = total_size <= 4
fmt_size = 8 if self._bigtiff else 4
is_local = total_size <= fmt_size
if not is_local:
offset = self.readLong() + self.offsetOfNewPage
self.rewriteLastLong(offset)
offset = self._read(fmt_size) + self.offsetOfNewPage
self._rewriteLast(offset, fmt_size)
if tag in self.Tags:
cur_pos = self.f.tell()
if is_local:
self._fixOffsets(count, field_size)
self.f.seek(cur_pos + 4)
self.f.seek(cur_pos + fmt_size)
else:
self.f.seek(offset)
self._fixOffsets(count, field_size)
@ -2225,24 +2242,33 @@ class AppendingTiffWriter(io.BytesIO):
elif is_local:
# skip the locally stored value that is not an offset
self.f.seek(4, os.SEEK_CUR)
self.f.seek(fmt_size, os.SEEK_CUR)
def _fixOffsets(self, count: int, field_size: int) -> None:
    """Shift ``count`` consecutive offsets by ``self.offsetOfNewPage``.

    Each offset is read at the current file position as a ``field_size``-byte
    integer, increased by ``self.offsetOfNewPage`` and written back in place.

    If the shifted offset no longer fits its field, the field is widened:
    2 bytes to 4 once the offset reaches ``2**16``, or (BigTIFF only) 2 or
    4 bytes to 8 once it reaches ``2**32``.  Widening also rewrites the
    entry's type field, so it is only supported when ``count == 1``, i.e.
    the value is stored inline directly after the entry's type and count.

    :param count: Number of offsets to fix.
    :param field_size: Width in bytes of each stored offset.
    :raises RuntimeError: If widening is needed and ``count != 1``.
    """
    for _ in range(count):
        offset = self._read(field_size) + self.offsetOfNewPage

        new_field_size = 0
        if self._bigtiff and field_size in (2, 4) and offset >= 2**32:
            # offset is now too large - we must convert long to long8
            new_field_size = 8
        elif field_size == 2 and offset >= 2**16:
            # offset is now too large - we must convert short to long
            new_field_size = 4

        if not new_field_size:
            self._rewriteLast(offset, field_size)
            continue

        if count != 1:
            msg = "not implemented"
            raise RuntimeError(msg)  # XXX TODO

        # simple case - the offset is just one and therefore it is
        # local (not referenced with another offset)
        self._rewriteLast(offset, field_size, new_field_size)

        # Move back past the new offset, past 'count', and before
        # 'field_type'.  'count' is 8 bytes wide in BigTIFF, 4 otherwise.
        count_size = 8 if self._bigtiff else 4
        rewind = new_field_size + count_size + 2
        self.f.seek(-rewind, os.SEEK_CUR)

        # The type field holds a TIFF type code, not a byte width:
        # LONG8 is 16, so the width (8) must not be written here.
        self.writeShort(TiffTags.LONG8 if new_field_size == 8 else TiffTags.LONG)

        # Return to just after the rewritten offset; writeShort already
        # advanced the position by 2 bytes.
        self.f.seek(rewind - 2, os.SEEK_CUR)