2018-01-25 02:44:59 +03:00
|
|
|
import io
|
2018-01-24 04:28:39 +03:00
|
|
|
import os
|
2014-06-10 13:10:47 +04:00
|
|
|
import os.path
|
2018-01-24 04:28:39 +03:00
|
|
|
import tempfile
|
2018-07-29 15:33:59 +03:00
|
|
|
import time
|
2014-05-11 10:01:09 +04:00
|
|
|
|
2019-07-06 23:40:53 +03:00
|
|
|
from PIL import Image, PdfParser
|
|
|
|
|
|
|
|
from .helper import PillowTestCase, hopper
|
|
|
|
|
2014-05-11 10:01:09 +04:00
|
|
|
|
2014-06-10 13:10:47 +04:00
|
|
|
class TestFilePdf(PillowTestCase):
    """Tests for saving images as PDF and inspecting the result with PdfParser."""

    def helper_save_as_pdf(self, mode, **kwargs):
        """Save a hopper image of the given mode as a PDF and check the file.

        Any extra keyword arguments are forwarded unchanged to ``save``.
        Returns the path of the PDF file that was written.
        """
        # Arrange
        image = hopper(mode)
        pdf_path = self.tempfile("temp_" + mode + ".pdf")

        # Act
        image.save(pdf_path, **kwargs)

        # Assert: the file exists and is not empty
        self.assertTrue(os.path.isfile(pdf_path))
        self.assertGreater(os.path.getsize(pdf_path), 0)

        # When appending, more than one page is expected; otherwise at least one
        appending = kwargs.get("append_images", False) or kwargs.get("append", False)
        fewer_than_expected = 1 if appending else 0
        with PdfParser.PdfParser(pdf_path) as pdf:
            self.assertGreater(len(pdf.pages), fewer_than_expected)

        # The numbers following "/MediaBox [ 0 0 " must match the image size
        with open(pdf_path, "rb") as fp:
            raw = fp.read()
        media_box = raw.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0]
        size = tuple(int(d) for d in media_box.split())
        self.assertEqual(image.size, size)

        return pdf_path

    def test_monochrome(self):
        # Arrange / Act / Assert
        self.helper_save_as_pdf("1")

    def test_greyscale(self):
        # Arrange / Act / Assert
        self.helper_save_as_pdf("L")

    def test_rgb(self):
        # Arrange / Act / Assert
        self.helper_save_as_pdf("RGB")

    def test_p_mode(self):
        # Arrange / Act / Assert
        self.helper_save_as_pdf("P")

    def test_cmyk_mode(self):
        # Arrange / Act / Assert
        self.helper_save_as_pdf("CMYK")

    def test_unsupported_mode(self):
        # Saving an "LA" image as PDF is expected to raise ValueError
        im = hopper("LA")
        outfile = self.tempfile("temp_LA.pdf")

        with self.assertRaises(ValueError):
            im.save(outfile)

    def test_save_all(self):
        # Single frame image
        self.helper_save_as_pdf("RGB", save_all=True)

        # Multiframe image
        with Image.open("Tests/images/dispose_bgnd.gif") as im:

            outfile = self.tempfile("temp.pdf")
            im.save(outfile, save_all=True)

            self.assertTrue(os.path.isfile(outfile))
            self.assertGreater(os.path.getsize(outfile), 0)

            # Append images
            extra_frames = [hopper()]
            im.copy().save(outfile, save_all=True, append_images=extra_frames)

            self.assertTrue(os.path.isfile(outfile))
            self.assertGreater(os.path.getsize(outfile), 0)

            # Test appending using a generator
            def im_generator(images):
                yield from images

            im.save(outfile, save_all=True, append_images=im_generator(extra_frames))

        self.assertTrue(os.path.isfile(outfile))
        self.assertGreater(os.path.getsize(outfile), 0)

        # Append JPEG images
        with Image.open("Tests/images/flower.jpg") as jpeg:
            jpeg.save(outfile, save_all=True, append_images=[jpeg.copy()])

        self.assertTrue(os.path.isfile(outfile))
        self.assertGreater(os.path.getsize(outfile), 0)

    def test_multiframe_normal_save(self):
        # Test saving a multiframe image without save_all
        with Image.open("Tests/images/dispose_bgnd.gif") as im:

            outfile = self.tempfile("temp.pdf")
            im.save(outfile)

        self.assertTrue(os.path.isfile(outfile))
        self.assertGreater(os.path.getsize(outfile), 0)

    def test_pdf_open(self):
        # fail on a buffer full of null bytes
        with self.assertRaises(PdfParser.PdfFormatError):
            PdfParser.PdfParser(buf=bytearray(65536))

        # make an empty PDF object
        with PdfParser.PdfParser() as empty_pdf:
            self.assertEqual(len(empty_pdf.pages), 0)
            self.assertEqual(len(empty_pdf.info), 0)
            self.assertFalse(empty_pdf.should_close_buf)
            self.assertFalse(empty_pdf.should_close_file)

        # make a PDF file
        pdf_filename = self.helper_save_as_pdf("RGB")

        # open the PDF file
        with PdfParser.PdfParser(filename=pdf_filename) as hopper_pdf:
            self.assertEqual(len(hopper_pdf.pages), 1)
            self.assertTrue(hopper_pdf.should_close_buf)
            self.assertTrue(hopper_pdf.should_close_file)

        # read a PDF file from a buffer with a non-zero offset
        with open(pdf_filename, "rb") as f:
            content = b"xyzzy" + f.read()
        with PdfParser.PdfParser(buf=content, start_offset=5) as hopper_pdf:
            self.assertEqual(len(hopper_pdf.pages), 1)
            self.assertFalse(hopper_pdf.should_close_buf)
            self.assertFalse(hopper_pdf.should_close_file)

        # read a PDF file from an already open file
        with open(pdf_filename, "rb") as f, PdfParser.PdfParser(f=f) as hopper_pdf:
            self.assertEqual(len(hopper_pdf.pages), 1)
            self.assertTrue(hopper_pdf.should_close_buf)
            self.assertFalse(hopper_pdf.should_close_file)

    def test_pdf_append_fails_on_nonexistent_file(self):
        # Appending to a path that does not exist is expected to raise IOError
        im = hopper("RGB")
        with tempfile.TemporaryDirectory() as temp_dir:
            missing_path = os.path.join(temp_dir, "nonexistent.pdf")
            with self.assertRaises(IOError):
                im.save(missing_path, append=True)

    def check_pdf_pages_consistency(self, pdf):
        """Assert that the page tree of ``pdf`` is internally consistent.

        The root pages object must have ``Kids`` but no ``Parent``; every page
        must reach the root by following ``Parent`` entries; and every entry
        of the root's ``Kids`` must be encountered along the way.
        """
        root_info = pdf.read_indirect(pdf.pages_ref)
        self.assertNotIn(b"Parent", root_info)
        self.assertIn(b"Kids", root_info)

        # Entries are removed from this list as they are encountered below
        kids_not_used = root_info[b"Kids"]
        for node_ref in pdf.pages:
            # Follow the Parent chain upwards until the root is reached
            while True:
                if node_ref in kids_not_used:
                    kids_not_used.remove(node_ref)
                node_info = pdf.read_indirect(node_ref)
                self.assertIn(b"Parent", node_info)
                node_ref = node_info[b"Parent"]
                if node_ref == pdf.pages_ref:
                    break
            self.assertEqual(pdf.pages_ref, node_info[b"Parent"])
        self.assertEqual(kids_not_used, [])

    def test_pdf_append(self):
        # make a PDF file
        pdf_filename = self.helper_save_as_pdf("RGB", producer="PdfParser")
        expected_title = os.path.splitext(os.path.basename(pdf_filename))[0]

        # open it, check pages and info
        with PdfParser.PdfParser(pdf_filename, mode="r+b") as pdf:
            self.assertEqual(len(pdf.pages), 1)
            self.assertEqual(len(pdf.info), 4)
            self.assertEqual(pdf.info.Title, expected_title)
            self.assertEqual(pdf.info.Producer, "PdfParser")
            self.assertIn(b"CreationDate", pdf.info)
            self.assertIn(b"ModDate", pdf.info)
            self.check_pdf_pages_consistency(pdf)

            # append some info
            pdf.info.Title = "abc"
            pdf.info.Author = "def"
            pdf.info.Subject = "ghi\uABCD"
            pdf.info.Keywords = "qw)e\\r(ty"
            pdf.info.Creator = "hopper()"
            pdf.start_writing()
            pdf.write_xref_and_trailer()

        # open it again, check pages and info again
        with PdfParser.PdfParser(pdf_filename) as pdf:
            self.assertEqual(len(pdf.pages), 1)
            self.assertEqual(len(pdf.info), 8)
            self.assertEqual(pdf.info.Title, "abc")
            self.assertIn(b"CreationDate", pdf.info)
            self.assertIn(b"ModDate", pdf.info)
            self.check_pdf_pages_consistency(pdf)

        # append two images
        cmyk_page = hopper("CMYK")
        palette_page = hopper("P")
        cmyk_page.save(
            pdf_filename, append=True, save_all=True, append_images=[palette_page]
        )

        # open the PDF again, check pages and info again
        with PdfParser.PdfParser(pdf_filename) as pdf:
            self.assertEqual(len(pdf.pages), 3)
            self.assertEqual(len(pdf.info), 8)
            self.assertEqual(PdfParser.decode_text(pdf.info[b"Title"]), "abc")
            self.assertEqual(pdf.info.Title, "abc")
            self.assertEqual(pdf.info.Producer, "PdfParser")
            self.assertEqual(pdf.info.Keywords, "qw)e\\r(ty")
            self.assertEqual(pdf.info.Subject, "ghi\uABCD")
            self.assertIn(b"CreationDate", pdf.info)
            self.assertIn(b"ModDate", pdf.info)
            self.check_pdf_pages_consistency(pdf)

    def test_pdf_info(self):
        # make a PDF file
        created = time.strptime("2000", "%Y")
        modified = time.strptime("2001", "%Y")
        pdf_filename = self.helper_save_as_pdf(
            "RGB",
            title="title",
            author="author",
            subject="subject",
            keywords="keywords",
            creator="creator",
            producer="producer",
            creationDate=created,
            modDate=modified,
        )

        # open it, check pages and info
        with PdfParser.PdfParser(pdf_filename) as pdf:
            self.assertEqual(len(pdf.info), 8)
            self.assertEqual(pdf.info.Title, "title")
            self.assertEqual(pdf.info.Author, "author")
            self.assertEqual(pdf.info.Subject, "subject")
            self.assertEqual(pdf.info.Keywords, "keywords")
            self.assertEqual(pdf.info.Creator, "creator")
            self.assertEqual(pdf.info.Producer, "producer")
            self.assertEqual(pdf.info.CreationDate, time.strptime("2000", "%Y"))
            self.assertEqual(pdf.info.ModDate, time.strptime("2001", "%Y"))
            self.check_pdf_pages_consistency(pdf)

    def test_pdf_append_to_bytesio(self):
        # Write a first PDF into an in-memory buffer
        first_page = hopper("RGB")
        buffer = io.BytesIO()
        first_page.save(buffer, format="PDF")
        initial_size = len(buffer.getvalue())
        self.assertGreater(initial_size, 0)

        # Append a second image to a fresh buffer holding the same bytes;
        # the resulting data must be larger than before
        second_page = hopper("P")
        buffer = io.BytesIO(buffer.getvalue())
        second_page.save(buffer, format="PDF", append=True)
        self.assertGreater(len(buffer.getvalue()), initial_size)