Merge pull request #886 from wiredfool/epsfile-rebase

EpsFilePlugin Speed improvements.
This commit is contained in:
Hugo 2014-09-13 08:08:24 +03:00
commit af8bf3f1d2
2 changed files with 180 additions and 131 deletions

View File

@ -86,26 +86,32 @@ def Ghostscript(tile, size, fp, scale=1):
out_fd, outfile = tempfile.mkstemp()
os.close(out_fd)
in_fd, infile = tempfile.mkstemp()
os.close(in_fd)
# ignore length and offset!
# ghostscript can read it
# copy whole file to read in ghostscript
with open(infile, 'wb') as f:
# fetch length of fp
fp.seek(0, 2)
fsize = fp.tell()
# ensure start position
# go back
fp.seek(0)
lengthfile = fsize
while lengthfile > 0:
s = fp.read(min(lengthfile, 100*1024))
if not s:
break
length -= len(s)
f.write(s)
infile_temp = None
if hasattr(fp, 'name') and os.path.exists(fp.name):
infile = fp.name
else:
in_fd, infile_temp = tempfile.mkstemp()
os.close(in_fd)
infile = infile_temp
# ignore length and offset!
# ghostscript can read it
# copy whole file to read in ghostscript
with open(infile_temp, 'wb') as f:
# fetch length of fp
fp.seek(0, 2)
fsize = fp.tell()
# ensure start position
# go back
fp.seek(0)
lengthfile = fsize
while lengthfile > 0:
s = fp.read(min(lengthfile, 100*1024))
if not s:
break
lengthfile -= len(s)
f.write(s)
# Build ghostscript command
command = ["gs",
@ -136,49 +142,36 @@ def Ghostscript(tile, size, fp, scale=1):
finally:
try:
os.unlink(outfile)
os.unlink(infile)
if infile_temo:
os.unlink(infile_temp)
except: pass
return im
class PSFile:
"""Wrapper that treats either CR or LF as end of line."""
"""Wrapper for bytesio object that treats either CR or LF as end of line."""
def __init__(self, fp):
self.fp = fp
self.char = None
def __getattr__(self, id):
v = getattr(self.fp, id)
setattr(self, id, v)
return v
def seek(self, offset, whence=0):
self.char = None
self.fp.seek(offset, whence)
def read(self, count):
return self.fp.read(count).decode('latin-1')
def readbinary(self, count):
return self.fp.read(count)
def tell(self):
pos = self.fp.tell()
if self.char:
pos -= 1
return pos
def readline(self):
s = b""
if self.char:
c = self.char
self.char = None
else:
c = self.fp.read(1)
s = self.char or b""
self.char = None
c = self.fp.read(1)
while c not in b"\r\n":
s = s + c
c = self.fp.read(1)
if c == b"\r":
self.char = self.fp.read(1)
if self.char == b"\n":
self.char = None
return s.decode('latin-1') + "\n"
self.char = self.fp.read(1)
# line endings can be 1 or 2 of \r \n, in either order
if self.char in b"\r\n":
self.char = None
return s.decode('latin-1')
def _accept(prefix):
return prefix[:4] == b"%!PS" or i32(prefix) == 0xC6D3D0C5
@ -193,36 +186,27 @@ class EpsImageFile(ImageFile.ImageFile):
format = "EPS"
format_description = "Encapsulated Postscript"
mode_map = { 1:"L", 2:"LAB", 3:"RGB" }
def _open(self):
(length, offset) = self._find_offset(self.fp)
fp = PSFile(self.fp)
# Rewrap the open file pointer in something that will
# convert line endings and decode to latin-1.
try:
if bytes is str:
# Python2, no encoding conversion necessary
fp = open(self.fp.name, "Ur")
else:
# Python3, can use bare open command.
fp = open(self.fp.name, "Ur", encoding='latin-1')
except Exception as msg:
# Expect this for bytesio/stringio
fp = PSFile(self.fp)
# FIX for: Some EPS file not handled correctly / issue #302
# EPS can contain binary data
# or start directly with latin coding
# read header in both ways to handle both
# file types
# more info see http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf
# for HEAD without binary preview
s = fp.read(4)
# for HEAD with binary preview
fp.seek(0)
sb = fp.readbinary(160)
if s[:4] == "%!PS":
fp.seek(0, 2)
length = fp.tell()
offset = 0
elif i32(sb[0:4]) == 0xC6D3D0C5:
offset = i32(sb[4:8])
length = i32(sb[8:12])
else:
raise SyntaxError("not an EPS file")
# go to offset - start of "%!PS"
# go to offset - start of "%!PS"
fp.seek(offset)
box = None
self.mode = "RGB"
@ -231,18 +215,12 @@ class EpsImageFile(ImageFile.ImageFile):
#
# Load EPS header
s = fp.readline()
s = fp.readline().strip('\r\n')
while s:
if len(s) > 255:
raise SyntaxError("not an EPS file")
if s[-2:] == '\r\n':
s = s[:-2]
elif s[-1:] == '\n':
s = s[:-1]
try:
m = split.match(s)
except re.error as v:
@ -264,9 +242,7 @@ class EpsImageFile(ImageFile.ImageFile):
pass
else:
m = field.match(s)
if m:
k = m.group(1)
@ -276,16 +252,16 @@ class EpsImageFile(ImageFile.ImageFile):
self.info[k[:8]] = k[9:]
else:
self.info[k] = ""
elif s[0:1] == '%':
elif s[0] == '%':
# handle non-DSC Postscript comments that some
# tools mistakenly put in the Comments section
pass
else:
raise IOError("bad EPS header")
s = fp.readline()
s = fp.readline().strip('\r\n')
if s[:1] != "%":
if s[0] != "%":
break
@ -297,63 +273,48 @@ class EpsImageFile(ImageFile.ImageFile):
if len(s) > 255:
raise SyntaxError("not an EPS file")
if s[-2:] == '\r\n':
s = s[:-2]
elif s[-1:] == '\n':
s = s[:-1]
if s[:11] == "%ImageData:":
# Encoded bitmapped image.
[x, y, bi, mo, z3, z4, en, id] = s[11:].split(None, 7)
[x, y, bi, mo, z3, z4, en, id] =\
s[11:].split(None, 7)
x = int(x); y = int(y)
bi = int(bi)
mo = int(mo)
en = int(en)
if en == 1:
decoder = "eps_binary"
elif en == 2:
decoder = "eps_hex"
else:
if int(bi) != 8:
break
if bi != 8:
try:
self.mode = self.mode_map[int(mo)]
except:
break
if mo == 1:
self.mode = "L"
elif mo == 2:
self.mode = "LAB"
elif mo == 3:
self.mode = "RGB"
else:
break
if id[:1] == id[-1:] == '"':
id = id[1:-1]
# Scan forward to the actual image data
while True:
s = fp.readline()
if not s:
break
if s[:len(id)] == id:
self.size = x, y
self.tile2 = [(decoder,
(0, 0, x, y),
fp.tell(),
0)]
return
s = fp.readline()
self.size = int(x), int(y)
return
s = fp.readline().strip('\r\n')
if not s:
break
if not box:
raise IOError("cannot determine EPS bounding box")
def _find_offset(self, fp):
s = fp.read(160)
if s[:4] == b"%!PS":
# for HEAD without binary preview
fp.seek(0, 2)
length = fp.tell()
offset = 0
elif i32(s[0:4]) == 0xC6D3D0C5:
# FIX for: Some EPS file not handled correctly / issue #302
# EPS can contain binary data
# or start directly with latin coding
# more info see http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf
offset = i32(s[4:8])
length = i32(s[8:12])
else:
raise SyntaxError("not an EPS file")
return (length, offset)
def load(self, scale=1):
# Load EPS via Ghostscript
if not self.tile:

View File

@ -63,6 +63,17 @@ class TestFileEps(PillowTestCase):
with io.open(self.tempfile('temp_iobase.eps'), 'wb') as fh:
image1.save(fh, 'EPS')
def test_bytesio_object(self):
with open(file1, 'rb') as f:
img_bytes = io.BytesIO(f.read())
img = Image.open(img_bytes)
img.load()
image1_scale1_compare = Image.open(file1_compare).convert("RGB")
image1_scale1_compare.load()
self.assert_image_similar(img, image1_scale1_compare, 5)
def test_render_scale1(self):
# We need png support for these render test
codecs = dir(Image.core)
@ -137,6 +148,83 @@ class TestFileEps(PillowTestCase):
# open image with binary preview
Image.open(file3)
def _test_readline(self,t, ending):
ending = "Failure with line ending: %s" %("".join("%s" %ord(s) for s in ending))
self.assertEqual(t.readline().strip('\r\n'), 'something', ending)
self.assertEqual(t.readline().strip('\r\n'), 'else', ending)
self.assertEqual(t.readline().strip('\r\n'), 'baz', ending)
self.assertEqual(t.readline().strip('\r\n'), 'bif', ending)
def _test_readline_stringio(self, test_string, ending):
# check all the freaking line endings possible
try:
import StringIO
except:
# don't skip, it skips everything in the parent test
return
t = StringIO.StringIO(test_string)
self._test_readline(t, ending)
def _test_readline_io(self, test_string, ending):
import io
if str is bytes:
t = io.StringIO(unicode(test_string))
else:
t = io.StringIO(test_string)
self._test_readline(t, ending)
def _test_readline_file_universal(self, test_string, ending):
f = self.tempfile('temp.txt')
with open(f,'wb') as w:
if str is bytes:
w.write(test_string)
else:
w.write(test_string.encode('UTF-8'))
with open(f,'rU') as t:
self._test_readline(t, ending)
def _test_readline_file_psfile(self, test_string, ending):
f = self.tempfile('temp.txt')
with open(f,'wb') as w:
if str is bytes:
w.write(test_string)
else:
w.write(test_string.encode('UTF-8'))
with open(f,'rb') as r:
t = EpsImagePlugin.PSFile(r)
self._test_readline(t, ending)
def test_readline(self):
# check all the freaking line endings possible from the spec
#test_string = u'something\r\nelse\n\rbaz\rbif\n'
line_endings = ['\r\n', '\n']
not_working_endings = ['\n\r', '\r']
strings = ['something', 'else', 'baz', 'bif']
for ending in line_endings:
s = ending.join(strings)
# Native python versions will pass these endings.
#self._test_readline_stringio(s, ending)
#self._test_readline_io(s, ending)
#self._test_readline_file_universal(s, ending)
self._test_readline_file_psfile(s, ending)
for ending in not_working_endings:
# these only work with the PSFile, while they're in spec,
# they're not likely to be used
s = ending.join(strings)
# Native python versions may fail on these endings.
#self._test_readline_stringio(s, ending)
#self._test_readline_io(s, ending)
#self._test_readline_file_universal(s, ending)
self._test_readline_file_psfile(s, ending)
if __name__ == '__main__':
unittest.main()