This commit is contained in:
wiredfool 2017-12-26 16:25:27 +00:00 committed by GitHub
commit cf99f6fd95
6 changed files with 71 additions and 14 deletions

View File

@ -63,6 +63,10 @@ install-coverage:
CFLAGS="-coverage" python setup.py build_ext install CFLAGS="-coverage" python setup.py build_ext install
python selftest.py --installed python selftest.py --installed
# Build and install the extensions with the --enable-openmp build_ext option,
# then run the self test against the installed package.
install-openmp:
python setup.py build_ext --enable-openmp install
python selftest.py --installed
debug: debug:
# make a debug version if we don't have a -dbg python. Leaves in symbols # make a debug version if we don't have a -dbg python. Leaves in symbols
# for our stuff, kills optimization, and redirects to dev null so we # for our stuff, kills optimization, and redirects to dev null so we

BIN
Tests/5k_image.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.4 MiB

36
Tests/mpbench.py Normal file
View File

@ -0,0 +1,36 @@
from PIL import Image
import time
import math
def timeit(n, f, *args, **kwargs):
    """Call ``f(*args, **kwargs)`` ``n`` times and summarise the run times.

    :param n: number of timed calls; must be at least 1.
    :param f: callable to benchmark.
    :param args: positional arguments forwarded to ``f``.
    :param kwargs: keyword arguments forwarded to ``f``.
    :returns: dict with the keys ``mean``, ``median``, ``min``, ``max`` and
        ``stddev`` (population standard deviation), all in seconds, plus
        ``dev_pct`` (``stddev`` as a percentage of ``mean``).
    :raises ValueError: if ``n`` is less than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1, got %r" % (n,))

    # Prefer the monotonic high-resolution clock: time.time() follows the wall
    # clock and can jump.  Python 2 has no perf_counter, so fall back there.
    clock = getattr(time, 'perf_counter', time.time)

    def run():
        start = clock()
        f(*args, **kwargs)
        return clock() - start

    runs = [run() for _ in range(n)]
    mean = sum(runs) / float(n)
    stddev = math.sqrt(sum((r - mean) ** 2 for r in runs) / float(n))

    # True median: for an even number of samples average the two middle
    # values instead of silently picking the upper one.
    ordered = sorted(runs)
    mid = n // 2
    if n % 2:
        median = ordered[mid]
    else:
        median = (ordered[mid - 1] + ordered[mid]) / 2.0

    return {
        'mean': mean,
        'median': median,
        'min': ordered[0],
        'max': ordered[-1],
        'stddev': stddev,
        # A coarse timer can report 0.0 for every run; avoid dividing by it.
        'dev_pct': stddev / mean * 100.0 if mean else 0.0,
    }
# Benchmark driver: time the core resize for every filter/size combination
# and print the results as a pipe-separated table.
n = 400

# The path is relative to the current working directory, so run this script
# from the directory that holds 5k_image.jpg.  copy() yields an independent
# image whose ``im`` core object is used directly below.
image = Image.open('5k_image.jpg').copy()

# Single-argument print(...) behaves identically on Python 2 and Python 3,
# whereas the print statement is a syntax error on Python 3.
print('warmup {mean:.4}'.format(
    **timeit(n // 4, image.im.resize, (2048, 1152), Image.ANTIALIAS)))
print("%s runs" % n)
print("Interpolation | Size | min | max | mean | median| stddev | Dev %")
print("--------- | --------- | ----- | ----- | ----- | ----- | ----- | ----")

# One row template instead of six copy-pasted format strings.
row = ('{name} | {width}x{height} | {min:5.3f} | {max:5.3f} | {mean:5.3f} | '
       '{median:5.3f} | {stddev:5.4f} | {dev_pct:4.1f}%')
filters = (
    ('Antialias', Image.ANTIALIAS),
    ('Bicubic', Image.BICUBIC),
    ('Bilinear', Image.BILINEAR),
)
sizes = ((2048, 1152), (320, 240))

for name, resample in filters:
    for size in sizes:
        stats = timeit(n, image.im.resize, size, resample)
        print(row.format(name=name, width=size[0], height=size[1], **stats))

View File

@ -5,6 +5,7 @@
Rotating in chunks that fit in the cache can speed up rotation Rotating in chunks that fit in the cache can speed up rotation
8x on a modern CPU. A chunk size of 128 requires only 65k and is large enough 8x on a modern CPU. A chunk size of 128 requires only 65k and is large enough
that the overhead from the extra loops are not apparent. */ that the overhead from the extra loops are not apparent. */
#define ROTATE_CHUNK 512 #define ROTATE_CHUNK 512
#define ROTATE_SMALL_CHUNK 8 #define ROTATE_SMALL_CHUNK 8
@ -163,11 +164,13 @@ ImagingTranspose(Imaging imOut, Imaging imIn)
ImagingSectionEnter(&cookie); ImagingSectionEnter(&cookie);
if (imIn->image8) if (imIn->image8){
#pragma omp parallel for private(x,y,xx,yy,xxx,yyy,yysize,xxsize,xxxsize,yyysize) shared(imIn,imOut) default(none) collapse(2)
TRANSPOSE(UINT8, image8) TRANSPOSE(UINT8, image8)
else } else {
#pragma omp parallel for private(x,y,xx,yy,xxx,yyy,yysize,xxsize,xxxsize,yyysize) shared(imIn,imOut) default(none) collapse(2)
TRANSPOSE(INT32, image32) TRANSPOSE(INT32, image32)
}
ImagingSectionLeave(&cookie); ImagingSectionLeave(&cookie);
#undef TRANSPOSE #undef TRANSPOSE

View File

@ -241,9 +241,10 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset,
ImagingSectionEnter(&cookie); ImagingSectionEnter(&cookie);
if (imIn->image8) { if (imIn->image8) {
#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2)
for (yy = 0; yy < imOut->ysize; yy++) { for (yy = 0; yy < imOut->ysize; yy++) {
for (xx = 0; xx < imOut->xsize; xx++) { for (xx = 0; xx < imOut->xsize; xx++) {
xmin = bounds[xx * 2 + 0]; xmin = bounds[xx * 2 + 0];
xmax = bounds[xx * 2 + 1]; xmax = bounds[xx * 2 + 1];
k = &kk[xx * ksize]; k = &kk[xx * ksize];
ss0 = 1 << (PRECISION_BITS -1); ss0 = 1 << (PRECISION_BITS -1);
@ -254,7 +255,8 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset,
} }
} else if (imIn->type == IMAGING_TYPE_UINT8) { } else if (imIn->type == IMAGING_TYPE_UINT8) {
if (imIn->bands == 2) { if (imIn->bands == 2) {
for (yy = 0; yy < imOut->ysize; yy++) { #pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2)
for (yy = 0; yy < imOut->ysize; yy++) {
for (xx = 0; xx < imOut->xsize; xx++) { for (xx = 0; xx < imOut->xsize; xx++) {
xmin = bounds[xx * 2 + 0]; xmin = bounds[xx * 2 + 0];
xmax = bounds[xx * 2 + 1]; xmax = bounds[xx * 2 + 1];
@ -269,7 +271,8 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset,
} }
} }
} else if (imIn->bands == 3) { } else if (imIn->bands == 3) {
for (yy = 0; yy < imOut->ysize; yy++) { #pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2)
for (yy = 0; yy < imOut->ysize; yy++) {
for (xx = 0; xx < imOut->xsize; xx++) { for (xx = 0; xx < imOut->xsize; xx++) {
xmin = bounds[xx * 2 + 0]; xmin = bounds[xx * 2 + 0];
xmax = bounds[xx * 2 + 1]; xmax = bounds[xx * 2 + 1];
@ -285,6 +288,7 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset,
} }
} }
} else { } else {
#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2)
for (yy = 0; yy < imOut->ysize; yy++) { for (yy = 0; yy < imOut->ysize; yy++) {
for (xx = 0; xx < imOut->xsize; xx++) { for (xx = 0; xx < imOut->xsize; xx++) {
xmin = bounds[xx * 2 + 0]; xmin = bounds[xx * 2 + 0];
@ -322,7 +326,8 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset,
ImagingSectionEnter(&cookie); ImagingSectionEnter(&cookie);
if (imIn->image8) { if (imIn->image8) {
for (yy = 0; yy < imOut->ysize; yy++) { #pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none)
for (yy = 0; yy < imOut->ysize; yy++) {
k = &kk[yy * ksize]; k = &kk[yy * ksize];
ymin = bounds[yy * 2 + 0]; ymin = bounds[yy * 2 + 0];
ymax = bounds[yy * 2 + 1]; ymax = bounds[yy * 2 + 1];
@ -335,6 +340,7 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset,
} }
} else if (imIn->type == IMAGING_TYPE_UINT8) { } else if (imIn->type == IMAGING_TYPE_UINT8) {
if (imIn->bands == 2) { if (imIn->bands == 2) {
#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none)
for (yy = 0; yy < imOut->ysize; yy++) { for (yy = 0; yy < imOut->ysize; yy++) {
k = &kk[yy * ksize]; k = &kk[yy * ksize];
ymin = bounds[yy * 2 + 0]; ymin = bounds[yy * 2 + 0];
@ -350,6 +356,7 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset,
} }
} }
} else if (imIn->bands == 3) { } else if (imIn->bands == 3) {
#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none)
for (yy = 0; yy < imOut->ysize; yy++) { for (yy = 0; yy < imOut->ysize; yy++) {
k = &kk[yy * ksize]; k = &kk[yy * ksize];
ymin = bounds[yy * 2 + 0]; ymin = bounds[yy * 2 + 0];
@ -366,6 +373,7 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset,
} }
} }
} else { } else {
#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none)
for (yy = 0; yy < imOut->ysize; yy++) { for (yy = 0; yy < imOut->ysize; yy++) {
k = &kk[yy * ksize]; k = &kk[yy * ksize];
ymin = bounds[yy * 2 + 0]; ymin = bounds[yy * 2 + 0];

View File

@ -165,7 +165,7 @@ def _pkg_config(name):
class pil_build_ext(build_ext): class pil_build_ext(build_ext):
class feature: class feature:
features = ['zlib', 'jpeg', 'tiff', 'freetype', 'raqm', 'lcms', 'webp', features = ['zlib', 'jpeg', 'tiff', 'freetype', 'raqm', 'lcms', 'webp',
'webpmux', 'jpeg2000', 'imagequant'] 'webpmux', 'jpeg2000', 'imagequant', 'openmp']
required = {'jpeg', 'zlib'} required = {'jpeg', 'zlib'}
@ -204,9 +204,6 @@ class pil_build_ext(build_ext):
def finalize_options(self): def finalize_options(self):
build_ext.finalize_options(self) build_ext.finalize_options(self)
if self.debug:
global DEBUG
DEBUG = True
for x in self.feature: for x in self.feature:
if getattr(self, 'disable_%s' % x): if getattr(self, 'disable_%s' % x):
setattr(self.feature, x, False) setattr(self.feature, x, False)
@ -224,7 +221,8 @@ class pil_build_ext(build_ext):
library_dirs = [] library_dirs = []
include_dirs = [] include_dirs = []
extra_compile_args = []
_add_directory(include_dirs, "libImaging") _add_directory(include_dirs, "libImaging")
pkg_config = None pkg_config = None
@ -584,6 +582,10 @@ class pil_build_ext(build_ext):
_find_library_file(self, "libwebpdemux")): _find_library_file(self, "libwebpdemux")):
feature.webpmux = "libwebpmux" feature.webpmux = "libwebpmux"
if feature.require('openmp'):
extra_compile_args.append('-fopenmp')
feature.openmp = 'gomp'
for f in feature: for f in feature:
if not getattr(feature, f) and feature.require(f): if not getattr(feature, f) and feature.require(f):
if f in ('jpeg', 'zlib'): if f in ('jpeg', 'zlib'):
@ -622,6 +624,8 @@ class pil_build_ext(build_ext):
libs.extend(["kernel32", "user32", "gdi32"]) libs.extend(["kernel32", "user32", "gdi32"])
if struct.unpack("h", "\0\1".encode('ascii'))[0] == 1: if struct.unpack("h", "\0\1".encode('ascii'))[0] == 1:
defs.append(("WORDS_BIGENDIAN", None)) defs.append(("WORDS_BIGENDIAN", None))
if feature.openmp:
libs.append(feature.openmp)
if sys.platform == "win32" and not (PLATFORM_PYPY or PLATFORM_MINGW): if sys.platform == "win32" and not (PLATFORM_PYPY or PLATFORM_MINGW):
defs.append(("PILLOW_VERSION", '"\\"%s\\""' % PILLOW_VERSION)) defs.append(("PILLOW_VERSION", '"\\"%s\\""' % PILLOW_VERSION))
@ -631,7 +635,8 @@ class pil_build_ext(build_ext):
exts = [(Extension("PIL._imaging", exts = [(Extension("PIL._imaging",
files, files,
libraries=libs, libraries=libs,
define_macros=defs))] define_macros=defs,
extra_compile_args=extra_compile_args))]
# #
# additional libraries # additional libraries
@ -710,7 +715,8 @@ class pil_build_ext(build_ext):
(feature.lcms, "LITTLECMS2"), (feature.lcms, "LITTLECMS2"),
(feature.webp, "WEBP"), (feature.webp, "WEBP"),
(feature.webpmux, "WEBPMUX"), (feature.webpmux, "WEBPMUX"),
] (feature.openmp, "OpenMP"),
]
all = 1 all = 1
for option in options: for option in options: