diff --git a/Makefile b/Makefile index d27afaa0b..b63913be8 100644 --- a/Makefile +++ b/Makefile @@ -63,6 +63,10 @@ install-coverage: CFLAGS="-coverage" python setup.py build_ext install python selftest.py --installed +install-openmp: + python setup.py build_ext --enable-openmp install + python selftest.py --installed + debug: # make a debug version if we don't have a -dbg python. Leaves in symbols # for our stuff, kills optimization, and redirects to dev null so we diff --git a/Tests/5k_image.jpg b/Tests/5k_image.jpg new file mode 100644 index 000000000..697c51cc8 Binary files /dev/null and b/Tests/5k_image.jpg differ diff --git a/Tests/mpbench.py b/Tests/mpbench.py new file mode 100644 index 000000000..b68496e15 --- /dev/null +++ b/Tests/mpbench.py @@ -0,0 +1,36 @@ +from PIL import Image +import time +import math + +def timeit(n, f, *args, **kwargs): + def run(): + start = time.time() + f(*args, **kwargs) + return time.time() - start + + runs = [run() for _ in range(n)] + mean = sum(runs)/float(n) + stddev = math.sqrt(sum((r-mean)**2 for r in runs)/float(n)) + return {'mean':mean, + 'median': sorted(runs)[int(n/2)], + 'min': min(runs), + 'max': max(runs), + 'stddev':stddev, + 'dev_pct': stddev/mean*100.0 + } + + #return min(run() for _ in range(n)) + +n = 400 +image = Image.open('5k_image.jpg').copy() +print 'warmup {mean:.4}'.format(**timeit(n // 4, image.im.resize, (2048, 1152), Image.ANTIALIAS)) +print "%s runs"%n +print "Interpolation | Size | min | max | mean | median| stddev | Dev %" +print "--------- | --------- | ----- | ----- | ----- | ----- | ----- | ----" +print 'Antialias | 2048x1152 | {min:5.3f} | {max:5.3f} | {mean:5.3f} | {median:5.3f} | {stddev:5.4f} | {dev_pct:4.1f}%'.format(**timeit(n, image.im.resize, (2048, 1152), Image.ANTIALIAS)) +print 'Antialias | 320x240 | {min:5.3f} | {max:5.3f} | {mean:5.3f} | {median:5.3f} | {stddev:5.4f} | {dev_pct:4.1f}%'.format(**timeit(n, image.im.resize, (320, 240), Image.ANTIALIAS)) +print 'Bicubic | 2048x1152 | {min:5.3f} | {max:5.3f} | {mean:5.3f} | {median:5.3f} | {stddev:5.4f} | {dev_pct:4.1f}%'.format(**timeit(n, image.im.resize, (2048, 1152), Image.BICUBIC)) +print 'Bicubic | 320x240 | {min:5.3f} | {max:5.3f} | {mean:5.3f} | {median:5.3f} | {stddev:5.4f} | {dev_pct:4.1f}%'.format(**timeit(n, image.im.resize, (320, 240), Image.BICUBIC)) +print 'Bilinear | 2048x1152 | {min:5.3f} | {max:5.3f} | {mean:5.3f} | {median:5.3f} | {stddev:5.4f} | {dev_pct:4.1f}%'.format(**timeit(n, image.im.resize, (2048, 1152), Image.BILINEAR)) +print 'Bilinear | 320x240 | {min:5.3f} | {max:5.3f} | {mean:5.3f} | {median:5.3f} | {stddev:5.4f} | {dev_pct:4.1f}%'.format(**timeit(n, image.im.resize, (320, 240), Image.BILINEAR)) + diff --git a/libImaging/Geometry.c b/libImaging/Geometry.c index 1d08728da..c77866589 100644 --- a/libImaging/Geometry.c +++ b/libImaging/Geometry.c @@ -5,6 +5,7 @@ Rotating in chunks that fit in the cache can speed up rotation 8x on a modern CPU. A chunk size of 128 requires only 65k and is large enough that the overhead from the extra loops are not apparent. */ + #define ROTATE_CHUNK 512 #define ROTATE_SMALL_CHUNK 8 @@ -163,11 +164,13 @@ ImagingTranspose(Imaging imOut, Imaging imIn) ImagingSectionEnter(&cookie); - if (imIn->image8) + if (imIn->image8){ +#pragma omp parallel for private(x,y,xx,yy,xxx,yyy,yysize,xxsize,xxxsize,yyysize) shared(imIn,imOut) default(none) collapse(2) TRANSPOSE(UINT8, image8) - else + } else { +#pragma omp parallel for private(x,y,xx,yy,xxx,yyy,yysize,xxsize,xxxsize,yyysize) shared(imIn,imOut) default(none) collapse(2) TRANSPOSE(INT32, image32) - + } ImagingSectionLeave(&cookie); #undef TRANSPOSE diff --git a/libImaging/Resample.c b/libImaging/Resample.c index 877f25a94..39ac8716d 100644 --- a/libImaging/Resample.c +++ b/libImaging/Resample.c @@ -241,9 +241,10 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset, ImagingSectionEnter(&cookie); if (imIn->image8) { +#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2) for (yy = 0; yy < imOut->ysize; yy++) { for (xx = 0; xx < imOut->xsize; xx++) { - xmin = bounds[xx * 2 + 0]; + xmin = bounds[xx * 2 + 0]; xmax = bounds[xx * 2 + 1]; k = &kk[xx * ksize]; ss0 = 1 << (PRECISION_BITS -1); @@ -254,7 +255,8 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset, } } else if (imIn->type == IMAGING_TYPE_UINT8) { if (imIn->bands == 2) { - for (yy = 0; yy < imOut->ysize; yy++) { +#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2) + for (yy = 0; yy < imOut->ysize; yy++) { for (xx = 0; xx < imOut->xsize; xx++) { xmin = bounds[xx * 2 + 0]; xmax = bounds[xx * 2 + 1]; @@ -269,7 +271,8 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset, } } } else if (imIn->bands == 3) { - for (yy = 0; yy < imOut->ysize; yy++) { +#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2) + for (yy = 0; yy < imOut->ysize; yy++) { for (xx = 0; xx < imOut->xsize; xx++) { xmin = bounds[xx * 2 + 0]; xmax = bounds[xx * 2 + 1]; @@ -285,6 +288,7 @@ ImagingResampleHorizontal_8bpc(Imaging imOut, Imaging imIn, int offset, } } } else { +#pragma omp parallel for private(k,x,xx,yy,xmin,xmax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds,offset) default(none) collapse(2) for (yy = 0; yy < imOut->ysize; yy++) { for (xx = 0; xx < imOut->xsize; xx++) { xmin = bounds[xx * 2 + 0]; @@ -322,7 +326,8 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset, ImagingSectionEnter(&cookie); if (imIn->image8) { - for (yy = 0; yy < imOut->ysize; yy++) { +#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none) + for (yy = 0; yy < imOut->ysize; yy++) { k = &kk[yy * ksize]; ymin = bounds[yy * 2 + 0]; ymax = bounds[yy * 2 + 1]; @@ -335,6 +340,7 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset, } } else if (imIn->type == IMAGING_TYPE_UINT8) { if (imIn->bands == 2) { +#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none) for (yy = 0; yy < imOut->ysize; yy++) { k = &kk[yy * ksize]; ymin = bounds[yy * 2 + 0]; @@ -350,6 +356,7 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset, } } } else if (imIn->bands == 3) { +#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none) for (yy = 0; yy < imOut->ysize; yy++) { k = &kk[yy * ksize]; ymin = bounds[yy * 2 + 0]; @@ -366,6 +373,7 @@ ImagingResampleVertical_8bpc(Imaging imOut, Imaging imIn, int offset, } } } else { +#pragma omp parallel for private(k,y,xx,yy,ymin,ymax,ss0,ss1,ss2,ss3) shared(imIn,imOut,kk,ksize,bounds) default(none) for (yy = 0; yy < imOut->ysize; yy++) { k = &kk[yy * ksize]; ymin = bounds[yy * 2 + 0]; diff --git a/setup.py b/setup.py index 202266139..2c1bcb766 100755 --- a/setup.py +++ b/setup.py @@ -165,7 +165,7 @@ def _pkg_config(name): class pil_build_ext(build_ext): class feature: features = ['zlib', 'jpeg', 'tiff', 'freetype', 'raqm', 'lcms', 'webp', - 'webpmux', 'jpeg2000', 'imagequant'] + 'webpmux', 'jpeg2000', 'imagequant', 'openmp'] required = {'jpeg', 'zlib'} @@ -204,9 +204,6 @@ class pil_build_ext(build_ext): def finalize_options(self): build_ext.finalize_options(self) - if self.debug: - global DEBUG - DEBUG = True for x in self.feature: if getattr(self, 'disable_%s' % x): setattr(self.feature, x, False) @@ -224,7 +221,8 @@ class pil_build_ext(build_ext): library_dirs = [] include_dirs = [] - + extra_compile_args = [] + _add_directory(include_dirs, "libImaging") pkg_config = None @@ -584,6 +582,10 @@ class pil_build_ext(build_ext): _find_library_file(self, "libwebpdemux")): feature.webpmux = "libwebpmux" + if feature.require('openmp'): + extra_compile_args.append('-fopenmp') + feature.openmp = 'gomp' + for f in feature: if not getattr(feature, f) and feature.require(f): if f in ('jpeg', 'zlib'): @@ -622,6 +624,8 @@ class pil_build_ext(build_ext): libs.extend(["kernel32", "user32", "gdi32"]) if struct.unpack("h", "\0\1".encode('ascii'))[0] == 1: defs.append(("WORDS_BIGENDIAN", None)) + if feature.openmp: + libs.append(feature.openmp) if sys.platform == "win32" and not (PLATFORM_PYPY or PLATFORM_MINGW): defs.append(("PILLOW_VERSION", '"\\"%s\\""' % PILLOW_VERSION)) @@ -631,7 +635,8 @@ class pil_build_ext(build_ext): exts = [(Extension("PIL._imaging", files, libraries=libs, - define_macros=defs))] + define_macros=defs, + extra_compile_args=extra_compile_args))] # # additional libraries @@ -710,7 +715,8 @@ class pil_build_ext(build_ext): (feature.lcms, "LITTLECMS2"), (feature.webp, "WEBP"), (feature.webpmux, "WEBPMUX"), - ] + (feature.openmp, "OpenMP"), + ] all = 1 for option in options: