mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-01-12 18:26:17 +03:00
fix performance regression on 64 bit GCC 4.8.
This commit is contained in:
parent
42967dd1a6
commit
1cd6da4a49
|
@ -90,6 +90,21 @@ static inline UINT8 clip8(float in)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Convert an int to float.
 *
 * This is a workaround for a bug in GCC prior to 4.9 in 64-bit mode:
 * GCC generates code with a partial register dependency, which is about
 * 3 times slower. See: http://stackoverflow.com/a/26588074/253146
 *
 * On the affected compilers the conversion is done with inline assembly
 * that zeroes the destination register (xorps) before cvtsi2ss, so the
 * conversion does not depend on the register's previous contents.
 * Everywhere else a plain cast is used.
 */
#if defined(__x86_64__) && defined(__SSE__) && \
    !defined(__clang__) && defined(GCC_VERSION) && (GCC_VERSION < 40900)
static inline float __attribute__((always_inline)) i2f(int v) {
    float x;
    /* "=x" pins the output to an SSE register, which both instructions
       require as their destination. The input lives in a general-purpose
       register ("r"), so it can never overlap with the output. */
    __asm__("xorps %0, %0; cvtsi2ss %1, %0" : "=x"(x) : "r"(v));
    return x;
}
#else
static inline float i2f(int v) { return (float) v; }
#endif
|
||||||
|
|
||||||
|
|
||||||
Imaging
|
Imaging
|
||||||
ImagingStretchHorizaontal(Imaging imIn, int xsize, int filter)
|
ImagingStretchHorizaontal(Imaging imIn, int xsize, int filter)
|
||||||
{
|
{
|
||||||
|
@ -100,7 +115,7 @@ ImagingStretchHorizaontal(Imaging imIn, int xsize, int filter)
|
||||||
Imaging imOut;
|
Imaging imOut;
|
||||||
struct filter *filterp;
|
struct filter *filterp;
|
||||||
float support, scale, filterscale;
|
float support, scale, filterscale;
|
||||||
float center, ww, ss, ss4[4];
|
float center, ww, ss, ss0, ss1, ss2, ss3;
|
||||||
int xx, yy, x, kmax, xmin, xmax;
|
int xx, yy, x, kmax, xmin, xmax;
|
||||||
int *xbounds;
|
int *xbounds;
|
||||||
float *k, *kk;
|
float *k, *kk;
|
||||||
|
@ -193,7 +208,7 @@ ImagingStretchHorizaontal(Imaging imIn, int xsize, int filter)
|
||||||
k = &kk[xx * kmax];
|
k = &kk[xx * kmax];
|
||||||
ss = 0.5;
|
ss = 0.5;
|
||||||
for (x = xmin; x < xmax; x++)
|
for (x = xmin; x < xmax; x++)
|
||||||
ss = ss + imIn->image8[yy][x] * k[x - xmin];
|
ss += i2f(imIn->image8[yy][x]) * k[x - xmin];
|
||||||
imOut->image8[yy][xx] = clip8(ss);
|
imOut->image8[yy][xx] = clip8(ss);
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
|
@ -205,26 +220,25 @@ ImagingStretchHorizaontal(Imaging imIn, int xsize, int filter)
|
||||||
xmax = xbounds[xx * 2 + 1];
|
xmax = xbounds[xx * 2 + 1];
|
||||||
k = &kk[xx * kmax];
|
k = &kk[xx * kmax];
|
||||||
if (imIn->bands == 3) {
|
if (imIn->bands == 3) {
|
||||||
ss4[0] = ss4[1] = ss4[2] = 0.5;
|
ss0 = ss1 = ss2 = 0.5;
|
||||||
for (x = xmin; x < xmax; x++) {
|
for (x = xmin; x < xmax; x++) {
|
||||||
ss4[0] += (UINT8) imIn->image[yy][x*4 + 0] * k[x - xmin];
|
ss0 += i2f((UINT8) imIn->image[yy][x*4 + 0]) * k[x - xmin];
|
||||||
ss4[1] += (UINT8) imIn->image[yy][x*4 + 1] * k[x - xmin];
|
ss1 += i2f((UINT8) imIn->image[yy][x*4 + 1]) * k[x - xmin];
|
||||||
ss4[2] += (UINT8) imIn->image[yy][x*4 + 2] * k[x - xmin];
|
ss2 += i2f((UINT8) imIn->image[yy][x*4 + 2]) * k[x - xmin];
|
||||||
}
|
}
|
||||||
imOut->image32[yy][xx] =
|
imOut->image32[yy][xx] =
|
||||||
clip8(ss4[0]) | clip8(ss4[1]) << 8 |
|
clip8(ss0) | clip8(ss1) << 8 | clip8(ss2) << 16;
|
||||||
clip8(ss4[2]) << 16;
|
|
||||||
} else {
|
} else {
|
||||||
ss4[0] = ss4[1] = ss4[2] = ss4[3] = 0.5;
|
ss0 = ss1 = ss2 = ss3 = 0.5;
|
||||||
for (x = xmin; x < xmax; x++) {
|
for (x = xmin; x < xmax; x++) {
|
||||||
ss4[0] += (UINT8) imIn->image[yy][x*4 + 0] * k[x - xmin];
|
ss0 += i2f((UINT8) imIn->image[yy][x*4 + 0]) * k[x - xmin];
|
||||||
ss4[1] += (UINT8) imIn->image[yy][x*4 + 1] * k[x - xmin];
|
ss1 += i2f((UINT8) imIn->image[yy][x*4 + 1]) * k[x - xmin];
|
||||||
ss4[2] += (UINT8) imIn->image[yy][x*4 + 2] * k[x - xmin];
|
ss2 += i2f((UINT8) imIn->image[yy][x*4 + 2]) * k[x - xmin];
|
||||||
ss4[3] += (UINT8) imIn->image[yy][x*4 + 3] * k[x - xmin];
|
ss3 += i2f((UINT8) imIn->image[yy][x*4 + 3]) * k[x - xmin];
|
||||||
}
|
}
|
||||||
imOut->image32[yy][xx] =
|
imOut->image32[yy][xx] =
|
||||||
clip8(ss4[0]) | clip8(ss4[1]) << 8 |
|
clip8(ss0) | clip8(ss1) << 8 |
|
||||||
clip8(ss4[2]) << 16 | clip8(ss4[3]) << 24;
|
clip8(ss2) << 16 | clip8(ss3) << 24;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -236,7 +250,7 @@ ImagingStretchHorizaontal(Imaging imIn, int xsize, int filter)
|
||||||
k = &kk[xx * kmax];
|
k = &kk[xx * kmax];
|
||||||
ss = 0.0;
|
ss = 0.0;
|
||||||
for (x = xmin; x < xmax; x++)
|
for (x = xmin; x < xmax; x++)
|
||||||
ss = ss + IMAGING_PIXEL_I(imIn, x, yy) * k[x - xmin];
|
ss += i2f(IMAGING_PIXEL_I(imIn, x, yy)) * k[x - xmin];
|
||||||
IMAGING_PIXEL_I(imOut, xx, yy) = (int) ss;
|
IMAGING_PIXEL_I(imOut, xx, yy) = (int) ss;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -248,7 +262,7 @@ ImagingStretchHorizaontal(Imaging imIn, int xsize, int filter)
|
||||||
k = &kk[xx * kmax];
|
k = &kk[xx * kmax];
|
||||||
ss = 0.0;
|
ss = 0.0;
|
||||||
for (x = xmin; x < xmax; x++)
|
for (x = xmin; x < xmax; x++)
|
||||||
ss = ss + IMAGING_PIXEL_F(imIn, x, yy) * k[x - xmin];
|
ss += IMAGING_PIXEL_F(imIn, x, yy) * k[x - xmin];
|
||||||
IMAGING_PIXEL_F(imOut, xx, yy) = ss;
|
IMAGING_PIXEL_F(imOut, xx, yy) = ss;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -72,3 +72,9 @@
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
typedef signed __int64 int64_t;
|
typedef signed __int64 int64_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Single comparable integer for the GCC version, e.g. 40802 for 4.8.2.
   Only defined when the compiler provides __GNUC__. */
#if defined(__GNUC__)
#define GCC_VERSION \
    (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif
|
||||||
|
|
Loading…
Reference in New Issue
Block a user