From e13297b396adc24a383862bef14e6fe0023251d8 Mon Sep 17 00:00:00 2001
From: homm <homm86@gmail.com>
Date: Tue, 3 May 2016 12:23:16 +0200
Subject: [PATCH] make coefficients ints

---
 libImaging/Resample.c | 82 +++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 45 deletions(-)

diff --git a/libImaging/Resample.c b/libImaging/Resample.c
index 8e50c4b1b..0098ab492 100644
--- a/libImaging/Resample.c
+++ b/libImaging/Resample.c
@@ -38,8 +38,6 @@ static inline float lanczos_filter(float x)
     return 0.0;
 }
 
-static struct filter LANCZOS = { lanczos_filter, 3.0 };
-
 static inline float bilinear_filter(float x)
 {
     if (x < 0.0)
@@ -49,8 +47,6 @@ static inline float bilinear_filter(float x)
     return 0.0;
 }
 
-static struct filter BILINEAR = { bilinear_filter, 1.0 };
-
 static inline float bicubic_filter(float x)
 {
     /* https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm */
@@ -65,45 +61,41 @@ static inline float bicubic_filter(float x)
 #undef a
 }
 
+static struct filter LANCZOS = { lanczos_filter, 3.0 };
+static struct filter BILINEAR = { bilinear_filter, 1.0 };
 static struct filter BICUBIC = { bicubic_filter, 2.0 };
 
 
-static inline UINT8 clip8(float in)
+
+/* 8 bits are reserved for the result. A filter can have negative areas:
+   in one case the sum of the coefficients will be negative, in another
+   it will be more than 1.0. That is why we need two extra bits for
+   overflow and a signed int type. */
+#define PRECISION_BITS (32 - 8 - 2)
+
+
+static inline UINT8 clip8(int in) /* fixed-point value (PRECISION_BITS fraction bits) -> UINT8, rounded to nearest and saturated */
 {
-    int out = (int) in;
-    if (out >= 255)
+    if (in >= (255 << PRECISION_BITS) + (1 << (PRECISION_BITS - 1))) /* >= 255.5 saturates; also keeps the rounded value from wrapping to 0 */
        return 255;
-    if (out <= 0)
+    if (in < (1 << (PRECISION_BITS - 1))) /* < 0.5, including every negative sum */
         return 0;
-    return (UINT8) out;
+    return (UINT8) ((in + (1 << (PRECISION_BITS - 1))) >> PRECISION_BITS); /* add 0.5 then truncate: restores the rounding the float code got from its 0.5 start value */
 }
 
 
-/* This is work around bug in GCC prior 4.9 in 64-bit mode.
-   GCC generates code with partial dependency which 3 times slower.
-   See: http://stackoverflow.com/a/26588074/253146 */
-#if defined(__x86_64__) && defined(__SSE__) &&  ! defined(__NO_INLINE__) && \
-    ! defined(__clang__) && defined(GCC_VERSION) && (GCC_VERSION < 40900)
-static float __attribute__((always_inline)) i2f(int v) {
-    float x;
-    __asm__("xorps %0, %0; cvtsi2ss %1, %0" : "=X"(x) : "r"(v) );
-    return x;
-}
-#else
-static float inline i2f(int v) { return (float) v; }
-#endif
-
-
 Imaging
 ImagingResampleHorizontal(Imaging imIn, int xsize, struct filter *filterp)
 {
     ImagingSectionCookie cookie;
     Imaging imOut;
     float support, scale, filterscale;
-    float center, ww, ss, ss0, ss1, ss2, ss3;
+    float center, ww, ss;
+    int ss0, ss1, ss2, ss3;
     int xx, yy, x, kmax, xmin, xmax;
     int *xbounds;
-    float *k, *kk, *kw;
+    int *k, *kk;
+    float *kw;
 
     /* prepare for horizontal stretch */
     filterscale = scale = (float) imIn->xsize / xsize;
@@ -118,7 +110,7 @@ ImagingResampleHorizontal(Imaging imIn, int xsize, struct filter *filterp)
     kmax = (int) ceil(support) * 2 + 1;
 
     // check for overflow
-    if (xsize > SIZE_MAX / (kmax * sizeof(float)))
+    if (xsize > SIZE_MAX / (kmax * sizeof(int)))
         return (Imaging) ImagingError_MemoryError();
 
     // sizeof(int) should be greater than 0 as well
@@ -126,7 +118,7 @@ ImagingResampleHorizontal(Imaging imIn, int xsize, struct filter *filterp)
         return (Imaging) ImagingError_MemoryError();
 
     /* coefficient buffer */
-    kk = malloc(xsize * kmax * sizeof(float));
+    kk = malloc(xsize * kmax * sizeof(int));
     if ( ! kk)
         return (Imaging) ImagingError_MemoryError();
 
@@ -162,7 +154,7 @@ ImagingResampleHorizontal(Imaging imIn, int xsize, struct filter *filterp)
         k = &kk[xx * kmax];
         for (x = 0; x < xmax - xmin; x++) {
             if (ww != 0.0)
-                k[x] = kw[x] / ww;
+                k[x] = (int) floor(0.5 + kw[x] / ww * (1 << PRECISION_BITS));
         }
         xbounds[xx * 2 + 0] = xmin;
         xbounds[xx * 2 + 1] = xmax;
@@ -186,10 +178,10 @@ ImagingResampleHorizontal(Imaging imIn, int xsize, struct filter *filterp)
                 xmin = xbounds[xx * 2 + 0];
                 xmax = xbounds[xx * 2 + 1];
                 k = &kk[xx * kmax];
-                ss = 0.5;
+                ss0 = 0;
                 for (x = xmin; x < xmax; x++)
-                    ss += i2f(imIn->image8[yy][x]) * k[x - xmin];
-                imOut->image8[yy][xx] = clip8(ss);
+                    ss0 += ((UINT8) imIn->image8[yy][x]) * k[x - xmin];
+                imOut->image8[yy][xx] = clip8(ss0);
             }
         } else {
             switch(imIn->type) {
@@ -200,10 +192,10 @@ ImagingResampleHorizontal(Imaging imIn, int xsize, struct filter *filterp)
                         xmin = xbounds[xx * 2 + 0];
                         xmax = xbounds[xx * 2 + 1];
                         k = &kk[xx * kmax];
-                        ss0 = ss1 = 0.5;
+                        ss0 = ss1 = 0;
                         for (x = xmin; x < xmax; x++) {
-                            ss0 += i2f((UINT8) imIn->image[yy][x*4 + 0]) * k[x - xmin];
-                            ss1 += i2f((UINT8) imIn->image[yy][x*4 + 3]) * k[x - xmin];
+                            ss0 += ((UINT8) imIn->image[yy][x*4 + 0]) * k[x - xmin];
+                            ss1 += ((UINT8) imIn->image[yy][x*4 + 3]) * k[x - xmin];
                         }
                         imOut->image[yy][xx*4 + 0] = clip8(ss0);
                         imOut->image[yy][xx*4 + 3] = clip8(ss1);
@@ -213,11 +205,11 @@ ImagingResampleHorizontal(Imaging imIn, int xsize, struct filter *filterp)
                         xmin = xbounds[xx * 2 + 0];
                         xmax = xbounds[xx * 2 + 1];
                         k = &kk[xx * kmax];
-                        ss0 = ss1 = ss2 = 0.5;
+                        ss0 = ss1 = ss2 = 0;
                         for (x = xmin; x < xmax; x++) {
-                            ss0 += i2f((UINT8) imIn->image[yy][x*4 + 0]) * k[x - xmin];
-                            ss1 += i2f((UINT8) imIn->image[yy][x*4 + 1]) * k[x - xmin];
-                            ss2 += i2f((UINT8) imIn->image[yy][x*4 + 2]) * k[x - xmin];
+                            ss0 += ((UINT8) imIn->image[yy][x*4 + 0]) * k[x - xmin];
+                            ss1 += ((UINT8) imIn->image[yy][x*4 + 1]) * k[x - xmin];
+                            ss2 += ((UINT8) imIn->image[yy][x*4 + 2]) * k[x - xmin];
                         }
                         imOut->image[yy][xx*4 + 0] = clip8(ss0);
                         imOut->image[yy][xx*4 + 1] = clip8(ss1);
@@ -228,12 +220,12 @@ ImagingResampleHorizontal(Imaging imIn, int xsize, struct filter *filterp)
                         xmin = xbounds[xx * 2 + 0];
                         xmax = xbounds[xx * 2 + 1];
                         k = &kk[xx * kmax];
-                        ss0 = ss1 = ss2 = ss3 = 0.5;
+                        ss0 = ss1 = ss2 = ss3 = 0;
                         for (x = xmin; x < xmax; x++) {
-                            ss0 += i2f((UINT8) imIn->image[yy][x*4 + 0]) * k[x - xmin];
-                            ss1 += i2f((UINT8) imIn->image[yy][x*4 + 1]) * k[x - xmin];
-                            ss2 += i2f((UINT8) imIn->image[yy][x*4 + 2]) * k[x - xmin];
-                            ss3 += i2f((UINT8) imIn->image[yy][x*4 + 3]) * k[x - xmin];
+                            ss0 += ((UINT8) imIn->image[yy][x*4 + 0]) * k[x - xmin];
+                            ss1 += ((UINT8) imIn->image[yy][x*4 + 1]) * k[x - xmin];
+                            ss2 += ((UINT8) imIn->image[yy][x*4 + 2]) * k[x - xmin];
+                            ss3 += ((UINT8) imIn->image[yy][x*4 + 3]) * k[x - xmin];
                         }
                         imOut->image[yy][xx*4 + 0] = clip8(ss0);
                         imOut->image[yy][xx*4 + 1] = clip8(ss1);
@@ -250,7 +242,7 @@ ImagingResampleHorizontal(Imaging imIn, int xsize, struct filter *filterp)
                     k = &kk[xx * kmax];
                     ss = 0.0;
                     for (x = xmin; x < xmax; x++)
-                        ss += i2f(IMAGING_PIXEL_I(imIn, x, yy)) * k[x - xmin];
+                        ss += (IMAGING_PIXEL_I(imIn, x, yy)) * k[x - xmin];
                     IMAGING_PIXEL_I(imOut, xx, yy) = (int) ss;
                 }
                 break;