Merge pull request #2732 from uploadcare/filter-cleanup

Cleanup Filter.c
Alexander Karpinsky, 2017-09-14 14:04:03 +03:00 (committed by GitHub)
commit 51cee56245
2 changed files with 23 additions and 30 deletions

Filter.c

@@ -27,13 +27,6 @@
 #include "Imaging.h"
 
-#ifdef WORDS_BIGENDIAN
-#define MAKE_UINT32(u0, u1, u2, u3) (u3 | (u2<<8) | (u1<<16) | (u0<<24))
-#else
-#define MAKE_UINT32(u0, u1, u2, u3) (u0 | (u1<<8) | (u2<<16) | (u3<<24))
-#endif
-
 static inline UINT8 clip8(float in)
 {
     if (in <= 0.0)
@@ -91,29 +84,14 @@ ImagingExpand(Imaging imIn, int xmargin, int ymargin, int mode)
 }
 
-/* This is work around bug in GCC prior 4.9 in 64 bit mode.
-   GCC generates code with partial dependency which 3 times slower.
-   See: http://stackoverflow.com/a/26588074/253146 */
-#if defined(__x86_64__) && defined(__SSE__) && ! defined(__NO_INLINE__) && \
-    ! defined(__clang__) && defined(GCC_VERSION) && (GCC_VERSION < 40900)
-static float __attribute__((always_inline)) inline i2f(int v) {
-    float x;
-    __asm__("xorps %0, %0; cvtsi2ss %1, %0" : "=X"(x) : "r"(v) );
-    return x;
-}
-#else
-static float inline i2f(int v) { return (float) v; }
-#endif
-
 void
 ImagingFilter3x3(Imaging imOut, Imaging im, const float* kernel,
                  float offset)
 {
 #define KERNEL1x3(in0, x, kernel, d) ( \
-    i2f((UINT8) in0[x-d])  * (kernel)[0] + \
-    i2f((UINT8) in0[x])    * (kernel)[1] + \
-    i2f((UINT8) in0[x+d])  * (kernel)[2])
+    _i2f((UINT8) in0[x-d])  * (kernel)[0] + \
+    _i2f((UINT8) in0[x])    * (kernel)[1] + \
+    _i2f((UINT8) in0[x+d])  * (kernel)[2])
 
     int x = 0, y = 0;
@@ -210,11 +188,11 @@ ImagingFilter5x5(Imaging imOut, Imaging im, const float* kernel,
                  float offset)
 {
 #define KERNEL1x5(in0, x, kernel, d) ( \
-    i2f((UINT8) in0[x-d-d]) * (kernel)[0] + \
-    i2f((UINT8) in0[x-d])   * (kernel)[1] + \
-    i2f((UINT8) in0[x])     * (kernel)[2] + \
-    i2f((UINT8) in0[x+d])   * (kernel)[3] + \
-    i2f((UINT8) in0[x+d+d]) * (kernel)[4])
+    _i2f((UINT8) in0[x-d-d]) * (kernel)[0] + \
+    _i2f((UINT8) in0[x-d])   * (kernel)[1] + \
+    _i2f((UINT8) in0[x])     * (kernel)[2] + \
+    _i2f((UINT8) in0[x+d])   * (kernel)[3] + \
+    _i2f((UINT8) in0[x+d+d]) * (kernel)[4])
 
     int x = 0, y = 0;
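
For context: the KERNEL1x3 and KERNEL1x5 macros each compute one weighted row of the convolution window, and the surrounding filter functions sum three (or five) such rows per output pixel before clipping. The following is a minimal, self-contained sketch of that pattern for a single-band 8-bit image. The flat in/out buffers, the loop bounds, and the names filter3x3_u8 and clip_u8 are simplified stand-ins for illustration only, not Pillow's actual Imaging structures or API; border pixels are simply left untouched here.

/* Sketch: 3x3 convolution over a single-band 8-bit image, using the same
   "one macro per kernel row" structure as the diff above. */
#include <stdint.h>

static inline uint8_t clip_u8(float v) {
    if (v <= 0.0f)   return 0;
    if (v >= 255.0f) return 255;
    return (uint8_t) v;
}

/* One kernel row: left neighbour, centre, right neighbour, each weighted. */
#define KERNEL1x3(row, x, k) ( \
    (float) (row)[(x) - 1] * (k)[0] + \
    (float) (row)[(x)]     * (k)[1] + \
    (float) (row)[(x) + 1] * (k)[2])

void filter3x3_u8(uint8_t *out, const uint8_t *in,
                  int width, int height,
                  const float kernel[9], float offset)
{
    for (int y = 1; y < height - 1; y++) {
        const uint8_t *in_1 = in + (y - 1) * width;  /* row above   */
        const uint8_t *in0  = in +  y      * width;  /* current row */
        const uint8_t *in1  = in + (y + 1) * width;  /* row below   */
        uint8_t *out0 = out + y * width;
        for (int x = 1; x < width - 1; x++) {
            float ss = offset;
            ss += KERNEL1x3(in_1, x, &kernel[0]);  /* top row of the kernel    */
            ss += KERNEL1x3(in0,  x, &kernel[3]);  /* middle row of the kernel */
            ss += KERNEL1x3(in1,  x, &kernel[6]);  /* bottom row of the kernel */
            out0[x] = clip_u8(ss);
        }
    }
}

The plain (float) cast here is exactly what _i2f reduces to on compilers that are not affected by the GCC issue handled in the second file below.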

ImagingUtils.h

@@ -28,3 +28,18 @@
 #define PREBLEND(mask, in1, in2, tmp1)\
     (MULDIV255(in1, (255 - mask), tmp1) + in2)
+
+/* This is to work around a bug in GCC prior 4.9 in 64 bit mode.
+   GCC generates code with partial dependency which is 3 times slower.
+   See: http://stackoverflow.com/a/26588074/253146 */
+#if defined(__x86_64__) && defined(__SSE__) && ! defined(__NO_INLINE__) && \
+    ! defined(__clang__) && defined(GCC_VERSION) && (GCC_VERSION < 40900)
+static float __attribute__((always_inline)) inline _i2f(int v) {
+    float x;
+    __asm__("xorps %0, %0; cvtsi2ss %1, %0" : "=X"(x) : "r"(v) );
+    return x;
+}
+#else
+static float inline _i2f(int v) { return (float) v; }
+#endif
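
The block added above is the same workaround that was previously local to Filter.c: on 64-bit GCC older than 4.9, a plain int-to-float cast compiles to cvtsi2ss, which writes only the low lane of the destination XMM register and therefore carries a false (partial) dependency on its previous contents; clearing the register with xorps first breaks that dependency chain and avoids the roughly 3x slowdown described in the linked answer. The standalone sketch below shows the same idea in a compilable form. The GCC_VERSION definition and the int_to_float name are assumptions for illustration only (Pillow defines GCC_VERSION elsewhere in its headers), and the output constraint is written as "x" (an SSE register) rather than the "X" used in the diff.

/* Standalone sketch of the partial-register-dependency workaround. */
#include <stdio.h>

#if defined(__GNUC__) && !defined(GCC_VERSION)
/* Assumed encoding, chosen so that GCC 4.9.0 compares as 40900. */
#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif

#if defined(__x86_64__) && defined(__SSE__) && !defined(__NO_INLINE__) && \
    !defined(__clang__) && defined(GCC_VERSION) && (GCC_VERSION < 40900)
/* xorps zeroes the XMM register before cvtsi2ss writes its low lane,
   so the conversion no longer depends on the register's old contents. */
static inline __attribute__((always_inline)) float int_to_float(int v) {
    float x;
    __asm__("xorps %0, %0; cvtsi2ss %1, %0" : "=x"(x) : "r"(v));
    return x;
}
#else
/* clang and GCC >= 4.9 already generate good code for the plain cast. */
static inline float int_to_float(int v) { return (float) v; }
#endif

int main(void) {
    /* A hot conversion loop like this is where the affected GCC versions
       were measurably slower with the plain cast. */
    float acc = 0.0f;
    for (int i = 0; i < 1000000; i++)
        acc += int_to_float(i & 0xFF);
    printf("%f\n", acc);
    return 0;
}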