pure FPI implementation

This commit is contained in:
Alexander 2018-03-26 17:23:14 +03:00
parent 23827d5250
commit 3a5f0201f5

View File

@ -8,6 +8,14 @@
#define PRECISION_BITS (16 - 8 - 2) #define PRECISION_BITS (16 - 8 - 2)
#define PRECISION_ROUNDING (1<<(PRECISION_BITS-1)) #define PRECISION_ROUNDING (1<<(PRECISION_BITS-1))
/* 8 — scales are multiplyed on byte.
6 max index in the table (size is 65, but index 64 is not reachable) */
#define SCALE_BITS (32 - 8 - 6)
#define SCALE_MASK ((1 << SCALE_BITS) - 1)
#define SHIFT_BITS (16 - 1)
#define SHIFT_ROUNDING (1<<(SHIFT_BITS-1))
UINT8 _lookups2[1024] = { UINT8 _lookups2[1024] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -88,18 +96,18 @@ static inline UINT8 clip8(int in)
static inline void static inline void
interpolate3(INT16 out[3], const INT16 a[3], const INT16 b[3], INT16 shift) interpolate3(INT16 out[3], const INT16 a[3], const INT16 b[3], INT16 shift)
{ {
out[0] = (a[0] * (0x1000-shift) + b[0] * shift + (1<<11)) >> 12; out[0] = (a[0] * ((1<<SHIFT_BITS)-shift) + b[0] * shift + SHIFT_ROUNDING) >> SHIFT_BITS;
out[1] = (a[1] * (0x1000-shift) + b[1] * shift + (1<<11)) >> 12; out[1] = (a[1] * ((1<<SHIFT_BITS)-shift) + b[1] * shift + SHIFT_ROUNDING) >> SHIFT_BITS;
out[2] = (a[2] * (0x1000-shift) + b[2] * shift + (1<<11)) >> 12; out[2] = (a[2] * ((1<<SHIFT_BITS)-shift) + b[2] * shift + SHIFT_ROUNDING) >> SHIFT_BITS;
} }
static inline void static inline void
interpolate4(INT16 out[4], const INT16 a[4], const INT16 b[4], INT16 shift) interpolate4(INT16 out[4], const INT16 a[4], const INT16 b[4], INT16 shift)
{ {
out[0] = (a[0] * (0x1000-shift) + b[0] * shift + (1<<11)) >> 12; out[0] = (a[0] * ((1<<SHIFT_BITS)-shift) + b[0] * shift + SHIFT_ROUNDING) >> SHIFT_BITS;
out[1] = (a[1] * (0x1000-shift) + b[1] * shift + (1<<11)) >> 12; out[1] = (a[1] * ((1<<SHIFT_BITS)-shift) + b[1] * shift + SHIFT_ROUNDING) >> SHIFT_BITS;
out[2] = (a[2] * (0x1000-shift) + b[2] * shift + (1<<11)) >> 12; out[2] = (a[2] * ((1<<SHIFT_BITS)-shift) + b[2] * shift + SHIFT_ROUNDING) >> SHIFT_BITS;
out[3] = (a[3] * (0x1000-shift) + b[3] * shift + (1<<11)) >> 12; out[3] = (a[3] * ((1<<SHIFT_BITS)-shift) + b[3] * shift + SHIFT_ROUNDING) >> SHIFT_BITS;
} }
static inline int static inline int
@ -143,9 +151,9 @@ ImagingColorLUT3D_linear(Imaging imOut, Imaging imIn, int table_channels,
+1 cells will be outside of the table. +1 cells will be outside of the table.
With this compensation we never hit the upper cells With this compensation we never hit the upper cells
but this also doesn't introduce any noticable difference. */ but this also doesn't introduce any noticable difference. */
float scale1D = (size1D - 1) / (255.0002); UINT32 scale1D = (size1D - 1) / 255.0 * (1<<SCALE_BITS);
float scale2D = (size2D - 1) / (255.0002); UINT32 scale2D = (size2D - 1) / 255.0 * (1<<SCALE_BITS);
float scale3D = (size3D - 1) / (255.0002); UINT32 scale3D = (size3D - 1) / 255.0 * (1<<SCALE_BITS);
int size1D_2D = size1D * size2D; int size1D_2D = size1D * size2D;
int x, y; int x, y;
@ -169,18 +177,19 @@ ImagingColorLUT3D_linear(Imaging imOut, Imaging imIn, int table_channels,
for (y = 0; y < imOut->ysize; y++) { for (y = 0; y < imOut->ysize; y++) {
UINT8* rowIn = (UINT8 *)imIn->image[y]; UINT8* rowIn = (UINT8 *)imIn->image[y];
UINT8* rowOut = (UINT8 *)imOut->image[y]; UINT32* rowOut = (UINT32 *)imOut->image[y];
for (x = 0; x < imOut->xsize; x++) { for (x = 0; x < imOut->xsize; x++) {
float scaled1D = rowIn[x*4 + 0] * scale1D; UINT32 index1D = rowIn[x*4 + 0] * scale1D;
float scaled2D = rowIn[x*4 + 1] * scale2D; UINT32 index2D = rowIn[x*4 + 1] * scale2D;
float scaled3D = rowIn[x*4 + 2] * scale3D; UINT32 index3D = rowIn[x*4 + 2] * scale3D;
int index1D = (int) scaled1D; INT16 shift1D = (SCALE_MASK & index1D) >> (SCALE_BITS - SHIFT_BITS);
int index2D = (int) scaled2D; INT16 shift2D = (SCALE_MASK & index2D) >> (SCALE_BITS - SHIFT_BITS);
int index3D = (int) scaled3D; INT16 shift3D = (SCALE_MASK & index3D) >> (SCALE_BITS - SHIFT_BITS);
INT16 shift1D = (scaled1D - index1D) * 0x1000 + 0.5; int idx = table3D_index3(
INT16 shift2D = (scaled2D - index2D) * 0x1000 + 0.5; index1D >> SCALE_BITS,
INT16 shift3D = (scaled3D - index3D) * 0x1000 + 0.5; index2D >> SCALE_BITS,
int idx = table3D_index3(index1D, index2D, index3D, size1D, size1D_2D); index3D >> SCALE_BITS,
size1D, size1D_2D);
INT16 result[4], left[4], right[4]; INT16 result[4], left[4], right[4];
INT16 leftleft[4], leftright[4], rightleft[4], rightright[4]; INT16 leftleft[4], leftright[4], rightleft[4], rightright[4];
@ -198,10 +207,9 @@ ImagingColorLUT3D_linear(Imaging imOut, Imaging imIn, int table_channels,
interpolate3(result, left, right, shift3D); interpolate3(result, left, right, shift3D);
rowOut[x*4 + 0] = clip8(result[0]); rowOut[x] = MAKE_UINT32(
rowOut[x*4 + 1] = clip8(result[1]); clip8(result[0]), clip8(result[1]),
rowOut[x*4 + 2] = clip8(result[2]); clip8(result[2]), rowIn[x*4 + 3]);
rowOut[x*4 + 3] = rowIn[x*4 + 3];
} }
if (table_channels == 4) { if (table_channels == 4) {
@ -218,10 +226,9 @@ ImagingColorLUT3D_linear(Imaging imOut, Imaging imIn, int table_channels,
interpolate4(result, left, right, shift3D); interpolate4(result, left, right, shift3D);
rowOut[x*4 + 0] = clip8(result[0]); rowOut[x] = MAKE_UINT32(
rowOut[x*4 + 1] = clip8(result[1]); clip8(result[0]), clip8(result[1]),
rowOut[x*4 + 2] = clip8(result[2]); clip8(result[2]), clip8(result[3]));
rowOut[x*4 + 3] = clip8(result[3]);
} }
} }
} }