From 8a52520e6629220d8a60c1ce37c6d367b00ff6cb Mon Sep 17 00:00:00 2001 From: Alexander Date: Tue, 27 Mar 2018 13:22:37 +0300 Subject: [PATCH] SIMD ColorLUT. AVX2 implementation (near the same speed) --- src/libImaging/ColorLUT.c | 75 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/libImaging/ColorLUT.c b/src/libImaging/ColorLUT.c index 266c3803b..938653171 100644 --- a/src/libImaging/ColorLUT.c +++ b/src/libImaging/ColorLUT.c @@ -58,8 +58,17 @@ ImagingColorLUT3D_linear( __m128i scale_mask = _mm_set1_epi32(SCALE_MASK); __m128i index_mul = _mm_set_epi32(0, size1D_2D*table_channels, size1D*table_channels, table_channels); +#if defined(__AVX2__) + __m256i shuffle3 = _mm256_set_epi8( + -1,-1, -1,-1, 11,10, 5,4, 9,8, 3,2, 7,6, 1,0, + -1,-1, -1,-1, 11,10, 5,4, 9,8, 3,2, 7,6, 1,0); + __m256i shuffle4 = _mm256_set_epi8( + 15,14, 7,6, 13,12, 5,4, 11,10, 3,2, 9,8, 1,0, + 15,14, 7,6, 13,12, 5,4, 11,10, 3,2, 9,8, 1,0); +#else __m128i shuffle3 = _mm_set_epi8(-1,-1, -1,-1, 11,10, 5,4, 9,8, 3,2, 7,6, 1,0); __m128i shuffle4 = _mm_set_epi8(15,14, 7,6, 13,12, 5,4, 11,10, 3,2, 9,8, 1,0); +#endif int x, y; ImagingSectionCookie cookie; @@ -97,19 +106,57 @@ ImagingColorLUT3D_linear( _mm_setzero_si128()), _mm_setzero_si128())); __m128i shift = _mm_srli_epi32( _mm_and_si128(scale_mask, index), (SCALE_BITS - SHIFT_BITS)); + + #if defined(__AVX2__) + __m256i shift1D, shift2D; + __m128i shift3D, result; + __m256i source, left, right; + #else __m128i shift1D, shift2D, shift3D; __m128i source, left, right, result; __m128i leftleft, leftright, rightleft, rightright; + #endif shift = _mm_or_si128( _mm_sub_epi32(_mm_set1_epi32((1<