mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-08-20 20:24:45 +03:00
SIMD ColorLUT: improve performance by calculating the index in advance
This commit is contained in:
parent
f50dba4aad
commit
c7e3b0ead9
|
@ -104,17 +104,23 @@ ImagingColorLUT3D_linear(
|
||||||
|
|
||||||
ImagingSectionEnter(&cookie);
|
ImagingSectionEnter(&cookie);
|
||||||
for (y = 0; y < imOut->ysize; y++) {
|
for (y = 0; y < imOut->ysize; y++) {
|
||||||
UINT8 *rowIn = (UINT8 *)imIn->image[y];
|
UINT8* rowIn = (UINT8 *)imIn->image[y];
|
||||||
char *rowOut = (char *)imOut->image[y];
|
UINT32* rowOut = (UINT32 *)imOut->image[y];
|
||||||
for (x = 0; x < imOut->xsize; x++) {
|
|
||||||
__m128i index = _mm_mullo_epi32(scale,
|
__m128i index = _mm_mullo_epi32(scale,
|
||||||
_mm_cvtepu8_epi32(*(__m128i *) &rowIn[x*4]));
|
_mm_cvtepu8_epi32(*(__m128i *) &rowIn[0]));
|
||||||
__m128i shift = _mm_srli_epi32(
|
|
||||||
_mm_and_si128(scale_mask, index), (SCALE_BITS - SHIFT_BITS));
|
|
||||||
int idx = table_channels * _mm_extract_epi32(
|
int idx = table_channels * _mm_extract_epi32(
|
||||||
_mm_hadd_epi32(_mm_hadd_epi32(
|
_mm_hadd_epi32(_mm_hadd_epi32(
|
||||||
_mm_madd_epi16(index_mul, _mm_srli_epi32(index, SCALE_BITS)),
|
_mm_madd_epi16(index_mul, _mm_srli_epi32(index, SCALE_BITS)),
|
||||||
_mm_setzero_si128()), _mm_setzero_si128()), 0);
|
_mm_setzero_si128()), _mm_setzero_si128()), 0);
|
||||||
|
for (x = 0; x < imOut->xsize; x++) {
|
||||||
|
__m128i next_index = _mm_mullo_epi32(scale,
|
||||||
|
_mm_cvtepu8_epi32(*(__m128i *) &rowIn[x*4 + 4]));
|
||||||
|
int next_idx = table_channels * _mm_extract_epi32(
|
||||||
|
_mm_hadd_epi32(_mm_hadd_epi32(
|
||||||
|
_mm_madd_epi16(index_mul, _mm_srli_epi32(next_index, SCALE_BITS)),
|
||||||
|
_mm_setzero_si128()), _mm_setzero_si128()), 0);
|
||||||
|
__m128i shift = _mm_srli_epi32(
|
||||||
|
_mm_and_si128(scale_mask, index), (SCALE_BITS - SHIFT_BITS));
|
||||||
__m128i shift1D, shift2D, shift3D;
|
__m128i shift1D, shift2D, shift3D;
|
||||||
__m128i source, left, right, result;
|
__m128i source, left, right, result;
|
||||||
__m128i leftleft, leftright, rightleft, rightright;
|
__m128i leftleft, leftright, rightleft, rightright;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user