SIMD Resample: fix wrong usage of xmax for division compensation

This commit is contained in:
Alexander 2019-01-20 15:27:30 +03:00 committed by Alexander Karpinsky
parent 247cba2e30
commit b3c28911c0
2 changed files with 8 additions and 7 deletions

View File

@ -231,10 +231,11 @@ ImagingResampleHorizontalConvolution8u(UINT32 *lineOut, UINT32 *lineIn,
#if defined(__AVX2__) #if defined(__AVX2__)
if (xmax < 8) { if (xmax < 8) {
sss = _mm_set1_epi32((1 << (coefs_precision -1)) + xmax / 2); sss = _mm_set1_epi32(1 << (coefs_precision-1));
} else { } else {
__m256i sss256 = _mm256_set1_epi32((1 << (coefs_precision -2)) + xmax / 4); // The lower part will be added to the higher part, so use only half of the error
__m256i sss256 = _mm256_set1_epi32(1 << (coefs_precision-2));
for (; x < xmax - 7; x += 8) { for (; x < xmax - 7; x += 8) {
__m256i pix, mmk, source; __m256i pix, mmk, source;
@ -289,7 +290,7 @@ ImagingResampleHorizontalConvolution8u(UINT32 *lineOut, UINT32 *lineIn,
#else #else
sss = _mm_set1_epi32((1 << (coefs_precision -1)) + xmax / 2); sss = _mm_set1_epi32(1 << (coefs_precision-1));
for (; x < xmax - 7; x += 8) { for (; x < xmax - 7; x += 8) {
__m128i pix, mmk, source; __m128i pix, mmk, source;