mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-08-20 20:24:45 +03:00
SIMD Resample: fix wrong usage of xmax for division compensation
This commit is contained in:
parent
247cba2e30
commit
b3c28911c0
|
@ -17,7 +17,7 @@ ImagingResampleHorizontalConvolution8u4x(
|
||||||
{
|
{
|
||||||
__m256i sss0, sss1;
|
__m256i sss0, sss1;
|
||||||
__m256i zero = _mm256_setzero_si256();
|
__m256i zero = _mm256_setzero_si256();
|
||||||
__m256i initial = _mm256_set1_epi32(1 << (coefs_precision -1));
|
__m256i initial = _mm256_set1_epi32(1 << (coefs_precision-1));
|
||||||
sss0 = initial;
|
sss0 = initial;
|
||||||
sss1 = initial;
|
sss1 = initial;
|
||||||
|
|
||||||
|
@ -106,7 +106,7 @@ ImagingResampleHorizontalConvolution8u4x(
|
||||||
#else
|
#else
|
||||||
{
|
{
|
||||||
__m128i sss0, sss1, sss2, sss3;
|
__m128i sss0, sss1, sss2, sss3;
|
||||||
__m128i initial = _mm_set1_epi32(1 << (coefs_precision -1));
|
__m128i initial = _mm_set1_epi32(1 << (coefs_precision-1));
|
||||||
sss0 = initial;
|
sss0 = initial;
|
||||||
sss1 = initial;
|
sss1 = initial;
|
||||||
sss2 = initial;
|
sss2 = initial;
|
||||||
|
@ -231,10 +231,11 @@ ImagingResampleHorizontalConvolution8u(UINT32 *lineOut, UINT32 *lineIn,
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
if (xmax < 8) {
|
if (xmax < 8) {
|
||||||
sss = _mm_set1_epi32((1 << (coefs_precision -1)) + xmax / 2);
|
sss = _mm_set1_epi32(1 << (coefs_precision-1));
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
__m256i sss256 = _mm256_set1_epi32((1 << (coefs_precision -2)) + xmax / 4);
|
// The lower half of sss256 is later added to its upper half, so seed each half with only half of the rounding term
|
||||||
|
__m256i sss256 = _mm256_set1_epi32(1 << (coefs_precision-2));
|
||||||
|
|
||||||
for (; x < xmax - 7; x += 8) {
|
for (; x < xmax - 7; x += 8) {
|
||||||
__m256i pix, mmk, source;
|
__m256i pix, mmk, source;
|
||||||
|
@ -289,7 +290,7 @@ ImagingResampleHorizontalConvolution8u(UINT32 *lineOut, UINT32 *lineIn,
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
sss = _mm_set1_epi32((1 << (coefs_precision -1)) + xmax / 2);
|
sss = _mm_set1_epi32(1 << (coefs_precision-1));
|
||||||
|
|
||||||
for (; x < xmax - 7; x += 8) {
|
for (; x < xmax - 7; x += 8) {
|
||||||
__m128i pix, mmk, source;
|
__m128i pix, mmk, source;
|
||||||
|
|
|
@ -7,11 +7,11 @@ ImagingResampleVerticalConvolution8u(UINT32 *lineOut, Imaging imIn,
|
||||||
int xx = 0;
|
int xx = 0;
|
||||||
int xsize = imIn->xsize;
|
int xsize = imIn->xsize;
|
||||||
|
|
||||||
__m128i initial = _mm_set1_epi32(1 << (coefs_precision -1));
|
__m128i initial = _mm_set1_epi32(1 << (coefs_precision-1));
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
|
||||||
__m256i initial_256 = _mm256_set1_epi32(1 << (coefs_precision -1));
|
__m256i initial_256 = _mm256_set1_epi32(1 << (coefs_precision-1));
|
||||||
|
|
||||||
for (; xx < xsize - 7; xx += 8) {
|
for (; xx < xsize - 7; xx += 8) {
|
||||||
__m256i sss0 = initial_256;
|
__m256i sss0 = initial_256;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user