mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-08-25 22:54:46 +03:00
fix offset
This commit is contained in:
parent
db69139906
commit
98bed5abae
|
@ -29,22 +29,12 @@
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#include <mmintrin.h>
|
#include <mmintrin.h>
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static inline UINT8 clip8(float in)
|
|
||||||
{
|
|
||||||
if (in <= 0.0)
|
|
||||||
return 0;
|
|
||||||
if (in >= 255.0)
|
|
||||||
return 255;
|
|
||||||
return (UINT8) in;
|
|
||||||
}
|
|
||||||
|
|
||||||
Imaging
|
Imaging
|
||||||
ImagingExpand(Imaging imIn, int xmargin, int ymargin, int mode)
|
ImagingExpand(Imaging imIn, int xmargin, int ymargin, int mode)
|
||||||
{
|
{
|
||||||
|
@ -471,8 +461,6 @@ ImagingFilter5x5(Imaging imOut, Imaging im, const float* kernel,
|
||||||
memcpy(imOut->image[0], im->image[0], im->linesize);
|
memcpy(imOut->image[0], im->image[0], im->linesize);
|
||||||
memcpy(imOut->image[1], im->image[1], im->linesize);
|
memcpy(imOut->image[1], im->image[1], im->linesize);
|
||||||
if (im->bands == 1) {
|
if (im->bands == 1) {
|
||||||
// Add one time for rounding
|
|
||||||
offset += 0.5;
|
|
||||||
for (y = 2; y < im->ysize-2; y++) {
|
for (y = 2; y < im->ysize-2; y++) {
|
||||||
UINT8* in_2 = (UINT8*) im->image[y-2];
|
UINT8* in_2 = (UINT8*) im->image[y-2];
|
||||||
UINT8* in_1 = (UINT8*) im->image[y-1];
|
UINT8* in_1 = (UINT8*) im->image[y-1];
|
||||||
|
@ -533,14 +521,31 @@ ImagingFilter5x5(Imaging imOut, Imaging im, const float* kernel,
|
||||||
ssi0 = _mm_packus_epi16(ssi0, ssi0);
|
ssi0 = _mm_packus_epi16(ssi0, ssi0);
|
||||||
*((UINT32*) &out[x]) = _mm_cvtsi128_si32(ssi0);
|
*((UINT32*) &out[x]) = _mm_cvtsi128_si32(ssi0);
|
||||||
}
|
}
|
||||||
for (; x < im->xsize-2; x++) {
|
for (; x < im->xsize-2; x ++) {
|
||||||
float ss = offset;
|
__m128 ss0;
|
||||||
ss += KERNEL1x5(in2, x, &kernel[0], 1);
|
__m128i ssi0;
|
||||||
ss += KERNEL1x5(in1, x, &kernel[5], 1);
|
__m128 pix00, pix10, pix20, pix30, pix40;
|
||||||
ss += KERNEL1x5(in0, x, &kernel[10], 1);
|
|
||||||
ss += KERNEL1x5(in_1, x, &kernel[15], 1);
|
MM_KERNEL1x5_LOAD(0, x-2);
|
||||||
ss += KERNEL1x5(in_2, x, &kernel[20], 1);
|
|
||||||
out[x] = clip8(ss);
|
ss0 = _mm_setzero_ps();
|
||||||
|
ss0 = _mm_add_ps(ss0, _mm_mul_ps(pix00, kernel01));
|
||||||
|
ss0 = _mm_add_ps(ss0, _mm_mul_ps(pix10, kernel11));
|
||||||
|
ss0 = _mm_add_ps(ss0, _mm_mul_ps(pix20, kernel21));
|
||||||
|
ss0 = _mm_add_ps(ss0, _mm_mul_ps(pix30, kernel31));
|
||||||
|
ss0 = _mm_add_ps(ss0, _mm_mul_ps(pix40, kernel41));
|
||||||
|
ss0 = _mm_add_ps(ss0, _mm_mul_ps(
|
||||||
|
_mm_set_ps(in_1[x+2], in0[x+2], in1[x+2], in2[x+2]),
|
||||||
|
kernelx1));
|
||||||
|
|
||||||
|
ss0 = _mm_add_ps(ss0, _mm_set_ps(
|
||||||
|
0, 0, offset, kernel[24] * in_2[x+2]));
|
||||||
|
ss0 = _mm_hadd_ps(ss0, ss0);
|
||||||
|
ss0 = _mm_hadd_ps(ss0, ss0);
|
||||||
|
ssi0 = _mm_cvtps_epi32(ss0);
|
||||||
|
ssi0 = _mm_packs_epi32(ssi0, ssi0);
|
||||||
|
ssi0 = _mm_packus_epi16(ssi0, ssi0);
|
||||||
|
out[x] = _mm_cvtsi128_si32(ssi0);
|
||||||
}
|
}
|
||||||
out[x+0] = in0[x+0];
|
out[x+0] = in0[x+0];
|
||||||
out[x+1] = in0[x+1];
|
out[x+1] = in0[x+1];
|
||||||
|
|
Loading…
Reference in New Issue
Block a user