mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-08-21 04:34:47 +03:00
SIMD Filter. 3x3 SSE4 singleband
This commit is contained in:
parent
4da1f70050
commit
64ab983164
|
@ -104,11 +104,9 @@ ImagingExpand(Imaging imIn, int xmargin, int ymargin, int mode) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
ImagingFilter3x3(Imaging imOut, Imaging im, const float *kernel, float offset) {
|
ImagingFilter3x3(Imaging imOut, Imaging im, const float* kernel,
|
||||||
#define KERNEL1x3(in0, x, kernel, d) \
|
float offset)
|
||||||
(_i2f((UINT8)in0[x - d]) * (kernel)[0] + _i2f((UINT8)in0[x]) * (kernel)[1] + \
|
{
|
||||||
_i2f((UINT8)in0[x + d]) * (kernel)[2])
|
|
||||||
|
|
||||||
#define MM_KERNEL1x3_LOAD(row, x) \
|
#define MM_KERNEL1x3_LOAD(row, x) \
|
||||||
pix0##row = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(*(__m128i*) &in1[x])); \
|
pix0##row = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(*(__m128i*) &in1[x])); \
|
||||||
pix1##row = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(*(__m128i*) &in0[x])); \
|
pix1##row = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(*(__m128i*) &in0[x])); \
|
||||||
|
@ -124,20 +122,27 @@ ImagingFilter3x3(Imaging imOut, Imaging im, const float *kernel, float offset) {
|
||||||
memcpy(imOut->image[0], im->image[0], im->linesize);
|
memcpy(imOut->image[0], im->image[0], im->linesize);
|
||||||
if (im->bands == 1) {
|
if (im->bands == 1) {
|
||||||
// Add one time for rounding
|
// Add one time for rounding
|
||||||
offset += 0.5;
|
offset += 0.5 / 4;
|
||||||
for (y = 1; y < im->ysize - 1; y++) {
|
for (y = 1; y < im->ysize-1; y++) {
|
||||||
UINT8 *in_1 = (UINT8 *)im->image[y - 1];
|
UINT8* in_1 = (UINT8*) im->image[y-1];
|
||||||
UINT8 *in0 = (UINT8 *)im->image[y];
|
UINT8* in0 = (UINT8*) im->image[y];
|
||||||
UINT8 *in1 = (UINT8 *)im->image[y + 1];
|
UINT8* in1 = (UINT8*) im->image[y+1];
|
||||||
UINT8 *out = (UINT8 *)imOut->image[y];
|
UINT8* out = (UINT8*) imOut->image[y];
|
||||||
|
__m128 kernel0 = _mm_set_ps(0, kernel[2], kernel[1], kernel[0]);
|
||||||
|
__m128 kernel1 = _mm_set_ps(0, kernel[5], kernel[4], kernel[3]);
|
||||||
|
__m128 kernel2 = _mm_set_ps(0, kernel[8], kernel[7], kernel[6]);
|
||||||
|
|
||||||
out[0] = in0[0];
|
out[0] = in0[0];
|
||||||
for (x = 1; x < im->xsize - 1; x++) {
|
for (x = 1; x < im->xsize-1; x++) {
|
||||||
float ss = offset;
|
__m128 ss = _mm_set1_ps(offset);;
|
||||||
ss += KERNEL1x3(in1, x, &kernel[0], 1);
|
__m128 pix00, pix10, pix20;
|
||||||
ss += KERNEL1x3(in0, x, &kernel[3], 1);
|
MM_KERNEL1x3_LOAD(0, x-1);
|
||||||
ss += KERNEL1x3(in_1, x, &kernel[6], 1);
|
ss = _mm_add_ps(ss, _mm_mul_ps(pix00, kernel0));
|
||||||
out[x] = clip8(ss);
|
ss = _mm_add_ps(ss, _mm_mul_ps(pix10, kernel1));
|
||||||
|
ss = _mm_add_ps(ss, _mm_mul_ps(pix20, kernel2));
|
||||||
|
ss = _mm_hadd_ps(ss, ss);
|
||||||
|
ss = _mm_hadd_ps(ss, ss);
|
||||||
|
out[x] = clip8(_mm_cvtss_f32(ss));
|
||||||
}
|
}
|
||||||
out[x] = in0[x];
|
out[x] = in0[x];
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user