mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-08-20 20:24:45 +03:00
SIMD Bands
This commit is contained in:
parent
8c3d6a32d1
commit
e4ca18a9c8
|
@ -17,6 +17,16 @@
|
||||||
|
|
||||||
#include "Imaging.h"
|
#include "Imaging.h"
|
||||||
|
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#include <mmintrin.h>
|
||||||
|
#include <smmintrin.h>
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
#include <immintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Imaging
|
Imaging
|
||||||
ImagingGetBand(Imaging imIn, int band) {
|
ImagingGetBand(Imaging imIn, int band) {
|
||||||
Imaging imOut;
|
Imaging imOut;
|
||||||
|
@ -52,8 +62,10 @@ ImagingGetBand(Imaging imIn, int band) {
|
||||||
UINT8 *out = imOut->image8[y];
|
UINT8 *out = imOut->image8[y];
|
||||||
x = 0;
|
x = 0;
|
||||||
for (; x < imIn->xsize - 3; x += 4) {
|
for (; x < imIn->xsize - 3; x += 4) {
|
||||||
UINT32 v = MAKE_UINT32(in[0], in[4], in[8], in[12]);
|
__m128i source = _mm_loadu_si128((__m128i *) in);
|
||||||
memcpy(out + x, &v, sizeof(v));
|
*((UINT32*) (out + x)) = _mm_cvtsi128_si32(
|
||||||
|
_mm_shuffle_epi8(source, _mm_set_epi8(
|
||||||
|
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 12,8,4,0)));
|
||||||
in += 16;
|
in += 16;
|
||||||
}
|
}
|
||||||
for (; x < imIn->xsize; x++) {
|
for (; x < imIn->xsize; x++) {
|
||||||
|
@ -99,10 +111,12 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
|
||||||
UINT8 *out1 = bands[1]->image8[y];
|
UINT8 *out1 = bands[1]->image8[y];
|
||||||
x = 0;
|
x = 0;
|
||||||
for (; x < imIn->xsize - 3; x += 4) {
|
for (; x < imIn->xsize - 3; x += 4) {
|
||||||
UINT32 v = MAKE_UINT32(in[0], in[4], in[8], in[12]);
|
__m128i source = _mm_loadu_si128((__m128i *) in);
|
||||||
memcpy(out0 + x, &v, sizeof(v));
|
source = _mm_shuffle_epi8(source, _mm_set_epi8(
|
||||||
v = MAKE_UINT32(in[0 + 3], in[4 + 3], in[8 + 3], in[12 + 3]);
|
15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0));
|
||||||
memcpy(out1 + x, &v, sizeof(v));
|
*((UINT32*) (out0 + x)) = _mm_cvtsi128_si32(source);
|
||||||
|
*((UINT32*) (out1 + x)) = _mm_cvtsi128_si32(
|
||||||
|
_mm_srli_si128(source, 12));
|
||||||
in += 16;
|
in += 16;
|
||||||
}
|
}
|
||||||
for (; x < imIn->xsize; x++) {
|
for (; x < imIn->xsize; x++) {
|
||||||
|
@ -119,12 +133,14 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
|
||||||
UINT8 *out2 = bands[2]->image8[y];
|
UINT8 *out2 = bands[2]->image8[y];
|
||||||
x = 0;
|
x = 0;
|
||||||
for (; x < imIn->xsize - 3; x += 4) {
|
for (; x < imIn->xsize - 3; x += 4) {
|
||||||
UINT32 v = MAKE_UINT32(in[0], in[4], in[8], in[12]);
|
__m128i source = _mm_loadu_si128((__m128i *) in);
|
||||||
memcpy(out0 + x, &v, sizeof(v));
|
source = _mm_shuffle_epi8(source, _mm_set_epi8(
|
||||||
v = MAKE_UINT32(in[0 + 1], in[4 + 1], in[8 + 1], in[12 + 1]);
|
15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0));
|
||||||
memcpy(out1 + x, &v, sizeof(v));
|
*((UINT32*) (out0 + x)) = _mm_cvtsi128_si32(source);
|
||||||
v = MAKE_UINT32(in[0 + 2], in[4 + 2], in[8 + 2], in[12 + 2]);
|
*((UINT32*) (out1 + x)) = _mm_cvtsi128_si32(
|
||||||
memcpy(out2 + x, &v, sizeof(v));
|
_mm_srli_si128(source, 4));
|
||||||
|
*((UINT32*) (out2 + x)) = _mm_cvtsi128_si32(
|
||||||
|
_mm_srli_si128(source, 8));
|
||||||
in += 16;
|
in += 16;
|
||||||
}
|
}
|
||||||
for (; x < imIn->xsize; x++) {
|
for (; x < imIn->xsize; x++) {
|
||||||
|
@ -143,14 +159,16 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
|
||||||
UINT8 *out3 = bands[3]->image8[y];
|
UINT8 *out3 = bands[3]->image8[y];
|
||||||
x = 0;
|
x = 0;
|
||||||
for (; x < imIn->xsize - 3; x += 4) {
|
for (; x < imIn->xsize - 3; x += 4) {
|
||||||
UINT32 v = MAKE_UINT32(in[0], in[4], in[8], in[12]);
|
__m128i source = _mm_loadu_si128((__m128i *) in);
|
||||||
memcpy(out0 + x, &v, sizeof(v));
|
source = _mm_shuffle_epi8(source, _mm_set_epi8(
|
||||||
v = MAKE_UINT32(in[0 + 1], in[4 + 1], in[8 + 1], in[12 + 1]);
|
15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0));
|
||||||
memcpy(out1 + x, &v, sizeof(v));
|
*((UINT32*) (out0 + x)) = _mm_cvtsi128_si32(source);
|
||||||
v = MAKE_UINT32(in[0 + 2], in[4 + 2], in[8 + 2], in[12 + 2]);
|
*((UINT32*) (out1 + x)) = _mm_cvtsi128_si32(
|
||||||
memcpy(out2 + x, &v, sizeof(v));
|
_mm_srli_si128(source, 4));
|
||||||
v = MAKE_UINT32(in[0 + 3], in[4 + 3], in[8 + 3], in[12 + 3]);
|
*((UINT32*) (out2 + x)) = _mm_cvtsi128_si32(
|
||||||
memcpy(out3 + x, &v, sizeof(v));
|
_mm_srli_si128(source, 8));
|
||||||
|
*((UINT32*) (out3 + x)) = _mm_cvtsi128_si32(
|
||||||
|
_mm_srli_si128(source, 12));
|
||||||
in += 16;
|
in += 16;
|
||||||
}
|
}
|
||||||
for (; x < imIn->xsize; x++) {
|
for (; x < imIn->xsize; x++) {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user