mirror of
https://github.com/python-pillow/Pillow.git
synced 2025-03-30 23:14:13 +03:00
Make SSE4 optional
This commit is contained in:
parent
2ef6424b55
commit
d1717f3ffc
|
@ -23,28 +23,6 @@ Imaging
|
||||||
ImagingAlphaComposite(Imaging imDst, Imaging imSrc) {
|
ImagingAlphaComposite(Imaging imDst, Imaging imSrc) {
|
||||||
Imaging imOut;
|
Imaging imOut;
|
||||||
int x, y;
|
int x, y;
|
||||||
int xsize = imDst->xsize;
|
|
||||||
__m128i mm_max_alpha = _mm_set1_epi32(255);
|
|
||||||
__m128i mm_max_alpha2 = _mm_set1_epi32(255 * 255);
|
|
||||||
__m128i mm_zero = _mm_setzero_si128();
|
|
||||||
__m128i mm_half = _mm_set1_epi16(128);
|
|
||||||
__m128i mm_get_lo = _mm_set_epi8(
|
|
||||||
-1,-1, 5,4, 5,4, 5,4, -1,-1, 1,0, 1,0, 1,0);
|
|
||||||
__m128i mm_get_hi = _mm_set_epi8(
|
|
||||||
-1,-1, 13,12, 13,12, 13,12, -1,-1, 9,8, 9,8, 9,8);
|
|
||||||
#if defined(__AVX2__)
|
|
||||||
__m256i vmm_max_alpha = _mm256_set1_epi32(255);
|
|
||||||
__m256i vmm_max_alpha2 = _mm256_set1_epi32(255 * 255);
|
|
||||||
__m256i vmm_zero = _mm256_setzero_si256();
|
|
||||||
__m256i vmm_half = _mm256_set1_epi16(128);
|
|
||||||
__m256i vmm_get_lo = _mm256_set_epi8(
|
|
||||||
-1,-1, 5,4, 5,4, 5,4, -1,-1, 1,0, 1,0, 1,0,
|
|
||||||
-1,-1, 5,4, 5,4, 5,4, -1,-1, 1,0, 1,0, 1,0);
|
|
||||||
__m256i vmm_get_hi = _mm256_set_epi8(
|
|
||||||
-1,-1, 13,12, 13,12, 13,12, -1,-1, 9,8, 9,8, 9,8,
|
|
||||||
-1,-1, 13,12, 13,12, 13,12, -1,-1, 9,8, 9,8, 9,8);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* Check arguments */
|
/* Check arguments */
|
||||||
if (!imDst || !imSrc || strcmp(imDst->mode, "RGBA") ||
|
if (!imDst || !imSrc || strcmp(imDst->mode, "RGBA") ||
|
||||||
|
@ -71,11 +49,22 @@ ImagingAlphaComposite(Imaging imDst, Imaging imSrc) {
|
||||||
x = 0;
|
x = 0;
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
|
{
|
||||||
|
__m256i vmm_max_alpha = _mm256_set1_epi32(255);
|
||||||
|
__m256i vmm_max_alpha2 = _mm256_set1_epi32(255 * 255);
|
||||||
|
__m256i vmm_zero = _mm256_setzero_si256();
|
||||||
|
__m256i vmm_half = _mm256_set1_epi16(128);
|
||||||
|
__m256i vmm_get_lo = _mm256_set_epi8(
|
||||||
|
-1,-1, 5,4, 5,4, 5,4, -1,-1, 1,0, 1,0, 1,0,
|
||||||
|
-1,-1, 5,4, 5,4, 5,4, -1,-1, 1,0, 1,0, 1,0);
|
||||||
|
__m256i vmm_get_hi = _mm256_set_epi8(
|
||||||
|
-1,-1, 13,12, 13,12, 13,12, -1,-1, 9,8, 9,8, 9,8,
|
||||||
|
-1,-1, 13,12, 13,12, 13,12, -1,-1, 9,8, 9,8, 9,8);
|
||||||
|
|
||||||
#define MM_SHIFTDIV255_epi16(src)\
|
#define MM_SHIFTDIV255_epi16(src)\
|
||||||
_mm256_srli_epi16(_mm256_add_epi16(src, _mm256_srli_epi16(src, 8)), 8)
|
_mm256_srli_epi16(_mm256_add_epi16(src, _mm256_srli_epi16(src, 8)), 8)
|
||||||
|
|
||||||
for (; x < xsize - 7; x += 8) {
|
for (; x < imDst->xsize - 7; x += 8) {
|
||||||
__m256i mm_dst, mm_dst_lo, mm_dst_hi;
|
__m256i mm_dst, mm_dst_lo, mm_dst_hi;
|
||||||
__m256i mm_src, mm_src_lo, mm_src_hi;
|
__m256i mm_src, mm_src_lo, mm_src_hi;
|
||||||
__m256i mm_dst_a, mm_src_a, mm_out_a, mm_blend;
|
__m256i mm_dst_a, mm_src_a, mm_out_a, mm_blend;
|
||||||
|
@ -126,13 +115,23 @@ ImagingAlphaComposite(Imaging imDst, Imaging imSrc) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef MM_SHIFTDIV255_epi16
|
#undef MM_SHIFTDIV255_epi16
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(__SSE4__)
|
||||||
|
{
|
||||||
|
__m128i mm_max_alpha = _mm_set1_epi32(255);
|
||||||
|
__m128i mm_max_alpha2 = _mm_set1_epi32(255 * 255);
|
||||||
|
__m128i mm_zero = _mm_setzero_si128();
|
||||||
|
__m128i mm_half = _mm_set1_epi16(128);
|
||||||
|
__m128i mm_get_lo = _mm_set_epi8(
|
||||||
|
-1,-1, 5,4, 5,4, 5,4, -1,-1, 1,0, 1,0, 1,0);
|
||||||
|
__m128i mm_get_hi = _mm_set_epi8(
|
||||||
|
-1,-1, 13,12, 13,12, 13,12, -1,-1, 9,8, 9,8, 9,8);
|
||||||
|
|
||||||
#define MM_SHIFTDIV255_epi16(src)\
|
#define MM_SHIFTDIV255_epi16(src)\
|
||||||
_mm_srli_epi16(_mm_add_epi16(src, _mm_srli_epi16(src, 8)), 8)
|
_mm_srli_epi16(_mm_add_epi16(src, _mm_srli_epi16(src, 8)), 8)
|
||||||
|
|
||||||
for (; x < xsize - 3; x += 4) {
|
for (; x < imDst->xsize - 3; x += 4) {
|
||||||
__m128i mm_dst, mm_dst_lo, mm_dst_hi;
|
__m128i mm_dst, mm_dst_lo, mm_dst_hi;
|
||||||
__m128i mm_src, mm_src_hi, mm_src_lo;
|
__m128i mm_src, mm_src_hi, mm_src_lo;
|
||||||
__m128i mm_dst_a, mm_src_a, mm_out_a, mm_blend;
|
__m128i mm_dst_a, mm_src_a, mm_out_a, mm_blend;
|
||||||
|
@ -191,8 +190,10 @@ ImagingAlphaComposite(Imaging imDst, Imaging imSrc) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef MM_SHIFTDIV255_epi16
|
#undef MM_SHIFTDIV255_epi16
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
for (; x < xsize; x += 1) {
|
for (; x < imDst->xsize; x += 1) {
|
||||||
if (src[x].a == 0) {
|
if (src[x].a == 0) {
|
||||||
// Copy 4 bytes at once.
|
// Copy 4 bytes at once.
|
||||||
out[x] = dst[x];
|
out[x] = dst[x];
|
||||||
|
|
Loading…
Reference in New Issue
Block a user