clang-format

This commit is contained in:
Aleksandr Karpinskii 2024-07-07 19:09:35 +04:00
parent 2db9cd3499
commit 5df34a24f8
2 changed files with 46 additions and 28 deletions

View File

@ -51,7 +51,22 @@ ImagingGetBand(Imaging imIn, int band) {
#ifdef __SSE4__ #ifdef __SSE4__
shuffle_mask = _mm_set_epi8( shuffle_mask = _mm_set_epi8(
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 12+band,8+band,4+band,0+band); -1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
12 + band,
8 + band,
4 + band,
0 + band);
#endif #endif
/* Extract band from image */ /* Extract band from image */
@ -62,8 +77,8 @@ ImagingGetBand(Imaging imIn, int band) {
for (; x < imIn->xsize - 3; x += 4) { for (; x < imIn->xsize - 3; x += 4) {
#ifdef __SSE4__ #ifdef __SSE4__
__m128i source = _mm_loadu_si128((__m128i *)(in - band)); __m128i source = _mm_loadu_si128((__m128i *)(in - band));
*((UINT32 *)(out + x)) = _mm_cvtsi128_si32( *((UINT32 *)(out + x)) =
_mm_shuffle_epi8(source, shuffle_mask)); _mm_cvtsi128_si32(_mm_shuffle_epi8(source, shuffle_mask));
#else #else
UINT32 v = MAKE_UINT32(in[0], in[4], in[8], in[12]); UINT32 v = MAKE_UINT32(in[0], in[4], in[8], in[12]);
memcpy(out + x, &v, sizeof(v)); memcpy(out + x, &v, sizeof(v));
@ -115,8 +130,9 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
for (; x < imIn->xsize - 3; x += 4) { for (; x < imIn->xsize - 3; x += 4) {
#ifdef __SSE4__ #ifdef __SSE4__
__m128i source = _mm_loadu_si128((__m128i *)in); __m128i source = _mm_loadu_si128((__m128i *)in);
source = _mm_shuffle_epi8(source, _mm_set_epi8( source = _mm_shuffle_epi8(
15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0)); source,
_mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0));
*((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source); *((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source);
*((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 12)); *((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 12));
#else #else
@ -143,8 +159,9 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
for (; x < imIn->xsize - 3; x += 4) { for (; x < imIn->xsize - 3; x += 4) {
#ifdef __SSE4__ #ifdef __SSE4__
__m128i source = _mm_loadu_si128((__m128i *)in); __m128i source = _mm_loadu_si128((__m128i *)in);
source = _mm_shuffle_epi8(source, _mm_set_epi8( source = _mm_shuffle_epi8(
15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0)); source,
_mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0));
*((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source); *((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source);
*((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 4)); *((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 4));
*((UINT32 *)(out2 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 8)); *((UINT32 *)(out2 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 8));
@ -176,8 +193,9 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
for (; x < imIn->xsize - 3; x += 4) { for (; x < imIn->xsize - 3; x += 4) {
#ifdef __SSE4__ #ifdef __SSE4__
__m128i source = _mm_loadu_si128((__m128i *)in); __m128i source = _mm_loadu_si128((__m128i *)in);
source = _mm_shuffle_epi8(source, _mm_set_epi8( source = _mm_shuffle_epi8(
15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0)); source,
_mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0));
*((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source); *((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source);
*((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 4)); *((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 4));
*((UINT32 *)(out2 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 8)); *((UINT32 *)(out2 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 8));

View File

@ -1,46 +1,46 @@
/* Microsoft compiler doesn't limit intrinsics for an architecture. /* Microsoft compiler doesn't limit intrinsics for an architecture.
This macro is set only on x86 and means SSE2 and above including AVX2. */ This macro is set only on x86 and means SSE2 and above including AVX2. */
#if defined(_M_X64) || _M_IX86_FP == 2 #if defined(_M_X64) || _M_IX86_FP == 2
#define __SSE2__ #define __SSE2__
/* However, the Microsoft compiler sets __AVX2__ if the /arch:AVX2 option is set */ /* However, the Microsoft compiler sets __AVX2__ if the /arch:AVX2 option is set */
#ifdef __AVX2__ #ifdef __AVX2__
#define __SSE4_2__ #define __SSE4_2__
#endif #endif
#endif #endif
/* For better readability */ /* For better readability */
#ifdef __SSE4_2__ #ifdef __SSE4_2__
#define __SSE4__ #define __SSE4__
#endif #endif
#ifdef __SSE2__ #ifdef __SSE2__
#include <mmintrin.h> // MMX #include <mmintrin.h> // MMX
#include <xmmintrin.h> // SSE #include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2 #include <emmintrin.h> // SSE2
#endif #endif
#ifdef __SSE4__ #ifdef __SSE4__
#include <pmmintrin.h> // SSE3 #include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3 #include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4.1 #include <smmintrin.h> // SSE4.1
#include <nmmintrin.h> // SSE4.2 #include <nmmintrin.h> // SSE4.2
#endif #endif
#ifdef __AVX2__ #ifdef __AVX2__
#include <immintrin.h> // AVX, AVX2 #include <immintrin.h> // AVX, AVX2
#endif #endif
#ifdef __aarch64__ #ifdef __aarch64__
#include <arm_neon.h> // ARM NEON #include <arm_neon.h> // ARM NEON
#endif #endif
#ifdef __SSE4__ #ifdef __SSE4__
/* Widen four packed 8-bit values at `ptr` into four 32-bit lanes.
 *
 * Reads 32 bits from `ptr` through an INT32 pointer cast, places them in the
 * low lane of an XMM register via _mm_cvtsi32_si128, and expands the four
 * bytes to 32-bit integers with _mm_cvtepu8_epi32.
 *
 * NOTE(review): the INT32 dereference assumes `ptr` may legally be read as
 * INT32 (alignment / aliasing) -- confirm against callers.
 * NOTE(review): each line below shows the pre- and post-clang-format text of
 * the same source line side by side; only specifier order and cast spacing
 * differ between the two. */
static __m128i inline static inline __m128i
mm_cvtepu8_epi32(void *ptr) { mm_cvtepu8_epi32(void *ptr) {
return _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*(INT32 *) ptr)); return _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*(INT32 *)ptr));
} }
#endif #endif
#ifdef __AVX2__ #ifdef __AVX2__
/* Widen eight packed 8-bit values at `ptr` into eight 32-bit lanes.
 *
 * Loads 64 bits from `ptr` into the low half of an XMM register with
 * _mm_loadl_epi64, then expands the eight bytes to 32-bit integers in a YMM
 * register with _mm256_cvtepu8_epi32.
 *
 * NOTE(review): each line below shows the pre- and post-clang-format text of
 * the same source line side by side; only specifier order and cast spacing
 * differ between the two. */
static __m256i inline static inline __m256i
mm256_cvtepu8_epi32(void *ptr) { mm256_cvtepu8_epi32(void *ptr) {
return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *) ptr)); return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)ptr));
} }
#endif #endif