From: Andrei Tatar Date: Mon, 17 Jul 2023 14:50:18 +0000 (+0200) Subject: Add compatibility with clang >= 15 X-Git-Tag: RELEASE-0.14.0~11 X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=54dbf40dd10f641340c38370661e8f74ffe87ddc;p=unikraft%2Flibs%2Fintel-intrinsics.git Add compatibility with clang >= 15 Implementations based off headers in LLVM 15.0.0. Signed-off-by: Andrei Tatar Reviewed-by: Maria Sfiraiala Reviewed-by: Radu Nichita Approved-by: Razvan Deaconescu Tested-by: Unikraft CI GitHub-Closes: #3 --- diff --git a/include/avx2intrin.h b/include/avx2intrin.h index e33514a..b6ff8b2 100644 --- a/include/avx2intrin.h +++ b/include/avx2intrin.h @@ -92,25 +92,41 @@ _mm256_add_epi64(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi8(__m256i __a, __m256i __b) { +#if (__clang_major__ > 14) + return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b); +#else return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b); +#endif } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi16(__m256i __a, __m256i __b) { +#if (__clang_major__ > 14) + return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b); +#else return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b); +#endif } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu8(__m256i __a, __m256i __b) { +#if (__clang_major__ > 14) + return (__m256i)__builtin_elementwise_add_sat((__v32qu)__a, (__v32qu)__b); +#else return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b); +#endif } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu16(__m256i __a, __m256i __b) { +#if (__clang_major__ > 14) + return (__m256i)__builtin_elementwise_add_sat((__v16hu)__a, (__v16hu)__b); +#else return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b); +#endif } #define _mm256_alignr_epi8(a, b, n) \ @@ -628,25 +644,41 @@ _mm256_sub_epi64(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi8(__m256i __a, __m256i __b) { +#if (__clang_major__ > 14) + return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b); +#else return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b); +#endif } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi16(__m256i __a, __m256i __b) { +#if (__clang_major__ > 14) + return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b); +#else return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b); +#endif } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu8(__m256i __a, __m256i __b) { +#if (__clang_major__ > 14) + return (__m256i)__builtin_elementwise_sub_sat((__v32qu)__a, (__v32qu)__b); +#else return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b); +#endif } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu16(__m256i __a, __m256i __b) { +#if (__clang_major__ > 14) + return (__m256i)__builtin_elementwise_sub_sat((__v16hu)__a, (__v16hu)__b); +#else return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b); +#endif } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/include/avx512bwintrin.h b/include/avx512bwintrin.h index 522ef10..15d9bd0 100644 --- a/include/avx512bwintrin.h +++ b/include/avx512bwintrin.h @@ -617,7 +617,11 @@ _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epi8 (__m512i __A, __m512i __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B); +#else return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -639,7 +643,11 @@ _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epi16 (__m512i __A, __m512i __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B); +#else return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -661,7 +669,11 @@ _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epu8 (__m512i __A, __m512i __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B); +#else return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -683,7 +695,11 @@ _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_adds_epu16 (__m512i __A, __m512i __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B); +#else return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -950,7 +966,11 @@ _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epi8 (__m512i __A, __m512i __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B); +#else return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -972,7 +992,11 @@ _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epi16 (__m512i __A, __m512i __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B); +#else return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -994,7 +1018,11 @@ _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epu8 (__m512i __A, __m512i __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B); +#else return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1016,7 +1044,11 @@ _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_subs_epu16 (__m512i __A, __m512i __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B); +#else return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1506,7 +1538,11 @@ _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi16(__m512i __A, unsigned int __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B); +#else return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1598,7 +1634,11 @@ _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi16(__m512i __A, unsigned int __B) { +#if (__clang_major__ > 14) + return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B); +#else return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); +#endif } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/include/avx512fintrin.h b/include/avx512fintrin.h index 50e0e28..fe58278 100644 --- a/include/avx512fintrin.h +++ b/include/avx512fintrin.h @@ -9316,11 +9316,19 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) */ static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { +#if (__clang_major__ > 14) + return __builtin_reduce_add((__v8di)__W); +#else return __builtin_ia32_reduce_add_q512(__W); +#endif } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { +#if (__clang_major__ > 14) + return __builtin_reduce_mul((__v8di)__W); +#else return __builtin_ia32_reduce_mul_q512(__W); +#endif } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { @@ -9334,13 +9342,21 @@ static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_maskz_mov_epi64(__M, __W); +#if (__clang_major__ > 14) + return __builtin_reduce_add((__v8di)__W); +#else return __builtin_ia32_reduce_add_q512(__W); +#endif } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W); +#if (__clang_major__ > 14) + return __builtin_reduce_mul((__v8di)__W); +#else return __builtin_ia32_reduce_mul_q512(__W); +#endif } static __inline__ long long __DEFAULT_FN_ATTRS512 @@ -9380,12 +9396,20 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W) { +#if (__clang_major__ > 14) + return __builtin_reduce_add((__v16si)__W); +#else return __builtin_ia32_reduce_add_d512((__v16si)__W); +#endif } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W) { +#if (__clang_major__ > 14) + return __builtin_reduce_mul((__v16si)__W); +#else return __builtin_ia32_reduce_mul_d512((__v16si)__W); +#endif } static __inline__ int __DEFAULT_FN_ATTRS512 @@ -9401,13 +9425,21 @@ _mm512_reduce_or_epi32(__m512i __W) { static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_maskz_mov_epi32(__M, __W); +#if (__clang_major__ > 14) + return __builtin_reduce_add((__v16si)__W); +#else return __builtin_ia32_reduce_add_d512((__v16si)__W); +#endif } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W); +#if (__clang_major__ > 14) + return __builtin_reduce_mul((__v16si)__W); +#else return __builtin_ia32_reduce_mul_d512((__v16si)__W); +#endif } static __inline__ int __DEFAULT_FN_ATTRS512 diff --git a/include/emmintrin.h b/include/emmintrin.h index 4618b80..c2a8d4a 100644 --- a/include/emmintrin.h +++ b/include/emmintrin.h @@ -2225,7 +2225,11 @@ _mm_add_epi64(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b) { +#if (__clang_major__ > 14) + return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b); +#else return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b); +#endif } /// Adds, with saturation, the corresponding elements of two 128-bit @@ -2247,7 +2251,11 @@ _mm_adds_epi8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b) { +#if (__clang_major__ > 14) + return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b); +#else return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b); +#endif } /// Adds, with saturation, the corresponding elements of two 128-bit @@ -2268,7 +2276,11 @@ _mm_adds_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b) { +#if (__clang_major__ > 14) + return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b); +#else return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b); +#endif } /// Adds, with saturation, the corresponding elements of two 128-bit @@ -2289,7 +2301,11 @@ _mm_adds_epu8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b) { +#if (__clang_major__ > 14) + return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b); +#else return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b); +#endif } /// Computes the rounded averages of corresponding elements of two @@ -2667,7 +2683,11 @@ _mm_sub_epi64(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b) { +#if (__clang_major__ > 14) + return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b); +#else return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b); +#endif } /// Subtracts corresponding 16-bit signed integer values in the input and @@ -2688,7 +2708,11 @@ _mm_subs_epi8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b) { +#if (__clang_major__ > 14) + return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b); +#else return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b); +#endif } /// Subtracts corresponding 8-bit unsigned integer values in the input @@ -2708,7 +2732,11 @@ _mm_subs_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b) { +#if (__clang_major__ > 14) + return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b); +#else return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b); +#endif } /// Subtracts corresponding 16-bit unsigned integer values in the input @@ -2728,7 +2756,11 @@ _mm_subs_epu8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b) { +#if (__clang_major__ > 14) + return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b); +#else return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b); +#endif } /// Performs a bitwise AND of two 128-bit integer vectors.