From: Andrei Tatar
Date: Mon, 17 Jul 2023 19:06:27 +0000 (+0200)
Subject: Add compatibility with gcc <= 12
X-Git-Tag: RELEASE-0.14.0~6
X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=ee86c1d8503929eedf9a96b7b87ffec0fd140cdf;p=unikraft%2Flibs%2Fintel-intrinsics.git

Add compatibility with gcc <= 12

Implementations based on the headers in GCC release 12.3.0.

Signed-off-by: Andrei Tatar
Reviewed-by: Maria Sfiraiala
Reviewed-by: Radu Nichita
Approved-by: Razvan Deaconescu
Tested-by: Unikraft CI
GitHub-Closes: #3
---

diff --git a/include-gcc/avx512bf16intrin.h b/include-gcc/avx512bf16intrin.h
index 107f4a4..cfe7acc 100644
--- a/include-gcc/avx512bf16intrin.h
+++ b/include-gcc/avx512bf16intrin.h
@@ -34,6 +34,24 @@
 #define __DISABLE_AVX512BF16__
 #endif /* __AVX512BF16__ */
 
+#if (__GNUC__ < 13)
+/* Internal data types for implementing the intrinsics.  */
+typedef short __v32bh __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+   vector types, and their scalar components.  */
+typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+
+/* Convert One BF16 Data to One Single Float Data.  */
+extern __inline float
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsbh_ss (__bfloat16 __A)
+{
+  union{ float __a; unsigned int __b;} __tmp;
+  __tmp.__b = ((unsigned int)(__A)) << 16;
+  return __tmp.__a;
+}
+#else
 /* Internal data types for implementing the intrinsics.  */
 typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
 
@@ -48,6 +66,7 @@ _mm_cvtsbh_ss (__bf16 __A)
 {
   return __builtin_ia32_cvtbf2sf (__A);
 }
+#endif
 
 /* vcvtne2ps2bf16 */
 
@@ -55,21 +74,33 @@ extern __inline __m512bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
 {
+#if (__GNUC__ < 13)
+  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
+#else
   return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf(__A, __B);
+#endif
 }
 
 extern __inline __m512bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
 {
+#if (__GNUC__ < 13)
+  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
+#else
   return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C, __D, __A, __B);
+#endif
 }
 
 extern __inline __m512bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
 {
+#if (__GNUC__ < 13)
+  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
+#else
   return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_maskz(__B, __C, __A);
+#endif
 }
 
 /* vcvtneps2bf16 */
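The __GNUC__ < 13 fallback for _mm_cvtsbh_ss above works because a truncated
bfloat16 is exactly the upper 16 bits of an IEEE-754 binary32: shifting the
16-bit pattern left by 16 and type-punning through a union reconstructs the
float. A minimal standalone sketch of the same trick (bf16_to_float and
float_to_bf16 are illustrative names, not part of any header):

#include <stdio.h>

typedef unsigned short bf16_t;  /* storage-only, like __bfloat16 on gcc <= 12 */

/* Widen bf16 -> float: the 16 dropped mantissa bits come back as zeros. */
static float bf16_to_float(bf16_t b)
{
  union { float f; unsigned int u; } t;
  t.u = (unsigned int)b << 16;
  return t.f;
}

/* Narrow float -> bf16 by truncation (no round-to-nearest-even). */
static bf16_t float_to_bf16(float f)
{
  union { float f; unsigned int u; } t = { .f = f };
  return (bf16_t)(t.u >> 16);
}

int main(void)
{
  float x = 1.5f;  /* exactly representable in bf16 */
  bf16_t b = float_to_bf16(x);
  printf("%f -> 0x%04x -> %f\n", x, b, bf16_to_float(b));
  /* prints: 1.500000 -> 0x3fc0 -> 1.500000 */
  return 0;
}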
diff --git a/include-gcc/avx512bf16vlintrin.h b/include-gcc/avx512bf16vlintrin.h
index 6e8a6a0..861931a 100644
--- a/include-gcc/avx512bf16vlintrin.h
+++ b/include-gcc/avx512bf16vlintrin.h
@@ -34,6 +34,18 @@
 #define __DISABLE_AVX512BF16VL__
 #endif /* __AVX512BF16__ */
 
+#if (__GNUC__ < 13)
+/* Internal data types for implementing the intrinsics.  */
+typedef short __v16bh __attribute__ ((__vector_size__ (32)));
+typedef short __v8bh __attribute__ ((__vector_size__ (16)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+   vector types, and their scalar components.  */
+typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
+typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
+
+typedef unsigned short __bfloat16;
+#else
 /* Internal data types for implementing the intrinsics.  */
 typedef __bf16 __v16bf __attribute__ ((__vector_size__ (32)));
 typedef __bf16 __v8bf __attribute__ ((__vector_size__ (16)));
@@ -44,6 +56,7 @@ typedef __bf16 __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
 typedef __bf16 __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
 
 typedef __bf16 __bfloat16;
+#endif
 
 #define _mm256_cvtneps_pbh(A) \
   (__m128bh) __builtin_ia32_cvtneps2bf16_v8sf (A)
@@ -56,42 +69,66 @@ extern __inline __m256bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
 {
+#if (__GNUC__ < 13)
+  return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
+#else
   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16bf(__A, __B);
+#endif
 }
 
 extern __inline __m256bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
 {
+#if (__GNUC__ < 13)
+  return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
+#else
   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16bf_mask(__C, __D, __A, __B);
+#endif
 }
 
 extern __inline __m256bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
 {
+#if (__GNUC__ < 13)
+  return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
+#else
   return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16bf_maskz(__B, __C, __A);
+#endif
 }
 
 extern __inline __m128bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
 {
+#if (__GNUC__ < 13)
+  return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
+#else
   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8bf(__A, __B);
+#endif
 }
 
 extern __inline __m128bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
 {
+#if (__GNUC__ < 13)
+  return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
+#else
   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8bf_mask(__C, __D, __A, __B);
+#endif
 }
 
 extern __inline __m128bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
 {
+#if (__GNUC__ < 13)
+  return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
+#else
   return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8bf_maskz(__B, __C, __A);
+#endif
 }
 
 /* vcvtneps2bf16 */
@@ -168,13 +205,18 @@ _mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
   return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
 }
 
-extern __inline __bf16
+extern __inline __bfloat16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtness_sbh (float __A)
 {
   __v4sf __V = {__A, 0, 0, 0};
+#if (__GNUC__ < 13)
+  __v8hi __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V,
+	       (__v8hi)_mm_undefined_si128 (), (__mmask8)-1);
+#else
   __v8bf __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V,
 	       (__v8bf)_mm_undefined_si128 (), (__mmask8)-1);
+#endif
   return __R[0];
 }
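With the guarded typedef above, __bfloat16 on gcc <= 12 is a plain unsigned
short holding the raw bit pattern, so a scalar result such as the return value
of _mm_cvtness_sbh() cannot take part in arithmetic directly and must be
widened explicitly; on gcc >= 13, __bf16 behaves like a real floating type. A
hedged usage sketch, assuming a build with -mavx512bf16 -mavx512vl:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  __bfloat16 b = _mm_cvtness_sbh(3.25f);  /* float -> scalar bf16 */
#if (__GNUC__ < 13)
  float back = _mm_cvtsbh_ss(b);          /* raw bits: widen explicitly */
#else
  float back = (float)b;                  /* __bf16 converts like a float */
#endif
  printf("%f\n", back);                   /* 3.25 fits exactly in bf16 */
  return 0;
}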
diff --git a/include-gcc/avx512fp16intrin.h b/include-gcc/avx512fp16intrin.h
index dd083e5..31de9ba 100644
--- a/include-gcc/avx512fp16intrin.h
+++ b/include-gcc/avx512fp16intrin.h
@@ -183,21 +183,33 @@ extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_setzero_ph (void)
 {
+#if (__GNUC__ < 13)
+  return _mm_set1_ph (0.0f);
+#else
   return _mm_set1_ph (0.0f16);
+#endif
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_setzero_ph (void)
 {
+#if (__GNUC__ < 13)
+  return _mm256_set1_ph (0.0f);
+#else
   return _mm256_set1_ph (0.0f16);
+#endif
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_setzero_ph (void)
 {
+#if (__GNUC__ < 13)
+  return _mm512_set1_ph (0.0f);
+#else
   return _mm512_set1_ph (0.0f16);
+#endif
 }
 
 extern __inline __m128h
@@ -367,8 +379,12 @@ extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_set_sh (_Float16 __F)
 {
+#if (__GNUC__ < 13)
+  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, __F);
+#else
   return _mm_set_ph (0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16,
 		     __F);
+#endif
 }
 
 /* Create a vector with element 0 as *P and the rest zero.  */
@@ -376,8 +392,13 @@ extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_load_sh (void const *__P)
 {
+#if (__GNUC__ < 13)
+  return _mm_set_ph (0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+		     *(_Float16 const *) __P);
+#else
   return _mm_set_ph (0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16,
 		     *(_Float16 const *) __P);
+#endif
 }
 
 extern __inline __m512h
diff --git a/include-gcc/immintrin.h b/include-gcc/immintrin.h
index b220d87..ca1c509 100644
--- a/include-gcc/immintrin.h
+++ b/include-gcc/immintrin.h
@@ -44,9 +44,11 @@
 
 #include
 
+#if (__GNUC__ >= 13)
 #include
 
 #include
+#endif
 
 #include
 
@@ -123,8 +125,10 @@
 
 #include
 
+#if (__GNUC__ >= 13)
 #include
 #endif
+#endif
 
 #include
 
@@ -132,12 +136,16 @@
 
 #include
 
+#if (__GNUC__ >= 13)
 #include
+#endif
 
 #include
 
 #include
 
+#if (__GNUC__ >= 13)
 #include
+#endif
 
 #endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/include-gcc/x86gprintrin.h b/include-gcc/x86gprintrin.h
index f41be3f..6719e62 100644
--- a/include-gcc/x86gprintrin.h
+++ b/include-gcc/x86gprintrin.h
@@ -52,7 +52,9 @@
 
 #include
 
+#if (__GNUC__ >= 13)
 #include
+#endif
 
 #include
 
@@ -74,9 +76,11 @@
 
 #include
 
+#if (__GNUC__ >= 13)
 #include
 
 #include
+#endif
 
 #include
 
diff --git a/include-gcc/xmmintrin.h b/include-gcc/xmmintrin.h
index cb518fc..1885c13 100644
--- a/include-gcc/xmmintrin.h
+++ b/include-gcc/xmmintrin.h
@@ -53,13 +53,22 @@ enum _mm_hint
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_prefetch (const void *__P, enum _mm_hint __I)
 {
+#if (__GNUC__ < 13)
+  __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
+#else
   __builtin_ia32_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3,
 			   (__I & 0x10) >> 4);
+#endif
 }
 #else
+#if (__GNUC__ < 13)
+#define _mm_prefetch(P, I) \
+  __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
+#else
 #define _mm_prefetch(P, I) \
   __builtin_ia32_prefetch ((P), ((I) & 0x4) >> 2, ((I) & 0x3), ((I) & 0x10) >> 4)
 #endif
+#endif
 
 #ifndef __SSE__
 #pragma GCC push_options
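The gcc <= 12 fallback for _mm_prefetch above decodes the _mm_hint value the
same way the gcc 13 builtin does: bit 2 selects the rw argument of
__builtin_prefetch() and bits 0-1 the temporal-locality level, while the 0x10
bit (the instruction-prefetch hints that first appeared in gcc 13) is simply
dropped. A small usage sketch, assuming an SSE-enabled build; warm_cache is an
illustrative name, not part of the header:

#include <xmmintrin.h>

/* Touch one cache line per 16 floats ahead of a sequential pass.
   With _MM_HINT_T0 (0x3 in these headers) the gcc <= 12 path expands
   to __builtin_prefetch(p, 0, 3): a read prefetch, maximum locality. */
static void warm_cache(const float *buf, int n)
{
  for (int i = 0; i < n; i += 16)
    _mm_prefetch(buf + i, _MM_HINT_T0);
}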