Conversions doubles/int64 efficaces sans AVX512
SSE2 fournit des instructions pour la conversion entre les flottants et les entiers 32 bits, mais manque d'équivalents pour la conversion entre doubles et entiers de 64 bits. Comment pouvons-nous simuler efficacement de telles conversions ?
Conversions avec coins coupés
Si les restrictions de plage sont acceptables, les astuces suivantes peuvent effectuer des conversions en seulement deux instructions :
Double à Int64
__m128i double_to_int64(__m128d x) { x = _mm_add_pd(x, _mm_set1_pd(0x0018000000000000)); return _mm_sub_epi64(_mm_castpd_si128(x), _mm_castpd_si128(_mm_set1_pd(0x0018000000000000))); }
Double à UInt64
__m128i double_to_uint64(__m128d x){ x = _mm_add_pd(x, _mm_set1_pd(0x0010000000000000)); return _mm_xor_si128(_mm_castpd_si128(x), _mm_castpd_si128(_mm_set1_pd(0x0010000000000000))); }
Int64 à doubler
__m128d int64_to_double(__m128i x){ x = _mm_add_epi64(x, _mm_castpd_si128(_mm_set1_pd(0x0018000000000000))); return _mm_sub_pd(_mm_castsi128_pd(x), _mm_set1_pd(0x0018000000000000)); }
UInt64 à doubler
__m128d uint64_to_double(__m128i x){ x = _mm_or_si128(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000))); return _mm_sub_pd(_mm_castsi128_pd(x), _mm_set1_pd(0x0010000000000000)); }
Gamme complète Conversions
Pour les conversions qui gèrent la gamme complète des entiers 64 bits, voici des implémentations optimisées :
UInt64 vers Double
__m128d uint64_to_double_full(__m128i x){ __m128i xH = _mm_srli_epi64(x, 32); xH = _mm_or_si128(xH, _mm_castpd_si128(_mm_set1_pd(19342813113834066795298816.))); // 2^84 __m128i xL = _mm_blend_epi16(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)), 0xcc); // 2^52 __m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(19342813118337666422669312.)); // 2^84 + 2^52 return _mm_add_pd(f, _mm_castsi128_pd(xL)); }
Int64 pour doubler
__m128d int64_to_double_full(__m128i x){ __m128i xH = _mm_srai_epi32(x, 16); xH = _mm_blend_epi16(xH, _mm_setzero_si128(), 0x33); xH = _mm_add_epi64(xH, _mm_castpd_si128(_mm_set1_pd(442721857769029238784.))); // 3*2^67 __m128i xL = _mm_blend_epi16(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)), 0x88); // 2^52 __m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(442726361368656609280.)); // 3*2^67 + 2^52 return _mm_add_pd(f, _mm_castsi128_pd(xL)); }
Ce qui précède est le contenu détaillé de. pour plus d'informations, suivez d'autres articles connexes sur le site Web de PHP en chinois!