@@ -32,5 +32,30 @@ namespace cp_algo {
3232 [[gnu::always_inline]] inline uint64_t read_bits64 (char const * p) {
3333 return read_bits (p) | (uint64_t (read_bits (p + 32 )) << 32 );
3434 }
35+
36+ [[gnu::target(" avx2" ), gnu::always_inline]] inline void write_bits (char *p, uint32_t bits) {
37+ auto bytes = u32x8 () + bits;
38+ static constexpr u8x32 shuffler = {
39+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
40+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
41+ 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
42+ 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3
43+ };
44+ auto shuffled = u8x32 (_mm256_shuffle_epi8 (__m256i () + bits, __m256i (shuffler)));
45+ static constexpr u8x32 mask = {
46+ 1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 ,
47+ 1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 ,
48+ 1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 ,
49+ 1 , 2 , 4 , 8 , 16 , 32 , 64 , 128
50+ };
51+ u8x32 to_save = (shuffled & mask) ? ' 1' : ' 0' ;
52+ for (int z = 0 ; z < 32 ; z++) {
53+ p[z] = to_save[z];
54+ }
55+ }
56+ [[gnu::target(" avx2" ), gnu::always_inline]] inline void write_bits64 (char *p, uint64_t bits) {
57+ write_bits (p, uint32_t (bits));
58+ write_bits (p + 32 , uint32_t (bits >> 32 ));
59+ }
3560}
3661#endif // CP_ALGO_UTIL_BIT_HPP
0 commit comments