Skip to content

Commit aaec742

Browse files
Improve xsimd::expand common implementation
The previous implementation assumed that xsimd::insert is cheap, and it always generated batch::size inserts. This implementation can take advantage of a smaller popcount of the bitmask. Note to self: it would be great to have a good implementation for constant masks.
1 parent 71a344e commit aaec742

File tree

1 file changed

+10
-17
lines changed

1 file changed

+10
-17
lines changed

include/xsimd/arch/common/xsimd_common_memory.hpp

Lines changed: 10 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -88,28 +88,21 @@ namespace xsimd
8888
}
8989

9090
// expand
91-
namespace detail
92-
{
93-
template <class IT, class A, class I, size_t... Is>
94-
XSIMD_INLINE batch<IT, A> create_expand_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence<Is...>)
95-
{
96-
batch<IT, A> swizzle_mask(IT(0));
97-
IT j = 0;
98-
(void)std::initializer_list<bool> { ((swizzle_mask = insert(swizzle_mask, j, index<Is>())), (j += ((bitmask >> Is) & 1u)), true)... };
99-
return swizzle_mask;
100-
}
101-
}
102-
10391
template <typename A, typename T>
10492
XSIMD_INLINE batch<T, A>
10593
expand(batch<T, A> const& x, batch_bool<T, A> const& mask,
10694
kernel::requires_arch<common>) noexcept
10795
{
108-
constexpr std::size_t size = batch_bool<T, A>::size;
109-
auto bitmask = mask.mask();
110-
auto swizzle_mask = detail::create_expand_swizzle_mask<as_unsigned_integer_t<T>, A>(bitmask, ::xsimd::detail::make_index_sequence<size>());
111-
auto z = swizzle(x, swizzle_mask);
112-
return select(mask, z, batch<T, A>(T(0)));
96+
constexpr auto size = batch<T, A>::size;
97+
alignas(A::alignment()) T x_in[size], x_out[size] = { T() };
98+
x.store_aligned(x_in);
99+
int i = 0, j = 0;
100+
for (auto bitmask = mask.mask(); bitmask; bitmask >>= 1, ++i)
101+
{
102+
if (bitmask & 1)
103+
x_out[i] = x_in[j++];
104+
}
105+
return xsimd::batch<T, A>::load_aligned(x_out);
113106
}
114107

115108
// extract_pair

0 commit comments

Comments
 (0)