Skip to content

Commit 17c1c53

Browse files
JohanMabille authored and serge-sans-paille committed
Fixed implementation of load / store functions
1 parent ca9533e commit 17c1c53

File tree

1 file changed

+86
-111
lines changed

1 file changed

+86
-111
lines changed

include/xsimd/types/xsimd_api.hpp

Lines changed: 86 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -928,27 +928,53 @@ batch<T, A> lgamma(batch<T, A> const& x) {
928928
/**
929929
* @ingroup batch_data_transfer
930930
*
931-
* Creates a batch from the buffer \c ptr. The
932-
* memory needs to be aligned.
931+
* Creates a batch from the buffer \c ptr and the specified
932+
* batch value type \c To. The memory needs to be aligned.
933933
* @param ptr the memory buffer to read
934934
* @return a new batch instance
935935
*/
936-
template<class A=default_arch, class From>
937-
batch<From, A> load(From const* ptr, aligned_mode= {}) {
938-
return kernel::load_aligned<A>(ptr, kernel::convert<From>{}, A{});
936+
template <class To, class A=default_arch, class From>
937+
simd_return_type<From, To> load_as(From const* ptr, aligned_mode) {
938+
using batch_value_type = typename simd_return_type<From, To>::value_type;
939+
return kernel::load_aligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
940+
}
941+
942+
template <class To, class A = default_arch>
943+
simd_return_type<bool, To> load_as(bool const* ptr, aligned_mode) {
944+
return simd_return_type<bool, To>::load_aligned(ptr);
945+
}
946+
947+
template <class To, class A=default_arch, class From>
948+
simd_return_type<std::complex<From>, To> load_as(std::complex<From> const* ptr, aligned_mode)
949+
{
950+
using batch_value_type = typename simd_return_type<std::complex<From>, To>::value_type;
951+
return kernel::load_complex_aligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
939952
}
940953

941954
/**
942955
* @ingroup batch_data_transfer
943956
*
944-
* Creates a batch from the buffer \c ptr. The
945-
* memory does not need to be aligned.
957+
* Creates a batch from the buffer \c ptr and the specified
958+
* batch value type \c To. The memory does not need to be aligned.
946959
* @param ptr the memory buffer to read
947960
* @return a new batch instance
948961
*/
949-
template<class A=default_arch, class From>
950-
batch<From, A> load(From const* ptr, unaligned_mode) {
951-
return kernel::load_unaligned<A>(ptr, kernel::convert<From>{}, A{});
962+
template <class To, class A=default_arch, class From>
963+
simd_return_type<From, To> load_as(From const* ptr, unaligned_mode) {
964+
using batch_value_type = typename simd_return_type<From, To>::value_type;
965+
return kernel::load_unaligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
966+
}
967+
968+
template <class To, class A = default_arch>
969+
simd_return_type<bool, To> load_as(bool const* ptr, unaligned_mode) {
970+
return simd_return_type<bool, To>::load_unaligned(ptr);
971+
}
972+
973+
template <class To, class A=default_arch, class From>
974+
simd_return_type<std::complex<From>, To> load_as(std::complex<From> const* ptr, unaligned_mode)
975+
{
976+
using batch_value_type = typename simd_return_type<std::complex<From>, To>::value_type;
977+
return kernel::load_complex_unaligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
952978
}
953979

954980
/**
@@ -960,8 +986,8 @@ batch<From, A> load(From const* ptr, unaligned_mode) {
960986
* @return a new batch instance
961987
*/
962988
template<class A=default_arch, class From>
963-
batch<From, A> load_aligned(From const* ptr) {
964-
return kernel::load_aligned<A>(ptr, kernel::convert<From>{}, A{});
989+
batch<From, A> load(From const* ptr, aligned_mode= {}) {
990+
return load_as<From, A>(ptr, aligned_mode{});
965991
}
966992

967993
/**
@@ -973,60 +999,34 @@ batch<From, A> load_aligned(From const* ptr) {
973999
* @return a new batch instance
9741000
*/
9751001
template<class A=default_arch, class From>
976-
batch<From, A> load_unaligned(From const* ptr) {
977-
return kernel::load_unaligned<A>(ptr, kernel::convert<From>{}, A{});
1002+
batch<From, A> load(From const* ptr, unaligned_mode) {
1003+
return load_as<From, A>(ptr, unaligned_mode{});
9781004
}
9791005

9801006
/**
9811007
* @ingroup batch_data_transfer
9821008
*
983-
* Creates a batch from the buffer \c ptr and the specifed
984-
* batch value type \c To. The memory needs to be aligned.
1009+
* Creates a batch from the buffer \c ptr. The
1010+
* memory needs to be aligned.
9851011
* @param ptr the memory buffer to read
9861012
* @return a new batch instance
9871013
*/
988-
template <class To, class A=default_arch, class From>
989-
simd_return_type<From, To> load_as(From const* ptr, aligned_mode) {
990-
using batch_value_type = typename simd_return_type<From, To>::value_type;
991-
return kernel::load_aligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
992-
}
993-
994-
template <class To, class A = default_arch>
995-
simd_return_type<bool, To> load_as(bool const* ptr, aligned_mode) {
996-
return simd_return_type<bool, To>::load_aligned(ptr);
997-
}
998-
999-
template <class To, class A=default_arch, class From>
1000-
simd_return_type<std::complex<From>, To> load_as(std::complex<From> const* ptr, aligned_mode)
1001-
{
1002-
using batch_value_type = typename simd_return_type<std::complex<From>, To>::value_type;
1003-
return kernel::load_complex_aligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
1014+
template<class A=default_arch, class From>
1015+
batch<From, A> load_aligned(From const* ptr) {
1016+
return load_as<From, A>(ptr, aligned_mode{});
10041017
}
10051018

10061019
/**
10071020
* @ingroup batch_data_transfer
10081021
*
1009-
* Creates a batch from the buffer \c ptr and the specifed
1010-
* batch value type \c To. The memory does not need to be aligned.
1022+
* Creates a batch from the buffer \c ptr. The
1023+
* memory does not need to be aligned.
10111024
* @param ptr the memory buffer to read
10121025
* @return a new batch instance
10131026
*/
1014-
template <class To, class A=default_arch, class From>
1015-
simd_return_type<From, To> load_as(From const* ptr, unaligned_mode) {
1016-
using batch_value_type = typename simd_return_type<From, To>::value_type;
1017-
return kernel::load_unaligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
1018-
}
1019-
1020-
template <class To, class A = default_arch>
1021-
simd_return_type<bool, To> load_as(bool const* ptr, unaligned_mode) {
1022-
return simd_return_type<bool, To>::load_unaligned(ptr);
1023-
}
1024-
1025-
template <class To, class A=default_arch, class From>
1026-
simd_return_type<std::complex<From>, To> load_as(std::complex<From> const* ptr, unaligned_mode)
1027-
{
1028-
using batch_value_type = typename simd_return_type<std::complex<From>, To>::value_type;
1029-
return kernel::load_complex_unaligned<A>(ptr, kernel::convert<batch_value_type>{}, A{});
1027+
template <class A=default_arch, class From>
1028+
batch<From, A> load_unaligned(From const* ptr) {
1029+
return load_as<From, A>(ptr, unaligned_mode{});
10301030
}
10311031

10321032
/**
@@ -1485,58 +1485,6 @@ auto ssub(T const& x, Tp const& y) -> decltype(x - y) {
14851485
return kernel::ssub<A>(B(x), B(y), A{});
14861486
}
14871487

1488-
/**
1489-
* @ingroup batch_data_transfer
1490-
*
1491-
* Copy content of batch \c val to the buffer \c mem. The
1492-
* memory does not need to be aligned.
1493-
* @param mem the memory buffer to write to
1494-
* @param val the batch to copy from
1495-
*/
1496-
template<class A, class T>
1497-
void store(T* mem, batch<T, A> const& val, aligned_mode={}) {
1498-
return kernel::store_aligned<A>(mem, val, A{});
1499-
}
1500-
1501-
/**
1502-
* @ingroup batch_data_transfer
1503-
*
1504-
* Copy content of batch \c val to the buffer \c mem. The
1505-
* memory does not need to be aligned.
1506-
* @param mem the memory buffer to write to
1507-
* @param val the batch to copy from
1508-
*/
1509-
template<class A, class T>
1510-
void store(T* mem, batch<T, A> const& val, unaligned_mode) {
1511-
return kernel::store_unaligned<A>(mem, val, A{});
1512-
}
1513-
1514-
/**
1515-
* @ingroup batch_data_transfer
1516-
*
1517-
* Copy content of batch \c val to the buffer \c mem. The
1518-
* memory needs to be aligned.
1519-
* @param mem the memory buffer to write to
1520-
* @param val the batch to copy from
1521-
*/
1522-
template<class A, class T>
1523-
void store_aligned(T* mem, batch<T, A> const& val) {
1524-
return kernel::store_aligned<A>(mem, val, A{});
1525-
}
1526-
1527-
/**
1528-
* @ingroup batch_data_transfer
1529-
*
1530-
* Copy content of batch \c val to the buffer \c mem. The
1531-
* memory does not need to be aligned.
1532-
* @param mem the memory buffer to write to
1533-
* @param val the batch to copy
1534-
*/
1535-
template<class A, class T>
1536-
void store_unaligned(T* mem, batch<T, A> const& val) {
1537-
return kernel::store_unaligned<A>(mem, val, A{});
1538-
}
1539-
15401488
/**
15411489
* @ingroup batch_data_transfer
15421490
*
@@ -1586,28 +1534,55 @@ void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, un
15861534
/**
15871535
* @ingroup batch_data_transfer
15881536
*
1589-
* Copy content of batch of boolean \c src to the buffer \c dst. The
1537+
* Copy content of batch \c val to the buffer \c mem. The
1538+
* memory needs to be aligned.
1539+
* @param mem the memory buffer to write to
1540+
* @param val the batch to copy from
1541+
*/
1542+
template<class A, class T>
1543+
void store(T* mem, batch<T, A> const& val, aligned_mode={}) {
1544+
store_as<T, A>(mem, val, aligned_mode{});
1545+
}
1546+
1547+
/**
1548+
* @ingroup batch_data_transfer
1549+
*
1550+
* Copy content of batch \c val to the buffer \c mem. The
1551+
* memory does not need to be aligned.
1552+
* @param mem the memory buffer to write to
1553+
* @param val the batch to copy from
1554+
*/
1555+
template<class A, class T>
1556+
void store(T* mem, batch<T, A> const& val, unaligned_mode) {
1557+
store_as<T, A>(mem, val, unaligned_mode{});
1558+
}
1559+
1560+
/**
1561+
* @ingroup batch_data_transfer
1562+
*
1563+
* Copy content of batch \c val to the buffer \c mem. The
15901564
* memory needs to be aligned.
15911565
* @param mem the memory buffer to write to
1592-
* @param val the batch to copy
1566+
* @param val the batch to copy from
15931567
*/
1594-
template <class To, class A=default_arch, class From>
1595-
void store_batch(To* dst, batch_bool<From, A> const& src, aligned_mode) {
1596-
kernel::store(src, dst, A{});
1568+
template<class A, class T>
1569+
void store_aligned(T* mem, batch<T, A> const& val) {
1570+
store_as<T, A>(mem, val, aligned_mode{});
15971571
}
15981572

15991573
/**
16001574
* @ingroup batch_data_transfer
16011575
*
1602-
* Copy content of batch of boolean \c src to the buffer \c dst. The
1576+
* Copy content of batch \c val to the buffer \c mem. The
16031577
* memory does not need to be aligned.
16041578
* @param mem the memory buffer to write to
16051579
* @param val the batch to copy
16061580
*/
1607-
template <class To, class A=default_arch, class From>
1608-
void store_batch(To* dst, batch_bool<From, A> const& src, unaligned_mode) {
1609-
kernel::store(src, dst, A{});
1581+
template<class A, class T>
1582+
void store_unaligned(T* mem, batch<T, A> const& val) {
1583+
store_as<T, A>(mem, val, unaligned_mode{});
16101584
}
1585+
16111586
/**
16121587
* @ingroup batch_arithmetic
16131588
*

0 commit comments

Comments
 (0)