VecCore 0.8.1
C++ Library for Portable SIMD Vectorization
Loading...
Searching...
No Matches
SIMD.h
Go to the documentation of this file.
1#ifndef VECCORE_BACKEND_STD_SIMD_H
2#define VECCORE_BACKEND_STD_SIMD_H
3
4#if __cplusplus >= 202002L && defined(__has_include)
5#if __has_include(<experimental/simd>)
6#define VECCORE_ENABLE_STD_SIMD
7#endif
8#endif
9
10#ifdef VECCORE_ENABLE_STD_SIMD
11
12#include <experimental/simd>
13
14namespace vecCore {
15
16template <typename T, class Abi>
17struct TypeTraits<std::experimental::simd_mask<T, Abi>> {
18 using IndexType = typename std::experimental::simd_mask<T, Abi>::simd_type;
19 using ScalarType = typename std::experimental::simd_mask<T, Abi>::value_type;
20 static constexpr size_t Size = std::experimental::simd<T, Abi>::size();
21};
22
23template <typename T, class Abi>
24struct TypeTraits<std::experimental::simd<T, Abi>> {
25 using MaskType = typename std::experimental::simd<T, Abi>::mask_type;
26 using IndexType = typename std::experimental::simd<T, Abi>;
27 using ScalarType = typename std::experimental::simd<T, Abi>::value_type;
28 static constexpr size_t Size = std::experimental::simd<T, Abi>::size();
29};
30
31namespace backend {
32
33template <class Abi> class SIMD {
34public:
35 using Real_v = std::experimental::simd<Real_s, Abi>;
36 using Float_v = std::experimental::simd<float, Abi>;
37 using Double_v = std::experimental::simd<double, Abi>;
38
39 using Int_v = std::experimental::simd<int, Abi>;
40 using Int16_v = std::experimental::simd<int16_t, Abi>;
41 using Int32_v = std::experimental::simd<int32_t, Abi>;
42 using Int64_v = std::experimental::simd<int64_t, Abi>;
43
44 using UInt_v = std::experimental::simd<unsigned int, Abi>;
45 using UInt16_v = std::experimental::simd<uint16_t, Abi>;
46 using UInt32_v = std::experimental::simd<uint32_t, Abi>;
47 using UInt64_v = std::experimental::simd<uint64_t, Abi>;
48};
49
50class SIMDNative {
51public:
52 using Real_v = std::experimental::native_simd<Real_s>;
53 using Float_v = std::experimental::native_simd<float>;
54 using Double_v = std::experimental::native_simd<double>;
55
56 using Int_v = std::experimental::native_simd<int>;
57 using Int16_v = std::experimental::native_simd<int16_t>;
58 using Int32_v = std::experimental::native_simd<int32_t>;
59 using Int64_v = std::experimental::native_simd<int64_t>;
60
61 using UInt_v = std::experimental::native_simd<unsigned int>;
62 using UInt16_v = std::experimental::native_simd<uint16_t>;
63 using UInt32_v = std::experimental::native_simd<uint32_t>;
64 using UInt64_v = std::experimental::native_simd<uint64_t>;
65};
66
67using SIMDScalar = SIMD<std::experimental::simd_abi::scalar>;
68
69template <size_t N>
70using SIMDVector = SIMD<std::experimental::simd_abi::fixed_size<N>>;
71
72} // namespace backend
73
/// Returns true when no lane of `mask` is set.
///
/// Delegates to the library reduction std::experimental::none_of instead of
/// walking the lanes by hand; this also removes the comparison between a
/// signed loop counter and the unsigned value returned by mask.size().
template <typename T, class Abi>
bool MaskEmpty(std::experimental::simd_mask<T, Abi> mask) {
  return std::experimental::none_of(mask);
}
81
/// Returns true when every lane of `mask` is set.
///
/// Delegates to the library reduction std::experimental::all_of instead of
/// walking the lanes by hand; this also removes the comparison between a
/// signed loop counter and the unsigned value returned by mask.size().
template <typename T, class Abi>
bool MaskFull(std::experimental::simd_mask<T, Abi> mask) {
  return std::experimental::all_of(mask);
}
89
90template <typename T, class Abi>
91struct IndexingImplementation<std::experimental::simd<T, Abi>> {
92 using V = std::experimental::simd<T, Abi>;
93
94 static inline T Get(const V &v, size_t i) { return v[i]; }
95 static inline void Set(V &v, size_t i, T const val) { v[i] = val; }
96};
97
98template <typename T, class Abi>
99struct IndexingImplementation<std::experimental::simd_mask<T, Abi>> {
100 using V = std::experimental::simd_mask<T, Abi>;
101
102 static inline T Get(const V &v, size_t i) { return v[i]; }
103 static inline void Set(V &v, size_t i, T const val) { v[i] = !!val; }
104};
105
106template <typename T, class Abi>
107struct LoadStoreImplementation<std::experimental::simd<T, Abi>> {
108 using V = std::experimental::simd<T, Abi>;
109
110 template <typename S = T> static inline void Load(V &v, S const *ptr) {
111 for (size_t i = 0; i < V::size(); ++i)
112 v[i] = ptr[i];
113 }
114
115 template <typename S = T> static inline void Store(V const &v, S *ptr) {
116 for (size_t i = 0; i < V::size(); ++i)
117 ptr[i] = static_cast<S>(v[i]);
118 }
119};
120
121template <typename T, class Abi>
122struct LoadStoreImplementation<std::experimental::simd_mask<T, Abi>> {
123 using V = typename std::experimental::simd_mask<T, Abi>;
124
125 template <typename S = T> static inline void Load(V &v, S const *ptr) {
126 for (size_t i = 0; i < V::size(); ++i)
127 v[i] = !!ptr[i];
128 }
129
130 template <typename S = T> static inline void Store(V const &v, S *ptr) {
131 for (size_t i = 0; i < V::size(); ++i)
132 ptr[i] = static_cast<S>(v[i]);
133 }
134};
135
136template <typename T, class Abi>
137struct MaskingImplementation<std::experimental::simd<T, Abi>> {
138 using V = typename std::experimental::simd<T, Abi>;
139 using M = typename std::experimental::simd<T, Abi>::mask_type;
140
141 static inline void Assign(V &dst, M const &mask, V const &src) {
142 where(mask, dst) = src;
143 }
144
145 static inline void Blend(V &dst, M const &mask, V const &src1,
146 V const &src2) {
147 where(mask, dst) = src1;
148 where(!mask, dst) = src2;
149 }
150};
151
152template <class Abi>
153struct GatherScatterImplementation<std::experimental::simd<float, Abi>> {
154 using V = typename std::experimental::simd<float, Abi>;
155
156 template <typename S = float>
157 static inline void Gather(V &v, S const *ptr, V const &idx) {
158 auto ii = std::experimental::static_simd_cast<int, float, Abi>(idx);
159 for (size_t i = 0; i < V::size(); ++i)
160 v[i] = ptr[ii[i]];
161 }
162
163 template <typename S = float>
164 static inline void Scatter(V const &v, S *ptr, V const &idx) {
165 auto ii = std::experimental::static_simd_cast<int, float, Abi>(idx);
166 for (size_t i = 0; i < V::size(); ++i)
167 ptr[ii[i]] = v[i];
168 }
169};
170
171template <class Abi>
172struct GatherScatterImplementation<std::experimental::simd<double, Abi>> {
173 using V = typename std::experimental::simd<double, Abi>;
174
175 template <typename S = double>
176 static inline void Gather(V &v, S const *ptr, V const &idx) {
177 auto ii = std::experimental::static_simd_cast<int64_t, double, Abi>(idx);
178 for (size_t i = 0; i < V::size(); ++i)
179 v[i] = ptr[ii[i]];
180 }
181
182 template <typename S = double>
183 static inline void Scatter(V const &v, S *ptr, V const &idx) {
184 auto ii = std::experimental::static_simd_cast<int64_t, double, Abi>(idx);
185 for (size_t i = 0; i < V::size(); ++i)
186 ptr[ii[i]] = v[i];
187 }
188};
189
190} // namespace vecCore
191
192#endif
193#endif
VECCORE_ATT_HOST_DEVICE bool MaskEmpty(const M &mask)
VECCORE_ATT_HOST_DEVICE bool MaskFull(const M &mask)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Scatter(T const &v, S *ptr, Index< T > const &idx)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Gather(T &v, S const *ptr, Index< T > const &idx)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Set(T &v, size_t i, Scalar< T > const val)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE Scalar< T > Get(const T &v, size_t i)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Load(T &v, S const *ptr)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Store(T const &v, S *ptr)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Blend(T &dst, Mask< T > const &mask, T const &src1, T const &src2)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Assign(T &dst, Mask< T > const &mask, T const &src)
static constexpr size_t Size
Definition: Scalar.h:14
size_t IndexType
Definition: Scalar.h:13