VecCore 0.8.1
C++ Library for Portable SIMD Vectorization
Loading...
Searching...
No Matches
Implementation.h
Go to the documentation of this file.
1#ifndef VECCORE_BACKEND_IMPLEMENTATION_H
2#define VECCORE_BACKEND_IMPLEMENTATION_H
3
4#include "Interface.h"
5#include "../Limits.h"
6
7#include <algorithm>
8#include <type_traits>
9
10namespace vecCore {
11
12template <typename T>
15constexpr size_t VectorSize()
16{
17 using V = typename std::decay<T>::type;
19}
20
// Overload of VectorSize that takes the vector type from an argument.
// The argument's value is never read: only its deduced type T is used,
// and the result is whatever the zero-argument VectorSize<T>() yields.
template <typename T>
constexpr size_t VectorSize(const T & /* type carrier only */)
{
  return VectorSize<T>();
}
28
29
30// Iterators
31
32template <typename T>
35 {
36 Scalar<T> *addr = (Scalar<T> *)(&v);
37 return addr;
38 }
39
41 {
42 Scalar<T> *addr = (Scalar<T> *)(&v);
43 return addr + sizeof(v);
44 }
45
47 {
48 Scalar<T> const *addr = (Scalar<T> *)(&v);
49 return addr;
50 }
51
53 {
54 Scalar<T> const *addr = (Scalar<T> *)(&v);
55 return addr + sizeof(v);
56 }
57};
58
59template <typename T>
63{
65}
66
67template <typename T>
71{
73}
74
75template <typename T>
78Scalar<T> const *Begin(T const &v)
79{
81}
82
83template <typename T>
86Scalar<T> const *End(T const &v)
87{
89}
90
91// Get/Set
92
93template <typename T>
95 VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static Scalar<T> Get(const T &v, size_t i) { return *(Begin(v) + i); }
96
97 VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static void Set(T &v, size_t i, Scalar<T> const val)
98 {
99 *(Begin(v) + i) = val;
100 }
101};
102
103template <typename T>
106Scalar<T> Get(const T &v, size_t i)
107{
109}
110
111template <typename T>
114void Set(T &v, size_t i, Scalar<T> const val)
115{
117}
118
119// Load/Store
120
121template <typename T>
123 template <typename S = Scalar<T>>
126 static void Load(T &v, S const *ptr)
127 {
128 for (size_t i = 0; i < VectorSize<T>(); ++i)
129 Set(v, i, ptr[i]);
130 }
131
132 template <typename S = Scalar<T>>
135 static void Store(T const &v, S *ptr)
136 {
137 for (size_t i = 0; i < VectorSize<T>(); ++i)
138 ptr[i] = static_cast<S>(Get(v, i));
139 }
140};
141
142template <typename T>
145void Load(T &v, Scalar<T> const *ptr)
146{
148}
149
150template <typename T>
153T Load(Scalar<T> const *ptr)
154{
155 T v;
157 return v;
158}
159
160template <typename T>
163void Store(T const &v, Scalar<T> *ptr)
164{
166}
167
168// Gather/Scatter
169
170template <typename T>
172 template <typename S = Scalar<T>>
175 static void Gather(T &v, S const *ptr, Index<T> const &idx)
176 {
177 for (size_t i = 0; i < VectorSize<T>(); ++i)
178 Set(v, i, ptr[Get(idx, i)]);
179 }
180
181 template <typename S = Scalar<T>>
184 static void Scatter(T const &v, S *ptr, Index<T> const &idx)
185 {
186 for (size_t i = 0; i < VectorSize<T>(); ++i)
187 ptr[Get(idx, i)] = Get(v, i);
188 }
189};
190
191template <typename T, typename S>
194T Gather(S const *ptr, Index<T> const &idx)
195{
196 T v;
197 GatherScatterImplementation<T>::template Gather<S>(v, ptr, idx);
198 return v;
199}
200
201template <typename T, typename S>
204void Scatter(T const &v, S *ptr, Index<T> const &idx)
205{
206 GatherScatterImplementation<T>::template Scatter<S>(v, ptr, idx);
207}
208
209// Masking
210
211template <typename M>
213bool MaskFull(const M &mask)
214{
215 for (size_t i = 0; i < VectorSize<M>(); i++)
216 if (!Get(mask, i)) return false;
217 return true;
218}
219
220template <typename M>
222bool MaskEmpty(const M &mask)
223{
224 for (size_t i = 0; i < VectorSize<M>(); i++)
225 if (Get(mask, i)) return false;
226 return true;
227}
228
229// Split generic scalar/vector implementations to avoid performance loss
230
231template <typename T, bool>
235 static void Assign(T &dst, Mask<T> const &mask, T const &src)
236 {
237 for (size_t i = 0; i < VectorSize<T>(); i++)
238 if (Get(mask, i)) Set(dst, i, Get(src, i));
239 }
240
243 static void Blend(T &dst, Mask<T> const &mask, T const &src1, T const &src2)
244 {
245 for (size_t i = 0; i < VectorSize<T>(); i++)
246 Set(dst, i, Get(mask, i) ? Get(src1, i) : Get(src2, i));
247 }
248};
249
250template <typename T>
254 static void Assign(T &dst, Mask<T> const &mask, T const &src)
255 {
256 if (mask) dst = src;
257 }
258
261 static void Blend(T &dst, Mask<T> const &mask, T const &src1, T const &src2) { dst = mask ? src1 : src2; }
262};
263
264template <typename T>
268 static void Assign(T &dst, Mask<T> const &mask, T const &src)
269 {
271 }
272
275 static void Blend(T &dst, Mask<T> const &mask, T const &src1, T const &src2)
276 {
278 }
279};
280
281template <typename T>
284void MaskedAssign(T &dst, const Mask<T> &mask, const T &src)
285{
286 MaskingImplementation<T>::Assign(dst, mask, src);
287}
288
289template <typename T>
292T Blend(const Mask<T> &mask, const T &src1, const T &src2)
293{
294 T v;
295 MaskingImplementation<T>::Blend(v, mask, src1, src2);
296 return v;
297}
298
299// Miscellaneous
300
302{
303#ifdef VECCORE_CUDA_DEVICE_COMPILATION
304 return false;
305#else
306 return true;
307#endif
308}
309
310template <typename T>
312constexpr bool EarlyReturnMaxLength(T &, size_t n)
313{
314 return EarlyReturnAllowed() && VectorSize<T>() <= n;
315}
316
317// Reduction
318
319template <typename T>
323{
324 Scalar<T> result(0);
325 for (size_t i = 0; i < VectorSize<T>(); ++i)
326 result += Get(v, i);
327 return result;
328}
329
330template <typename T>
334{
336 for (size_t i = 0; i < VectorSize<T>(); ++i)
337 if (Get(v, i) < result)
338 result = Get(v, i);
339 return result;
340}
341
342template <typename T>
346{
347 Scalar<T> result(NumericLimits<Scalar<T>>::Lowest());
348 for (size_t i = 0; i < VectorSize<T>(); ++i)
349 if (Get(v, i) > result)
350 result = Get(v, i);
351 return result;
352}
353
354template<typename Vout, typename Vin>
355Vout Convert(const Vin& v)
356{
357 Vout out;
358 static_assert(VectorSize<Vin>() == VectorSize<Vout>(),
359 "Cannot convert SIMD vectors of different sizes");
360 for (size_t i = 0; i < VectorSize<Vin>(); ++i)
361 Set(out, i, Get(v, i));
362 return out;
363}
364
365} // namespace vecCore
366
367#endif
#define VECCORE_ATT_HOST_DEVICE
Definition: CUDA.h:10
#define VECCORE_FORCE_INLINE
Definition: Common.h:32
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE T Max(const T &a, const T &b)
Definition: VecMath.h:79
VECCORE_ATT_HOST_DEVICE bool MaskEmpty(const M &mask)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE void MaskedAssign(T &dst, const Mask< T > &mask, const T &src)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE Scalar< T > Get(const T &v, size_t i)
typename TypeTraits< T >::ScalarType Scalar
Definition: Interface.h:16
typename TypeTraits< T >::MaskType Mask
Definition: Interface.h:10
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE T Blend(const Mask< T > &mask, const T &src1, const T &src2)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE void Set(T &v, size_t i, Scalar< T > const val)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE Scalar< T > ReduceMax(const T &v)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE void Scatter(T const &v, S *ptr, Index< T > const &idx)
VECCORE_ATT_HOST_DEVICE bool MaskFull(const M &mask)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE constexpr size_t VectorSize()
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE Scalar< T > * End(T &v)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE void Load(T &v, Scalar< T > const *ptr)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE constexpr bool EarlyReturnAllowed()
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE Scalar< T > * Begin(T &v)
typename TypeTraits< T >::IndexType Index
Definition: Interface.h:13
Vout Convert(const Vin &v)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE T Gather(S const *ptr, Index< T > const &idx)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE constexpr bool EarlyReturnMaxLength(T &, size_t n)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE Scalar< T > ReduceMin(const T &v)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE void Store(T const &v, Scalar< T > *ptr)
VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE Scalar< T > ReduceAdd(const T &v)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Scatter(T const &v, S *ptr, Index< T > const &idx)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Gather(T &v, S const *ptr, Index< T > const &idx)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Blend(T &dst, Mask< T > const &mask, T const &src1, T const &src2)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Assign(T &dst, Mask< T > const &mask, T const &src)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Assign(T &dst, Mask< T > const &mask, T const &src)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Blend(T &dst, Mask< T > const &mask, T const &src1, T const &src2)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Set(T &v, size_t i, Scalar< T > const val)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE Scalar< T > Get(const T &v, size_t i)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE Scalar< T > const * End(const T &v)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE Scalar< T > * End(T &v)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE Scalar< T > * Begin(T &v)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE Scalar< T > const * Begin(const T &v)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Load(T &v, S const *ptr)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Store(T const &v, S *ptr)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Blend(T &dst, Mask< T > const &mask, T const &src1, T const &src2)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Assign(T &dst, Mask< T > const &mask, T const &src)