VecCore 0.8.1
C++ Library for Portable SIMD Vectorization
Loading...
Searching...
No Matches
UMESimdCommon.h
Go to the documentation of this file.
1// internal header; common parts to multiple UMESIMD backends
2#ifndef VECCORE_UMESIMDCOMMON_H
3#define VECCORE_UMESIMDCOMMON_H
4
5namespace vecCore {
6
7// type traits for UME::SIMD
8
9template <uint32_t N>
10struct TypeTraits<UME::SIMD::SIMDVecMask<N>> {
11 using MaskType = typename UME::SIMD::SIMDVecMask<N>;
12 using IndexType = int;
13 using ScalarType = bool;
14 static constexpr size_t Size = N;
15};
16
17template <typename T, uint32_t N>
18struct TypeTraits<UME::SIMD::SIMDVec_f<T, N>> {
19 using ScalarType = T;
20 using MaskType = typename UME::SIMD::SIMDVecMask<N>;
21 using IndexType = typename UME::SIMD::SIMDVec_u<uint32_t, N>;
22 static constexpr size_t Size = N;
23};
24
25template <typename T, uint32_t N>
26struct TypeTraits<UME::SIMD::SIMDVec_i<T, N>> {
27 using ScalarType = T;
28 using MaskType = typename UME::SIMD::SIMDVecMask<N>;
29 using IndexType = typename UME::SIMD::SIMDVec_u<uint32_t, N>;
30 static constexpr size_t Size = N;
31};
32
33template <typename T, uint32_t N>
34struct TypeTraits<UME::SIMD::SIMDVec_u<T, N>> {
35 using ScalarType = T;
36 using MaskType = typename UME::SIMD::SIMDVecMask<N>;
37 using IndexType = typename UME::SIMD::SIMDVec_u<uint32_t, N>;
38 static constexpr size_t Size = N;
39};
40
41// backend functions for UME::SIMD
42
43template <uint32_t N>
45bool MaskFull(const UME::SIMD::SIMDVecMask<N> &cond)
46{
47 return cond.hland();
48}
49
50template <uint32_t N>
52bool MaskEmpty(const UME::SIMD::SIMDVecMask<N> &cond)
53{
54 return !cond.hlor();
55}
56
57template <uint32_t N>
58struct IndexingImplementation<UME::SIMD::SIMDVecMask<N>> {
59 using M = UME::SIMD::SIMDVecMask<N>;
60
61 VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static bool Get(const M &mask, int i) { return mask[i]; }
62
63 VECCORE_FORCE_INLINE VECCORE_ATT_HOST_DEVICE static void Set(M &mask, int i, const bool val)
64 {
65 mask.insert(i, val);
66 }
67};
68
69template <typename T, uint32_t N>
70struct LoadStoreImplementation<UME::SIMD::SIMDVec_f<T, N>> {
71 using V = UME::SIMD::SIMDVec_f<T, N>;
72
73 template <typename S = Scalar<V>>
74 static inline void Load(V &v, S const *ptr)
75 {
76 v.load(ptr);
77 }
78
79 template <typename S = Scalar<V>>
80 static inline void Store(V const &v, S *ptr)
81 {
82 v.store(ptr);
83 }
84};
85
86template <typename T, uint32_t N>
87struct LoadStoreImplementation<UME::SIMD::SIMDVec_i<T, N>> {
88 using V = UME::SIMD::SIMDVec_i<T, N>;
89
90 template <typename S = Scalar<V>>
91 static inline void Load(V &v, S const *ptr)
92 {
93 v.load(ptr);
94 }
95
96 template <typename S = Scalar<V>>
97 static inline void Store(V const &v, S *ptr)
98 {
99 v.store(ptr);
100 }
101};
102
103template <typename T, uint32_t N>
104struct LoadStoreImplementation<UME::SIMD::SIMDVec_u<T, N>> {
105 using V = UME::SIMD::SIMDVec_u<T, N>;
106
107 template <typename S = Scalar<V>>
108 static inline void Load(V &v, S const *ptr)
109 {
110 v.load(ptr);
111 }
112
113 template <typename S = Scalar<V>>
114 static inline void Store(V const &v, S *ptr)
115 {
116 v.store(ptr);
117 }
118};
119
120template <uint32_t N>
121struct LoadStoreImplementation<UME::SIMD::SIMDVecMask<N>> {
122 using M = UME::SIMD::SIMDVecMask<N>;
123
124 template <typename S = Scalar<M>>
125 static inline void Load(M &mask, S const *ptr)
126 {
127 mask.load(ptr);
128 }
129
130 template <typename S = Scalar<M>>
131 static inline void Store(M const &mask, S *ptr)
132 {
133 mask.store(ptr);
134 }
135};
136
137template <typename T, uint32_t N>
138struct MaskingImplementation<UME::SIMD::SIMDVec_f<T, N>> {
139 using V = UME::SIMD::SIMDVec_f<T, N>;
140 using M = UME::SIMD::SIMDVecMask<N>;
141
142 static inline void Assign(V &dst, M const &mask, V const &src) { dst.assign(mask, src); }
143
144 static inline void Blend(V &dst, M const &mask, V const &src1, V const &src2) { dst = src2.blend(mask, src1); }
145};
146
147template <typename T, uint32_t N>
148struct MaskingImplementation<UME::SIMD::SIMDVec_i<T, N>> {
149 using V = UME::SIMD::SIMDVec_i<T, N>;
150 using M = UME::SIMD::SIMDVecMask<N>;
151
152 static inline void Assign(V &dst, M const &mask, V const &src) { dst.assign(mask, src); }
153
154 static inline void Blend(V &dst, M const &mask, V const &src1, V const &src2) { dst = src2.blend(mask, src1); }
155};
156
157template <typename T, uint32_t N>
158struct MaskingImplementation<UME::SIMD::SIMDVec_u<T, N>> {
159 using V = UME::SIMD::SIMDVec_u<T, N>;
160 using M = UME::SIMD::SIMDVecMask<N>;
161
162 static inline void Assign(V &dst, M const &mask, V const &src) { dst.assign(mask, src); }
163
164 static inline void Blend(V &dst, M const &mask, V const &src1, V const &src2) { dst = src2.blend(mask, src1); }
165};
166
167inline namespace math {
168
169template <typename T, uint32_t N>
171void SinCos(const UME::SIMD::SIMDVec_f<T, N> &x, UME::SIMD::SIMDVec_f<T, N> *s, UME::SIMD::SIMDVec_f<T, N> *c)
172{
173 *s = x.sin();
174 *c = x.cos();
175}
176
177#define UMESIMD_MATH_UNARY_FUNCTION(F, f) \
178template <typename T, uint32_t N> \
179VECCORE_FORCE_INLINE \
180typename UME::SIMD::SIMDVec_f<T, N> \
181F(const UME::SIMD::SIMDVec_f<T, N> &x) \
182{ return x.f(); }
183
184// UMESIMD_MATH_UNARY_FUNCTION(Abs, abs) // Generic implementation is faster
185
186// UMESIMD_MATH_UNARY_FUNCTION(Sin, sin) // broken
187// UMESIMD_MATH_UNARY_FUNCTION(Cos, cos) // broken
190
191// UMESIMD_MATH_UNARY_FUNCTION(Exp, exp) // broken
192// UMESIMD_MATH_UNARY_FUNCTION(Log, log) // broken
193// UMESIMD_MATH_UNARY_FUNCTION(Sqrt, sqrt) // slower than std::sqrt()
194// UMESIMD_MATH_UNARY_FUNCTION(Rsqrt, rsqrt) // slower than std::sqrt(1/x)
195
196UMESIMD_MATH_UNARY_FUNCTION(Round, round)
197// UMESIMD_MATH_UNARY_FUNCTION(Floor, floor) // slower than std::floor()
198// UMESIMD_MATH_UNARY_FUNCTION(Ceil, ceil) // slower than std::ceil()
199
200#undef UMESIMD_MATH_UNARY_FUNCTION
201
202template <typename T, uint32_t N>
204UME::SIMD::SIMDVecMask<N> IsInf(const UME::SIMD::SIMDVec_f<T, N> &x)
205{
206 return x.isinf();
207}
208
209} // end namespace math
210} // end namespace vecCore
211
212#endif
#define VECCORE_ATT_HOST_DEVICE
Definition: CUDA.h:10
#define VECCORE_FORCE_INLINE
Definition: Common.h:32
#define UMESIMD_MATH_UNARY_FUNCTION(F, f)
VECCORE_FORCE_INLINE void SinCos(const UME::SIMD::SIMDVec_f< T, N > &x, UME::SIMD::SIMDVec_f< T, N > *s, UME::SIMD::SIMDVec_f< T, N > *c)
VECCORE_FORCE_INLINE UME::SIMD::SIMDVecMask< N > IsInf(const UME::SIMD::SIMDVec_f< T, N > &x)
VECCORE_FORCE_INLINE Vc::SimdArray< T, N > Tan(const Vc::SimdArray< T, N > &x)
Definition: VcSimdArray.h:148
VECCORE_ATT_HOST_DEVICE bool MaskEmpty(const M &mask)
VECCORE_ATT_HOST_DEVICE bool MaskFull(const M &mask)
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE void Set(M &mask, int i, const bool val)
Definition: UMESimdCommon.h:63
VECCORE_FORCE_INLINE static VECCORE_ATT_HOST_DEVICE bool Get(const M &mask, int i)
Definition: UMESimdCommon.h:61
static void Assign(V &dst, M const &mask, V const &src)
static void Blend(V &dst, M const &mask, V const &src1, V const &src2)
static void Assign(V &dst, M const &mask, V const &src)
static void Blend(V &dst, M const &mask, V const &src1, V const &src2)
static void Assign(V &dst, M const &mask, V const &src)
static void Blend(V &dst, M const &mask, V const &src1, V const &src2)
typename UME::SIMD::SIMDVecMask< N > MaskType
Definition: UMESimdCommon.h:11
typename UME::SIMD::SIMDVec_u< uint32_t, N > IndexType
Definition: UMESimdCommon.h:21
typename UME::SIMD::SIMDVecMask< N > MaskType
Definition: UMESimdCommon.h:20
typename UME::SIMD::SIMDVecMask< N > MaskType
Definition: UMESimdCommon.h:28
typename UME::SIMD::SIMDVec_u< uint32_t, N > IndexType
Definition: UMESimdCommon.h:29
typename UME::SIMD::SIMDVec_u< uint32_t, N > IndexType
Definition: UMESimdCommon.h:37
typename UME::SIMD::SIMDVecMask< N > MaskType
Definition: UMESimdCommon.h:36
static constexpr size_t Size
Definition: Scalar.h:14