11 #ifndef DIST_MATRIX_CUDA_HPP
12 #define DIST_MATRIX_CUDA_HPP
18 #include <thrust/inner_product.h>
19 #include <thrust/extrema.h>
39 template<
typename MatT,
typename VecT>
49 typedef typename thrust::device_vector<MatT>
DevMatV;
50 typedef typename thrust::device_vector<VecT>
DevVecV;
59 VecT sum =
Mv[vtx] *
v[vtx];
62 for(
uint i = 0; i <
nbs; i++, edg++)
64 sum +=
Me[edg] *
v[
Nb[edg]];
78 typedef typename thrust::device_vector<T>
DevMatV;
88 for(
int i = 0; i < T::numrows; ++i)
103 thrust::device_vector<T> *_Av, thrust::device_vector<T> *_Ae,
uint _nbs)
110 const T &a =
Av[vtx];
144 struct minElementOP :
public thrust::unary_function<T, typename T::value_type>
154 for(
uint i = 0; i <
n; i++) {
165 struct maxElementOP :
public thrust::unary_function<T, typename T::value_type>
175 for(
uint i = 0; i <
n; i++) {
194 for(
uint i = 0; i < T::numElems; ++i)
202 struct addOP :
public thrust::binary_function<T, T, T>
217 template <
typename T>
218 int multGPU(T *v,
typename T::value_type::value_type a, T *r)
230 int multGPU(T *v1, T *v2,
typename T::value_type::value_type &r)
235 typename T::value_type
init;
236 for(
uint i = 0; i < T::value_type::numElems; ++i)
240 #ifdef THRUST_BACKEND_CUDA
241 typename T::value_type res = thrust::inner_product(thrust::cuda::par(mgx::cachedAlloc), v1->begin(), v1->end(),
244 typename T::value_type res = thrust::inner_product(v1->begin(), v1->end(),
248 for(
uint i = 0; i < T::value_type::numElems; ++i)
255 template <
typename TM,
typename TV>
258 if(!nb or !r or !mv or !me or !v)
262 uint nbs = me->size()/mv->size();
263 uint n = nb->size()/nbs;
265 thrust::counting_iterator<int, thrust::device_system_tag> first(0);
266 thrust::counting_iterator<int, thrust::device_system_tag> last(
n);
279 thrust::counting_iterator<int, thrust::device_system_tag> first(0);
280 thrust::counting_iterator<int, thrust::device_system_tag> last(r->size());
293 thrust::transform(v1->begin(), v1->end(), v2->begin(), r->begin(), thrust::plus<typename T::value_type>());
305 thrust::transform(v1->begin(), v1->end(), v2->begin(), r->begin(), thrust::minus<typename T::value_type>());
312 int saxpyGPU(T *v1, T *v2,
typename T::value_type::value_type a, T *r)
324 int fillGPU(
typename T::value_type::value_type a, T *r)
329 typename T::value_type fill(a);
331 thrust::fill(r->begin(), r->end(), fill);
338 int minGPU(T *v,
typename T::value_type::value_type &r)
343 thrust::device_vector<typename T::value_type::value_type> s(v->size());
346 #ifdef THRUST_BACKEND_CUDA
347 r = *thrust::min_element(thrust::cuda::par(mgx::cachedAlloc), s.begin(), s.end());
349 r = *thrust::min_element(s.begin(), s.end());
357 int maxGPU(T *v,
typename T::value_type::value_type &r)
362 thrust::device_vector<typename T::value_type::value_type> s(v->size());
365 #ifdef THRUST_BACKEND_CUDA
366 r = *thrust::max_element(thrust::cuda::par(mgx::cachedAlloc), s.begin(), s.end());
368 r = *thrust::max_element(s.begin(), s.end());
378 if(!nb or !mv or !me or !av or !ae)
382 uint nbs = me->size()/mv->size();
383 uint n = nb->size()/nbs;
385 thrust::counting_iterator<int, thrust::device_system_tag> first(0);
386 thrust::counting_iterator<int, thrust::device_system_tag> last(
n);