suanPan
🧮 An Open Source, Parallel and Heterogeneous Finite Element Analysis Framework
Loading...
Searching...
No Matches
SparseMatCUDA.hpp
Go to the documentation of this file.
1/*******************************************************************************
2 * Copyright (C) 2017-2026 Theodore Chang
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 ******************************************************************************/
33// ReSharper disable CppCStyleCast
34#ifndef SPARSEMATCUDA_HPP
35#define SPARSEMATCUDA_HPP
36
37#include "../SparseMat.hpp"
38#include "../csr_form.hpp"
39#include "../cuda_ptr.hpp"
40
41#include <cusolverSp.h>
42
43template<sp_d T> class SparseMatCUDA final : public SparseMat<T> {
44 cusolverSpHandle_t handle = nullptr;
45 cudaStream_t stream = nullptr;
46 cusparseMatDescr_t descr = nullptr;
47
48 cuda_ptr d_val_idx{}, d_col_idx{}, d_row_ptr{};
49
50 void init_config() {
51 cusolverSpCreate(&handle);
52 cudaStreamCreate(&stream);
53 cusolverSpSetStream(handle, stream);
54 cusparseCreateMatDescr(&descr);
55 cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
56 cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
57 d_row_ptr = cuda_ptr(sizeof(int), static_cast<int>(this->n_rows) + 1);
58 this->factored = false;
59 }
60
61 void release() const {
62 if(handle) cusolverSpDestroy(handle);
63 if(stream) cudaStreamDestroy(stream);
64 if(descr) cusparseDestroyMatDescr(descr);
65 }
66
67 template<sp_d ET> void device_alloc(csr_form<ET, int>&& csr_mat) {
68 d_val_idx = cuda_ptr(sizeof(ET), csr_mat.n_elem);
69 d_col_idx = cuda_ptr(sizeof(int), csr_mat.n_elem);
70
71 d_val_idx.copy_from(csr_mat.val_mem(), stream);
72 d_col_idx.copy_from(csr_mat.col_mem(), stream);
73 d_row_ptr.copy_from(csr_mat.row_mem(), stream);
74 }
75
76protected:
78
79 int direct_solve(Mat<T>&, const Mat<T>&) override;
80
81public:
82 SparseMatCUDA(const uword in_row, const uword in_col, const uword in_elem = 0)
83 : SparseMat<T>(in_row, in_col, in_elem) { init_config(); }
84
86 : SparseMat<T>(other) { init_config(); }
87
91
92 ~SparseMatCUDA() override { release(); }
93
94 unique_ptr<MetaMat<T>> unique_copy() override { return std::make_unique<SparseMatCUDA>(*this); }
95};
96
97template<sp_d T> int SparseMatCUDA<T>::direct_solve(Mat<T>& X, const Mat<T>& B) {
98 const auto single_precision = std::is_same_v<T, float> || Precision::MIXED == this->setting.precision;
99
100 if(!this->factored) {
101 this->factored = true;
102 single_precision ? device_alloc(csr_form<float, int>(this->triplet_mat)) : device_alloc(csr_form<double, int>(this->triplet_mat));
103 }
104
105 const size_t unit_size = single_precision ? sizeof(float) : sizeof(double);
106
107 const cuda_ptr d_b{unit_size, static_cast<int>(B.n_elem)}, d_x{unit_size, static_cast<int>(B.n_elem)};
108
109 int singularity;
110 Col<int> code(B.n_cols);
111
112 if constexpr(std::is_same_v<T, float>) {
113 d_b.copy_from(B.memptr(), stream);
114
115 for(auto I = 0llu, J = 0llu; I < B.n_elem; I += B.n_rows, ++J) code[J] = cusolverSpScsrlsvqr(handle, static_cast<int>(this->n_rows), d_val_idx.size, descr, d_val_idx.get<float>(), d_row_ptr.get(), d_col_idx.get(), d_b.get<float>(I), std::numeric_limits<float>::epsilon(), 3, d_x.get<float>(I), &singularity);
116
117 if(0 == code.max()) {
118 X.set_size(arma::size(B));
119 d_x.copy_to(X.memptr(), stream);
120 }
121 }
122 else if(Precision::FULL == this->setting.precision) {
123 d_b.copy_from(B.memptr(), stream);
124
125 for(auto I = 0llu, J = 0llu; I < B.n_elem; I += B.n_rows, ++J) code[J] = cusolverSpDcsrlsvqr(handle, static_cast<int>(this->n_rows), d_val_idx.size, descr, d_val_idx.get<double>(), d_row_ptr.get(), d_col_idx.get(), d_b.get<double>(I), std::numeric_limits<double>::epsilon(), 3, d_x.get<double>(I), &singularity);
126
127 if(0 == code.max()) {
128 X.set_size(arma::size(B));
129 d_x.copy_to(X.memptr(), stream);
130 }
131 }
132 else {
133 X = arma::zeros(arma::size(B));
134
135 mat full_residual = B;
136
137 std::uint8_t counter{0};
138 while(counter++ < this->setting.iterative_refinement) {
139 const auto multiplier = norm(full_residual);
140 if(multiplier < this->setting.tolerance) break;
141 suanpan_debug("Mixed precision algorithm multiplier: {:.5E}.\n", multiplier);
142
143 auto residual = conv_to<fmat>::from(full_residual / multiplier);
144 d_b.copy_from(residual.memptr(), stream);
145
146 for(auto I = 0llu, J = 0llu; I < B.n_elem; I += B.n_rows, ++J) code[J] = cusolverSpScsrlsvqr(handle, static_cast<int>(this->n_rows), d_val_idx.size, descr, d_val_idx.get<float>(), d_row_ptr.get(), d_col_idx.get(), d_b.get<float>(I), std::numeric_limits<float>::epsilon(), 3, d_x.get<float>(I), &singularity);
147 if(0 != code.max()) break;
148
149 d_x.copy_to(residual.memptr(), stream);
150 full_residual = B - this->operator*(X += multiplier * conv_to<mat>::from(residual));
151 }
152 }
153
154 return 0 == code.max() ? SUANPAN_SUCCESS : SUANPAN_FAIL;
155}
156
157#endif
158
const uword n_rows
Definition MetaMat.hpp:115
bool factored
Definition MetaMat.hpp:76
A SparseMatCUDA class that holds matrices.
Definition SparseMatCUDA.hpp:43
SparseMatCUDA & operator=(const SparseMatCUDA &)=delete
SparseMatCUDA & operator=(SparseMatCUDA &&)=delete
~SparseMatCUDA() override
Definition SparseMatCUDA.hpp:92
SparseMatCUDA(const uword in_row, const uword in_col, const uword in_elem=0)
Definition SparseMatCUDA.hpp:82
SparseMatCUDA(const SparseMatCUDA &other)
Definition SparseMatCUDA.hpp:85
SparseMatCUDA(SparseMatCUDA &&)=delete
unique_ptr< MetaMat< T > > unique_copy() override
Definition SparseMatCUDA.hpp:94
A SparseMat class that holds matrices.
Definition SparseMat.hpp:34
Definition csr_form.hpp:25
Definition cuda_ptr.hpp:32
auto copy_from(const void *src, const cudaStream_t s) const
Definition cuda_ptr.hpp:67
int direct_solve(Mat< T > &, const Mat< T > &) override
Definition SparseMatCUDA.hpp:97
op_scale< T > operator*(const T value, const shared_ptr< MetaMat< T > > &M)
Definition operator_times.hpp:23
#define suanpan_debug(...)
Definition suanPan.h:347
constexpr auto SUANPAN_SUCCESS
Definition suanPan.h:166
constexpr auto SUANPAN_FAIL
Definition suanPan.h:167