suanPan
SparseMatSuperLU.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * Copyright (C) 2017-2023 Theodore Chang
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  ******************************************************************************/
29 // ReSharper disable CppCStyleCast
30 #ifndef SPARSEMATSUPERLU_HPP
31 #define SPARSEMATSUPERLU_HPP
32 
33 #include <superlu-mt/superlu-mt.h>
34 #include "SparseMat.hpp"
35 #include "csc_form.hpp"
36 
37 template<sp_d T> class SparseMatSuperLU final : public SparseMat<T> {
38  SuperMatrix A{}, L{}, U{}, B{};
39 
40 #ifndef SUANPAN_SUPERLUMT
41  superlu_options_t options{};
42 
43  SuperLUStat_t stat{};
44 #else
45  const int ordering_num = 1;
46 
47  Gstat_t stat{};
48 #endif
49 
50  void* t_val = nullptr;
51  int* t_row = nullptr;
52  int* t_col = nullptr;
53 
54  int* perm_r = nullptr;
55  int* perm_c = nullptr;
56 
57  bool allocated = false;
58 
59  template<sp_d ET> void alloc_supermatrix(csc_form<ET, int>&&);
60  void dealloc_supermatrix();
61 
62  template<sp_d ET> void wrap_b(const Mat<ET>&);
63  template<sp_d ET> void tri_solve(int&);
64  template<sp_d ET> void full_solve(int&);
65 
66  int solve_trs(Mat<T>&, Mat<T>&&);
67  int solve_trs(Mat<T>&, const Mat<T>&);
68 
69 public:
70  SparseMatSuperLU(uword, uword, uword = 0);
72  SparseMatSuperLU(SparseMatSuperLU&&) noexcept = delete;
73  SparseMatSuperLU& operator=(const SparseMatSuperLU&) = delete;
74  SparseMatSuperLU& operator=(SparseMatSuperLU&&) noexcept = delete;
75  ~SparseMatSuperLU() override;
76 
77  void zeros() override;
78 
79  unique_ptr<MetaMat<T>> make_copy() override;
80 
81  int direct_solve(Mat<T>&, Mat<T>&&) override;
82  int direct_solve(Mat<T>&, const Mat<T>&) override;
83 };
84 
85 template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::alloc_supermatrix(csc_form<ET, int>&& in) {
86  dealloc_supermatrix();
87 
88  auto t_size = sizeof(ET) * in.n_elem;
89  t_val = superlu_malloc(t_size);
90  memcpy(t_val, (void*)in.val_mem(), t_size);
91 
92  t_size = sizeof(int) * in.n_elem;
93  t_row = (int*)superlu_malloc(t_size);
94  memcpy(t_row, (void*)in.row_mem(), t_size);
95 
96  t_size = sizeof(int) * (in.n_cols + 1llu);
97  t_col = (int*)superlu_malloc(t_size);
98  memcpy(t_col, (void*)in.col_mem(), t_size);
99 
100  if(std::is_same_v<ET, double>) {
101  using E = double;
102  dCreate_CompCol_Matrix(&A, in.n_rows, in.n_cols, in.n_elem, (E*)t_val, t_row, t_col, Stype_t::SLU_NC, Dtype_t::SLU_D, Mtype_t::SLU_GE);
103  }
104  else {
105  using E = float;
106  sCreate_CompCol_Matrix(&A, in.n_rows, in.n_cols, in.n_elem, (E*)t_val, t_row, t_col, Stype_t::SLU_NC, Dtype_t::SLU_S, Mtype_t::SLU_GE);
107  }
108 
109  perm_r = (int*)superlu_malloc(sizeof(int) * (this->n_rows + 1));
110  perm_c = (int*)superlu_malloc(sizeof(int) * (this->n_cols + 1));
111 
112  allocated = true;
113 }
114 
115 template<sp_d T> void SparseMatSuperLU<T>::dealloc_supermatrix() {
116  if(!allocated) return;
117 
118  Destroy_SuperMatrix_Store(&A);
119 #ifdef SUANPAN_SUPERLUMT
120  Destroy_SuperNode_SCP(&L);
121  Destroy_CompCol_NCP(&U);
122 #else
123  Destroy_SuperNode_Matrix(&L);
124  Destroy_CompCol_Matrix(&U);
125 #endif
126 
127  if(t_val) superlu_free(t_val);
128  if(t_row) superlu_free(t_row);
129  if(t_col) superlu_free(t_col);
130  if(perm_r) superlu_free(perm_r);
131  if(perm_c) superlu_free(perm_c);
132 
133  allocated = false;
134 }
135 
136 template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::wrap_b(const Mat<ET>& in_mat) {
137  if(std::is_same_v<ET, float>) {
138  using E = float;
139  sCreate_Dense_Matrix(&B, (int)in_mat.n_rows, (int)in_mat.n_cols, (E*)in_mat.memptr(), (int)in_mat.n_rows, Stype_t::SLU_DN, Dtype_t::SLU_S, Mtype_t::SLU_GE);
140  }
141  else {
142  using E = double;
143  dCreate_Dense_Matrix(&B, (int)in_mat.n_rows, (int)in_mat.n_cols, (E*)in_mat.memptr(), (int)in_mat.n_rows, Stype_t::SLU_DN, Dtype_t::SLU_D, Mtype_t::SLU_GE);
144  }
145 }
146 
147 template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::tri_solve(int& flag) {
148 #ifdef SUANPAN_SUPERLUMT
149  if(std::is_same_v<ET, float>) sgstrs(NOTRANS, &L, &U, perm_c, perm_r, &B, &stat, &flag);
150  else dgstrs(NOTRANS, &L, &U, perm_c, perm_r, &B, &stat, &flag);
151 #else
152  superlu::gstrs<ET>(options.Trans, &L, &U, perm_c, perm_r, &B, &stat, &flag);
153 #endif
154 
155  Destroy_SuperMatrix_Store(&B);
156 }
157 
158 template<sp_d T> template<sp_d ET> void SparseMatSuperLU<T>::full_solve(int& flag) {
159 #ifdef SUANPAN_SUPERLUMT
160  get_perm_c(ordering_num, &A, perm_c);
161  if(std::is_same_v<ET, float>) psgssv(SUANPAN_NUM_THREADS, &A, perm_c, perm_r, &L, &U, &B, &flag);
162  else pdgssv(SUANPAN_NUM_THREADS, &A, perm_c, perm_r, &L, &U, &B, &flag);
163 #else
164  superlu::gssv<ET>(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &flag);
165 #endif
166 
167  Destroy_SuperMatrix_Store(&B);
168 }
169 
170 template<sp_d T> SparseMatSuperLU<T>::SparseMatSuperLU(const uword in_row, const uword in_col, const uword in_elem)
171  : SparseMat<T>(in_row, in_col, in_elem) {
172 #ifndef SUANPAN_SUPERLUMT
173  set_default_options(&options);
174  options.IterRefine = std::is_same_v<T, float> ? superlu::IterRefine_t::SLU_SINGLE : superlu::IterRefine_t::SLU_DOUBLE;
175  options.Equil = superlu::yes_no_t::NO;
176 
177  arrayops::fill_zeros(reinterpret_cast<char*>(&stat), sizeof(SuperLUStat_t));
178 
179  StatInit(&stat);
180 #else
181  StatAlloc(static_cast<int>(in_col), SUANPAN_NUM_THREADS, sp_ienv(1), sp_ienv(2), &stat);
182  StatInit(static_cast<int>(in_col), SUANPAN_NUM_THREADS, &stat);
183 #endif
184 }
185 
187  : SparseMat<T>(other) {
188 #ifndef SUANPAN_SUPERLUMT
189  set_default_options(&options);
190  options.IterRefine = std::is_same_v<T, float> ? superlu::IterRefine_t::SLU_SINGLE : superlu::IterRefine_t::SLU_DOUBLE;
191  options.Equil = superlu::yes_no_t::NO;
192 
193  arrayops::fill_zeros(reinterpret_cast<char*>(&stat), sizeof(SuperLUStat_t));
194 
195  StatInit(&stat);
196 #else
197  StatAlloc(static_cast<int>(other.n_cols), SUANPAN_NUM_THREADS, sp_ienv(1), sp_ienv(2), &stat);
198  StatInit(static_cast<int>(other.n_cols), SUANPAN_NUM_THREADS, &stat);
199 #endif
200 }
201 
203  dealloc_supermatrix();
204  StatFree(&stat);
205 }
206 
207 template<sp_d T> void SparseMatSuperLU<T>::zeros() {
209  dealloc_supermatrix();
210 }
211 
212 template<sp_d T> unique_ptr<MetaMat<T>> SparseMatSuperLU<T>::make_copy() { return std::make_unique<SparseMatSuperLU<T>>(*this); }
213 
214 template<sp_d T> int SparseMatSuperLU<T>::direct_solve(Mat<T>& out_mat, const Mat<T>& in_mat) {
215  if(this->factored) return solve_trs(out_mat, in_mat);
216 
217  this->factored = true;
218 
219  auto flag = 0;
220 
221  if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
222  alloc_supermatrix(csc_form<T, int>(this->triplet_mat));
223 
224  out_mat = in_mat;
225 
226  wrap_b(out_mat);
227 
228  full_solve<T>(flag);
229 
230  return flag;
231  }
232 
233  alloc_supermatrix(csc_form<float, int>(this->triplet_mat));
234 
235  const fmat f_mat(arma::size(in_mat), fill::none);
236 
237  wrap_b(f_mat);
238 
239  full_solve<float>(flag);
240 
241  return 0 == flag ? solve_trs(out_mat, in_mat) : flag;
242 }
243 
244 template<sp_d T> int SparseMatSuperLU<T>::solve_trs(Mat<T>& out_mat, const Mat<T>& in_mat) {
245  auto flag = 0;
246 
247  if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
248  out_mat = in_mat;
249 
250  wrap_b(out_mat);
251 
252  tri_solve<T>(flag);
253 
254  return flag;
255  }
256 
257  out_mat.zeros(arma::size(in_mat));
258 
259  mat full_residual = in_mat;
260 
261  auto multiplier = norm(full_residual);
262 
263  auto counter = 0u;
264  while(counter++ < this->setting.iterative_refinement) {
265  if(multiplier < this->setting.tolerance) break;
266 
267  auto residual = conv_to<fmat>::from(full_residual / multiplier);
268 
269  wrap_b(residual);
270 
271  tri_solve<float>(flag);
272 
273  if(0 != flag) break;
274 
275  const mat incre = multiplier * conv_to<mat>::from(residual);
276 
277  out_mat += incre;
278 
279  suanpan_debug("Mixed precision algorithm multiplier: {:.5E}.\n", multiplier = norm(full_residual -= this->operator*(incre)));
280  }
281 
282  return flag;
283 }
284 
285 template<sp_d T> int SparseMatSuperLU<T>::direct_solve(Mat<T>& out_mat, Mat<T>&& in_mat) {
286  if(this->factored) return solve_trs(out_mat, std::forward<Mat<T>>(in_mat));
287 
288  this->factored = true;
289 
290  auto flag = 0;
291 
292  if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
293  alloc_supermatrix(csc_form<T, int>(this->triplet_mat));
294 
295  wrap_b(in_mat);
296 
297  full_solve<T>(flag);
298 
299  out_mat = std::move(in_mat);
300 
301  return flag;
302  }
303 
304  alloc_supermatrix(csc_form<float, int>(this->triplet_mat));
305 
306  const fmat f_mat(arma::size(in_mat), fill::none);
307 
308  wrap_b(f_mat);
309 
310  full_solve<float>(flag);
311 
312  return 0 == flag ? solve_trs(out_mat, std::forward<Mat<T>>(in_mat)) : flag;
313 }
314 
315 template<sp_d T> int SparseMatSuperLU<T>::solve_trs(Mat<T>& out_mat, Mat<T>&& in_mat) {
316  auto flag = 0;
317 
318  if(std::is_same_v<T, float> || Precision::FULL == this->setting.precision) {
319  wrap_b(in_mat);
320 
321  tri_solve<T>(flag);
322 
323  out_mat = std::move(in_mat);
324 
325  return flag;
326  }
327 
328  out_mat.zeros(arma::size(in_mat));
329 
330  auto multiplier = arma::norm(in_mat);
331 
332  auto counter = 0u;
333  while(counter++ < this->setting.iterative_refinement) {
334  if(multiplier < this->setting.tolerance) break;
335 
336  auto residual = conv_to<fmat>::from(in_mat / multiplier);
337 
338  wrap_b(residual);
339 
340  tri_solve<float>(flag);
341 
342  if(0 != flag) break;
343 
344  const mat incre = multiplier * conv_to<mat>::from(residual);
345 
346  out_mat += incre;
347 
348  suanpan_debug("Mixed precision algorithm multiplier: {:.5E}.\n", multiplier = norm(in_mat -= this->operator*(incre)));
349  }
350 
351  return flag;
352 }
353 #endif
354 
A MetaMat class that holds matrices.
Definition: MetaMat.hpp:39
const uword n_cols
Definition: MetaMat.hpp:49
const uword n_rows
Definition: MetaMat.hpp:48
A SparseMat class that holds matrices.
Definition: SparseMat.hpp:34
A SparseMatSuperLU class that holds matrices.
Definition: SparseMatSuperLU.hpp:37
SparseMatSuperLU(SparseMatSuperLU &&) noexcept=delete
Definition: csc_form.hpp:25
int SUANPAN_NUM_THREADS
Definition: command.cpp:67
void zeros() override
Definition: SparseMat.hpp:79
~SparseMatSuperLU() override
Definition: SparseMatSuperLU.hpp:202
unique_ptr< MetaMat< T > > make_copy() override
Definition: SparseMatSuperLU.hpp:212
int direct_solve(Mat< T > &, Mat< T > &&) override
Definition: SparseMatSuperLU.hpp:285
void zeros() override
Definition: SparseMatSuperLU.hpp:207
SparseMatSuperLU(uword, uword, uword=0)
Definition: SparseMatSuperLU.hpp:170
double norm(const vec &)
Definition: tensor.cpp:302
concept sp_d
Definition: suanPan.h:318
#define suanpan_debug(...)
Definition: suanPan.h:295