ergo
template_blas_trmm.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_BLAS_TRMM_HEADER
36 #define TEMPLATE_BLAS_TRMM_HEADER
37 
38 #include "template_blas_common.h"
39 
40 template<class Treal>
41 int template_blas_trmm(const char *side, const char *uplo, const char *transa, const char *diag,
42  const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *
43  lda, Treal *b, const integer *ldb)
44 {
45  /* System generated locals */
46  integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
47  /* Local variables */
48  integer info;
49  Treal temp;
50  integer i__, j, k;
51  logical lside;
52  integer nrowa;
53  logical upper;
54  logical nounit;
55 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
56 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
57 /* Purpose
58  =======
59  DTRMM performs one of the matrix-matrix operations
60  B := alpha*op( A )*B, or B := alpha*B*op( A ),
61  where alpha is a scalar, B is an m by n matrix, A is a unit, or
62  non-unit, upper or lower triangular matrix and op( A ) is one of
63  op( A ) = A or op( A ) = A'.
64  Parameters
65  ==========
66  SIDE - CHARACTER*1.
67  On entry, SIDE specifies whether op( A ) multiplies B from
68  the left or right as follows:
69  SIDE = 'L' or 'l' B := alpha*op( A )*B.
70  SIDE = 'R' or 'r' B := alpha*B*op( A ).
71  Unchanged on exit.
72  UPLO - CHARACTER*1.
73  On entry, UPLO specifies whether the matrix A is an upper or
74  lower triangular matrix as follows:
75  UPLO = 'U' or 'u' A is an upper triangular matrix.
76  UPLO = 'L' or 'l' A is a lower triangular matrix.
77  Unchanged on exit.
78  TRANSA - CHARACTER*1.
79  On entry, TRANSA specifies the form of op( A ) to be used in
80  the matrix multiplication as follows:
81  TRANSA = 'N' or 'n' op( A ) = A.
82  TRANSA = 'T' or 't' op( A ) = A'.
83  TRANSA = 'C' or 'c' op( A ) = A'.
84  Unchanged on exit.
85  DIAG - CHARACTER*1.
86  On entry, DIAG specifies whether or not A is unit triangular
87  as follows:
88  DIAG = 'U' or 'u' A is assumed to be unit triangular.
89  DIAG = 'N' or 'n' A is not assumed to be unit
90  triangular.
91  Unchanged on exit.
92  M - INTEGER.
93  On entry, M specifies the number of rows of B. M must be at
94  least zero.
95  Unchanged on exit.
96  N - INTEGER.
97  On entry, N specifies the number of columns of B. N must be
98  at least zero.
99  Unchanged on exit.
100  ALPHA - DOUBLE PRECISION.
101  On entry, ALPHA specifies the scalar alpha. When alpha is
102  zero then A is not referenced and B need not be set before
103  entry.
104  Unchanged on exit.
105  A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
106  when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
107  Before entry with UPLO = 'U' or 'u', the leading k by k
108  upper triangular part of the array A must contain the upper
109  triangular matrix and the strictly lower triangular part of
110  A is not referenced.
111  Before entry with UPLO = 'L' or 'l', the leading k by k
112  lower triangular part of the array A must contain the lower
113  triangular matrix and the strictly upper triangular part of
114  A is not referenced.
115  Note that when DIAG = 'U' or 'u', the diagonal elements of
116  A are not referenced either, but are assumed to be unity.
117  Unchanged on exit.
118  LDA - INTEGER.
119  On entry, LDA specifies the first dimension of A as declared
120  in the calling (sub) program. When SIDE = 'L' or 'l' then
121  LDA must be at least max( 1, m ), when SIDE = 'R' or 'r'
122  then LDA must be at least max( 1, n ).
123  Unchanged on exit.
124  B - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
125  Before entry, the leading m by n part of the array B must
126  contain the matrix B, and on exit is overwritten by the
127  transformed matrix.
128  LDB - INTEGER.
129  On entry, LDB specifies the first dimension of B as declared
130  in the calling (sub) program. LDB must be at least
131  max( 1, m ).
132  Unchanged on exit.
133  Level 3 Blas routine.
134  -- Written on 8-February-1989.
135  Jack Dongarra, Argonne National Laboratory.
136  Iain Duff, AERE Harwell.
137  Jeremy Du Croz, Numerical Algorithms Group Ltd.
138  Sven Hammarling, Numerical Algorithms Group Ltd.
139  Test the input parameters.
140  Parameter adjustments */
141  a_dim1 = *lda;
142  a_offset = 1 + a_dim1 * 1;
143  a -= a_offset;
144  b_dim1 = *ldb;
145  b_offset = 1 + b_dim1 * 1;
146  b -= b_offset;
147  /* Function Body */
148  lside = template_blas_lsame(side, "L");
149  if (lside) {
150  nrowa = *m;
151  } else {
152  nrowa = *n;
153  }
154  nounit = template_blas_lsame(diag, "N");
155  upper = template_blas_lsame(uplo, "U");
156  info = 0;
157  if (! lside && ! template_blas_lsame(side, "R")) {
158  info = 1;
159  } else if (! upper && ! template_blas_lsame(uplo, "L")) {
160  info = 2;
161  } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa,
162  "T") && ! template_blas_lsame(transa, "C")) {
163  info = 3;
164  } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag,
165  "N")) {
166  info = 4;
167  } else if (*m < 0) {
168  info = 5;
169  } else if (*n < 0) {
170  info = 6;
171  } else if (*lda < maxMACRO(1,nrowa)) {
172  info = 9;
173  } else if (*ldb < maxMACRO(1,*m)) {
174  info = 11;
175  }
176  if (info != 0) {
177  template_blas_erbla("TRMM ", &info);
178  return 0;
179  }
180 /* Quick return if possible. */
181  if (*n == 0) {
182  return 0;
183  }
184 /* And when alpha.eq.zero. */
185  if (*alpha == 0.) {
186  i__1 = *n;
187  for (j = 1; j <= i__1; ++j) {
188  i__2 = *m;
189  for (i__ = 1; i__ <= i__2; ++i__) {
190  b_ref(i__, j) = 0.;
191 /* L10: */
192  }
193 /* L20: */
194  }
195  return 0;
196  }
197 /* Start the operations. */
198  if (lside) {
199  if (template_blas_lsame(transa, "N")) {
200 /* Form B := alpha*A*B. */
201  if (upper) {
202  i__1 = *n;
203  for (j = 1; j <= i__1; ++j) {
204  i__2 = *m;
205  for (k = 1; k <= i__2; ++k) {
206  if (b_ref(k, j) != 0.) {
207  temp = *alpha * b_ref(k, j);
208  i__3 = k - 1;
209  for (i__ = 1; i__ <= i__3; ++i__) {
210  b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
211  i__, k);
212 /* L30: */
213  }
214  if (nounit) {
215  temp *= a_ref(k, k);
216  }
217  b_ref(k, j) = temp;
218  }
219 /* L40: */
220  }
221 /* L50: */
222  }
223  } else {
224  i__1 = *n;
225  for (j = 1; j <= i__1; ++j) {
226  for (k = *m; k >= 1; --k) {
227  if (b_ref(k, j) != 0.) {
228  temp = *alpha * b_ref(k, j);
229  b_ref(k, j) = temp;
230  if (nounit) {
231  b_ref(k, j) = b_ref(k, j) * a_ref(k, k);
232  }
233  i__2 = *m;
234  for (i__ = k + 1; i__ <= i__2; ++i__) {
235  b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
236  i__, k);
237 /* L60: */
238  }
239  }
240 /* L70: */
241  }
242 /* L80: */
243  }
244  }
245  } else {
246 /* Form B := alpha*A'*B. */
247  if (upper) {
248  i__1 = *n;
249  for (j = 1; j <= i__1; ++j) {
250  for (i__ = *m; i__ >= 1; --i__) {
251  temp = b_ref(i__, j);
252  if (nounit) {
253  temp *= a_ref(i__, i__);
254  }
255  i__2 = i__ - 1;
256  for (k = 1; k <= i__2; ++k) {
257  temp += a_ref(k, i__) * b_ref(k, j);
258 /* L90: */
259  }
260  b_ref(i__, j) = *alpha * temp;
261 /* L100: */
262  }
263 /* L110: */
264  }
265  } else {
266  i__1 = *n;
267  for (j = 1; j <= i__1; ++j) {
268  i__2 = *m;
269  for (i__ = 1; i__ <= i__2; ++i__) {
270  temp = b_ref(i__, j);
271  if (nounit) {
272  temp *= a_ref(i__, i__);
273  }
274  i__3 = *m;
275  for (k = i__ + 1; k <= i__3; ++k) {
276  temp += a_ref(k, i__) * b_ref(k, j);
277 /* L120: */
278  }
279  b_ref(i__, j) = *alpha * temp;
280 /* L130: */
281  }
282 /* L140: */
283  }
284  }
285  }
286  } else {
287  if (template_blas_lsame(transa, "N")) {
288 /* Form B := alpha*B*A. */
289  if (upper) {
290  for (j = *n; j >= 1; --j) {
291  temp = *alpha;
292  if (nounit) {
293  temp *= a_ref(j, j);
294  }
295  i__1 = *m;
296  for (i__ = 1; i__ <= i__1; ++i__) {
297  b_ref(i__, j) = temp * b_ref(i__, j);
298 /* L150: */
299  }
300  i__1 = j - 1;
301  for (k = 1; k <= i__1; ++k) {
302  if (a_ref(k, j) != 0.) {
303  temp = *alpha * a_ref(k, j);
304  i__2 = *m;
305  for (i__ = 1; i__ <= i__2; ++i__) {
306  b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
307  i__, k);
308 /* L160: */
309  }
310  }
311 /* L170: */
312  }
313 /* L180: */
314  }
315  } else {
316  i__1 = *n;
317  for (j = 1; j <= i__1; ++j) {
318  temp = *alpha;
319  if (nounit) {
320  temp *= a_ref(j, j);
321  }
322  i__2 = *m;
323  for (i__ = 1; i__ <= i__2; ++i__) {
324  b_ref(i__, j) = temp * b_ref(i__, j);
325 /* L190: */
326  }
327  i__2 = *n;
328  for (k = j + 1; k <= i__2; ++k) {
329  if (a_ref(k, j) != 0.) {
330  temp = *alpha * a_ref(k, j);
331  i__3 = *m;
332  for (i__ = 1; i__ <= i__3; ++i__) {
333  b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
334  i__, k);
335 /* L200: */
336  }
337  }
338 /* L210: */
339  }
340 /* L220: */
341  }
342  }
343  } else {
344 /* Form B := alpha*B*A'. */
345  if (upper) {
346  i__1 = *n;
347  for (k = 1; k <= i__1; ++k) {
348  i__2 = k - 1;
349  for (j = 1; j <= i__2; ++j) {
350  if (a_ref(j, k) != 0.) {
351  temp = *alpha * a_ref(j, k);
352  i__3 = *m;
353  for (i__ = 1; i__ <= i__3; ++i__) {
354  b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
355  i__, k);
356 /* L230: */
357  }
358  }
359 /* L240: */
360  }
361  temp = *alpha;
362  if (nounit) {
363  temp *= a_ref(k, k);
364  }
365  if (temp != 1.) {
366  i__2 = *m;
367  for (i__ = 1; i__ <= i__2; ++i__) {
368  b_ref(i__, k) = temp * b_ref(i__, k);
369 /* L250: */
370  }
371  }
372 /* L260: */
373  }
374  } else {
375  for (k = *n; k >= 1; --k) {
376  i__1 = *n;
377  for (j = k + 1; j <= i__1; ++j) {
378  if (a_ref(j, k) != 0.) {
379  temp = *alpha * a_ref(j, k);
380  i__2 = *m;
381  for (i__ = 1; i__ <= i__2; ++i__) {
382  b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
383  i__, k);
384 /* L270: */
385  }
386  }
387 /* L280: */
388  }
389  temp = *alpha;
390  if (nounit) {
391  temp *= a_ref(k, k);
392  }
393  if (temp != 1.) {
394  i__1 = *m;
395  for (i__ = 1; i__ <= i__1; ++i__) {
396  b_ref(i__, k) = temp * b_ref(i__, k);
397 /* L290: */
398  }
399  }
400 /* L300: */
401  }
402  }
403  }
404  }
405  return 0;
406 /* End of DTRMM . */
407 } /* dtrmm_ */
408 #undef b_ref
409 #undef a_ref
410 
411 #endif