ergo
template_lapack_stevr.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_STEVR_HEADER
36 #define TEMPLATE_LAPACK_STEVR_HEADER
37 
38 template<class Treal>
39 int template_lapack_stevr(const char *jobz, const char *range, const integer *n,
40  Treal * d__, Treal *e, const Treal *vl,
41  const Treal *vu, const integer *il,
42  const integer *iu, const Treal *abstol,
43  integer *m, Treal *w,
44  Treal *z__, const integer *ldz, integer *isuppz,
45  Treal *work,
46  integer *lwork, integer *iwork, integer *liwork,
47  integer *info)
48 {
49  /* System generated locals */
50  integer z_dim1, z_offset, i__1, i__2;
51  Treal d__1, d__2;
52 
53  /* Builtin functions */
54 
55  /* Local variables */
56  integer i__, j, jj;
57  Treal eps, vll, vuu, tmp1;
58  integer imax;
59  Treal rmin, rmax;
60  logical test;
61  Treal tnrm;
62  integer itmp1;
63  Treal sigma;
64  char order[1];
65  integer lwmin;
66  logical wantz;
67  logical alleig, indeig;
68  integer iscale, ieeeok, indibl, indifl;
69  logical valeig;
70  Treal safmin;
71  Treal bignum;
72  integer indisp;
73  integer indiwo;
74  integer liwmin;
75  logical tryrac;
76  integer nsplit;
77  Treal smlnum;
78  logical lquery;
79 
80 
81 /* -- LAPACK driver routine (version 3.2) -- */
82 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
83 /* November 2006 */
84 
85 /* .. Scalar Arguments .. */
86 /* .. */
87 /* .. Array Arguments .. */
88 /* .. */
89 
90 /* Purpose */
91 /* ======= */
92 
93 /* DSTEVR computes selected eigenvalues and, optionally, eigenvectors */
94 /* of a real symmetric tridiagonal matrix T. Eigenvalues and */
95 /* eigenvectors can be selected by specifying either a range of values */
96 /* or a range of indices for the desired eigenvalues. */
97 
98 /* Whenever possible, DSTEVR calls DSTEMR to compute the */
99 /* eigenspectrum using Relatively Robust Representations. DSTEMR */
100 /* computes eigenvalues by the dqds algorithm, while orthogonal */
101 /* eigenvectors are computed from various "good" L D L^T representations */
102 /* (also known as Relatively Robust Representations). Gram-Schmidt */
103 /* orthogonalization is avoided as far as possible. More specifically, */
104 /* the various steps of the algorithm are as follows. For the i-th */
105 /* unreduced block of T, */
106 /* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T */
107 /* is a relatively robust representation, */
108 /* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high */
109 /* relative accuracy by the dqds algorithm, */
110 /* (c) If there is a cluster of close eigenvalues, "choose" sigma_i */
111 /* close to the cluster, and go to step (a), */
112 /* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, */
113 /* compute the corresponding eigenvector by forming a */
114 /* rank-revealing twisted factorization. */
115 /* The desired accuracy of the output can be specified by the input */
116 /* parameter ABSTOL. */
117 
118 /* For more details, see "A new O(n^2) algorithm for the symmetric */
119 /* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, */
120 /* Computer Science Division Technical Report No. UCB//CSD-97-971, */
121 /* UC Berkeley, May 1997. */
122 
123 
124 /* Note 1 : DSTEVR calls DSTEMR when the full spectrum is requested */
125 /* on machines which conform to the ieee-754 floating point standard. */
126 /* DSTEVR calls DSTEBZ and DSTEIN on non-ieee machines and */
127 /* when partial spectrum requests are made. */
128 
129 /* Normal execution of DSTEMR may create NaNs and infinities and */
130 /* hence may abort due to a floating point exception in environments */
131 /* which do not handle NaNs and infinities in the ieee standard default */
132 /* manner. */
133 
134 /* Arguments */
135 /* ========= */
136 
137 /* JOBZ (input) CHARACTER*1 */
138 /* = 'N': Compute eigenvalues only; */
139 /* = 'V': Compute eigenvalues and eigenvectors. */
140 
141 /* RANGE (input) CHARACTER*1 */
142 /* = 'A': all eigenvalues will be found. */
143 /* = 'V': all eigenvalues in the half-open interval (VL,VU] */
144 /* will be found. */
145 /* = 'I': the IL-th through IU-th eigenvalues will be found. */
146 /* ********* For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and */
147 /* ********* DSTEIN are called */
148 
149 /* N (input) INTEGER */
150 /* The order of the matrix. N >= 0. */
151 
152 /* D (input/output) DOUBLE PRECISION array, dimension (N) */
153 /* On entry, the n diagonal elements of the tridiagonal matrix */
154 /* A. */
155 /* On exit, D may be multiplied by a constant factor chosen */
156 /* to avoid over/underflow in computing the eigenvalues. */
157 
158 /* E (input/output) DOUBLE PRECISION array, dimension (max(1,N-1)) */
159 /* On entry, the (n-1) subdiagonal elements of the tridiagonal */
160 /* matrix A in elements 1 to N-1 of E. */
161 /* On exit, E may be multiplied by a constant factor chosen */
162 /* to avoid over/underflow in computing the eigenvalues. */
163 
164 /* VL (input) DOUBLE PRECISION */
165 /* VU (input) DOUBLE PRECISION */
166 /* If RANGE='V', the lower and upper bounds of the interval to */
167 /* be searched for eigenvalues. VL < VU. */
168 /* Not referenced if RANGE = 'A' or 'I'. */
169 
170 /* IL (input) INTEGER */
171 /* IU (input) INTEGER */
172 /* If RANGE='I', the indices (in ascending order) of the */
173 /* smallest and largest eigenvalues to be returned. */
174 /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
175 /* Not referenced if RANGE = 'A' or 'V'. */
176 
177 /* ABSTOL (input) DOUBLE PRECISION */
178 /* The absolute error tolerance for the eigenvalues. */
179 /* An approximate eigenvalue is accepted as converged */
180 /* when it is determined to lie in an interval [a,b] */
181 /* of width less than or equal to */
182 
183 /* ABSTOL + EPS * max( |a|,|b| ) , */
184 
185 /* where EPS is the machine precision. If ABSTOL is less than */
186 /* or equal to zero, then EPS*|T| will be used in its place, */
187 /* where |T| is the 1-norm of the tridiagonal matrix obtained */
188 /* by reducing A to tridiagonal form. */
189 
190 /* See "Computing Small Singular Values of Bidiagonal Matrices */
191 /* with Guaranteed High Relative Accuracy," by Demmel and */
192 /* Kahan, LAPACK Working Note #3. */
193 
194 /* If high relative accuracy is important, set ABSTOL to */
195 /* DLAMCH( 'Safe minimum' ). Doing so will guarantee that */
196 /* eigenvalues are computed to high relative accuracy when */
197 /* possible in future releases. The current code does not */
198 /* make any guarantees about high relative accuracy, but */
199 /* future releases will. See J. Barlow and J. Demmel, */
200 /* "Computing Accurate Eigensystems of Scaled Diagonally */
201 /* Dominant Matrices", LAPACK Working Note #7, for a discussion */
202 /* of which matrices define their eigenvalues to high relative */
203 /* accuracy. */
204 
205 /* M (output) INTEGER */
206 /* The total number of eigenvalues found. 0 <= M <= N. */
207 /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */
208 
209 /* W (output) DOUBLE PRECISION array, dimension (N) */
210 /* The first M elements contain the selected eigenvalues in */
211 /* ascending order. */
212 
213 /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
214 /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */
215 /* contain the orthonormal eigenvectors of the matrix A */
216 /* corresponding to the selected eigenvalues, with the i-th */
217 /* column of Z holding the eigenvector associated with W(i). */
218 /* Note: the user must ensure that at least max(1,M) columns are */
219 /* supplied in the array Z; if RANGE = 'V', the exact value of M */
220 /* is not known in advance and an upper bound must be used. */
221 
222 /* LDZ (input) INTEGER */
223 /* The leading dimension of the array Z. LDZ >= 1, and if */
224 /* JOBZ = 'V', LDZ >= max(1,N). */
225 
226 /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */
227 /* The support of the eigenvectors in Z, i.e., the indices */
228 /* indicating the nonzero elements in Z. The i-th eigenvector */
229 /* is nonzero only in elements ISUPPZ( 2*i-1 ) through */
230 /* ISUPPZ( 2*i ). */
231 /* ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */
232 
233 /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
234 /* On exit, if INFO = 0, WORK(1) returns the optimal (and */
235 /* minimal) LWORK. */
236 
237 /* LWORK (input) INTEGER */
238 /* The dimension of the array WORK. LWORK >= max(1,20*N). */
239 
240 /* If LWORK = -1, then a workspace query is assumed; the routine */
241 /* only calculates the optimal sizes of the WORK and IWORK */
242 /* arrays, returns these values as the first entries of the WORK */
243 /* and IWORK arrays, and no error message related to LWORK or */
244 /* LIWORK is issued by XERBLA. */
245 
246 /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */
247 /* On exit, if INFO = 0, IWORK(1) returns the optimal (and */
248 /* minimal) LIWORK. */
249 
250 /* LIWORK (input) INTEGER */
251 /* The dimension of the array IWORK. LIWORK >= max(1,10*N). */
252 
253 /* If LIWORK = -1, then a workspace query is assumed; the */
254 /* routine only calculates the optimal sizes of the WORK and */
255 /* IWORK arrays, returns these values as the first entries of */
256 /* the WORK and IWORK arrays, and no error message related to */
257 /* LWORK or LIWORK is issued by XERBLA. */
258 
259 /* INFO (output) INTEGER */
260 /* = 0: successful exit */
261 /* < 0: if INFO = -i, the i-th argument had an illegal value */
262 /* > 0: Internal error */
263 
264 /* Further Details */
265 /* =============== */
266 
267 /* Based on contributions by */
268 /* Inderjit Dhillon, IBM Almaden, USA */
269 /* Osni Marques, LBNL/NERSC, USA */
270 /* Ken Stanley, Computer Science Division, University of */
271 /* California at Berkeley, USA */
272 
273 /* ===================================================================== */
274 
275 /* .. Parameters .. */
276 /* .. */
277 /* .. Local Scalars .. */
278 /* .. */
279 /* .. External Functions .. */
280 /* .. */
281 /* .. External Subroutines .. */
282 /* .. */
283 /* .. Intrinsic Functions .. */
284 /* .. */
285 /* .. Executable Statements .. */
286 
287 
288 /* Test the input parameters. */
289 
290  /* Parameter adjustments */
291  /* Table of constant values */
292 
293  integer c__10 = 10;
294  integer c__1 = 1;
295  integer c__2 = 2;
296  integer c__3 = 3;
297  integer c__4 = 4;
298 
299  --d__;
300  --e;
301  --w;
302  z_dim1 = *ldz;
303  z_offset = 1 + z_dim1;
304  z__ -= z_offset;
305  --isuppz;
306  --work;
307  --iwork;
308 
309  /* Function Body */
310  ieeeok = template_lapack_ilaenv(&c__10, "DSTEVR", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)6, (ftnlen)1);
311 
312  wantz = template_blas_lsame(jobz, "V");
313  alleig = template_blas_lsame(range, "A");
314  valeig = template_blas_lsame(range, "V");
315  indeig = template_blas_lsame(range, "I");
316 
317  lquery = *lwork == -1 || *liwork == -1;
318 /* Computing MAX */
319  i__1 = 1, i__2 = *n * 20;
320  lwmin = maxMACRO(i__1,i__2);
321 /* Computing MAX */
322  i__1 = 1, i__2 = *n * 10;
323  liwmin = maxMACRO(i__1,i__2);
324 
325 
326  *info = 0;
327  if (! (wantz || template_blas_lsame(jobz, "N"))) {
328  *info = -1;
329  } else if (! (alleig || valeig || indeig)) {
330  *info = -2;
331  } else if (*n < 0) {
332  *info = -3;
333  } else {
334  if (valeig) {
335  if (*n > 0 && *vu <= *vl) {
336  *info = -7;
337  }
338  } else if (indeig) {
339  if (*il < 1 || *il > maxMACRO(1,*n)) {
340  *info = -8;
341  } else if (*iu < minMACRO(*n,*il) || *iu > *n) {
342  *info = -9;
343  }
344  }
345  }
346  if (*info == 0) {
347  if (*ldz < 1 || ( wantz && *ldz < *n ) ) {
348  *info = -14;
349  }
350  }
351 
352  if (*info == 0) {
353  work[1] = (Treal) lwmin;
354  iwork[1] = liwmin;
355 
356  if (*lwork < lwmin && ! lquery) {
357  *info = -17;
358  } else if (*liwork < liwmin && ! lquery) {
359  *info = -19;
360  }
361  }
362 
363  if (*info != 0) {
364  i__1 = -(*info);
365  template_blas_erbla("STEVR", &i__1);
366  return 0;
367  } else if (lquery) {
368  return 0;
369  }
370 
371 /* Quick return if possible */
372 
373  *m = 0;
374  if (*n == 0) {
375  return 0;
376  }
377 
378  if (*n == 1) {
379  if (alleig || indeig) {
380  *m = 1;
381  w[1] = d__[1];
382  } else {
383  if (*vl < d__[1] && *vu >= d__[1]) {
384  *m = 1;
385  w[1] = d__[1];
386  }
387  }
388  if (wantz) {
389  z__[z_dim1 + 1] = 1.;
390  }
391  return 0;
392  }
393 
394 /* Get machine constants. */
395 
396  safmin = template_lapack_lamch("Safe minimum", (Treal)0);
397  eps = template_lapack_lamch("Precision", (Treal)0);
398  smlnum = safmin / eps;
399  bignum = 1. / smlnum;
400  rmin = template_blas_sqrt(smlnum);
401 /* Computing MIN */
402  d__1 = template_blas_sqrt(bignum), d__2 = 1. / template_blas_sqrt(template_blas_sqrt(safmin));
403  rmax = minMACRO(d__1,d__2);
404 
405 
406 /* Scale matrix to allowable range, if necessary. */
407 
408  iscale = 0;
409  vll = *vl;
410  vuu = *vu;
411 
412  tnrm = template_lapack_lanst("M", n, &d__[1], &e[1]);
413  if (tnrm > 0. && tnrm < rmin) {
414  iscale = 1;
415  sigma = rmin / tnrm;
416  } else if (tnrm > rmax) {
417  iscale = 1;
418  sigma = rmax / tnrm;
419  }
420  if (iscale == 1) {
421  template_blas_scal(n, &sigma, &d__[1], &c__1);
422  i__1 = *n - 1;
423  template_blas_scal(&i__1, &sigma, &e[1], &c__1);
424  if (valeig) {
425  vll = *vl * sigma;
426  vuu = *vu * sigma;
427  }
428  }
429 /* Initialize indices into workspaces. Note: These indices are used only */
430 /* if DSTERF or DSTEMR fail. */
431 /* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and */
432 /* stores the block indices of each of the M<=N eigenvalues. */
433  indibl = 1;
434 /* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and */
435 /* stores the starting and finishing indices of each block. */
436  indisp = indibl + *n;
437 /* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */
438 /* that corresponding to eigenvectors that fail to converge in */
439 /* DSTEIN. This information is discarded; if any fail, the driver */
440 /* returns INFO > 0. */
441  indifl = indisp + *n;
442 /* INDIWO is the offset of the remaining integer workspace. */
443  indiwo = indisp + *n;
444 
445 /* If all eigenvalues are desired, then */
446 /* call DSTERF or DSTEMR. If this fails for some eigenvalue, then */
447 /* try DSTEBZ. */
448 
449 
450  test = FALSE_;
451  if (indeig) {
452  if (*il == 1 && *iu == *n) {
453  test = TRUE_;
454  }
455  }
456  if ((alleig || test) && ieeeok == 1) {
457  i__1 = *n - 1;
458  template_blas_copy(&i__1, &e[1], &c__1, &work[1], &c__1);
459  if (! wantz) {
460  template_blas_copy(n, &d__[1], &c__1, &w[1], &c__1);
461  template_lapack_sterf(n, &w[1], &work[1], info);
462  } else {
463  template_blas_copy(n, &d__[1], &c__1, &work[*n + 1], &c__1);
464  if (*abstol <= *n * 2. * eps) {
465  tryrac = TRUE_;
466  } else {
467  tryrac = FALSE_;
468  }
469  i__1 = *lwork - (*n << 1);
470  template_lapack_stemr(jobz, "A", n, &work[*n + 1], &work[1], vl, vu, il, iu, m,
471  &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, &work[
472  (*n << 1) + 1], &i__1, &iwork[1], liwork, info);
473 
474  }
475  if (*info == 0) {
476  *m = *n;
477  goto L10;
478  }
479  *info = 0;
480  }
481 
482 /* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. */
483 
484  if (wantz) {
485  *(unsigned char *)order = 'B';
486  } else {
487  *(unsigned char *)order = 'E';
488  }
489  template_lapack_stebz(range, order, n, &vll, &vuu, il, iu, abstol, &d__[1], &e[1], m, &
490  nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[1], &iwork[
491  indiwo], info);
492 
493  if (wantz) {
494  template_lapack_stein(n, &d__[1], &e[1], m, &w[1], &iwork[indibl], &iwork[indisp], &
495  z__[z_offset], ldz, &work[1], &iwork[indiwo], &iwork[indifl],
496  info);
497  }
498 
499 /* If matrix was scaled, then rescale eigenvalues appropriately. */
500 
501 L10:
502  if (iscale == 1) {
503  if (*info == 0) {
504  imax = *m;
505  } else {
506  imax = *info - 1;
507  }
508  d__1 = 1. / sigma;
509  template_blas_scal(&imax, &d__1, &w[1], &c__1);
510  }
511 
512 /* If eigenvalues are not in order, then sort them, along with */
513 /* eigenvectors. */
514 
515  if (wantz) {
516  i__1 = *m - 1;
517  for (j = 1; j <= i__1; ++j) {
518  i__ = 0;
519  tmp1 = w[j];
520  i__2 = *m;
521  for (jj = j + 1; jj <= i__2; ++jj) {
522  if (w[jj] < tmp1) {
523  i__ = jj;
524  tmp1 = w[jj];
525  }
526 /* L20: */
527  }
528 
529  if (i__ != 0) {
530  itmp1 = iwork[i__];
531  w[i__] = w[j];
532  iwork[i__] = iwork[j];
533  w[j] = tmp1;
534  iwork[j] = itmp1;
535  template_blas_swap(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1],
536  &c__1);
537  }
538 /* L30: */
539  }
540  }
541 
542 /* Causes problems with tests 19 & 20: */
543 /* IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 */
544 
545 
546  work[1] = (Treal) lwmin;
547  iwork[1] = liwmin;
548  return 0;
549 
550 /* End of DSTEVR */
551 
552 } /* dstevr_ */
553 
554 #endif
555