ergo
template_lapack_lasq2.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LASQ2_HEADER
36 #define TEMPLATE_LAPACK_LASQ2_HEADER
37 
38 template<class Treal>
39 int template_lapack_lasq2(integer *n, Treal *z__, integer *info)
40 {
41  /* System generated locals */
42  integer i__1, i__2, i__3;
43  Treal d__1, d__2;
44 
45 
46  /* Local variables */
47  Treal d__, e, g;
48  integer k;
49  Treal s, t;
50  integer i0, i4, n0;
51  Treal dn;
52  integer pp;
53  Treal dn1, dn2, dee, eps, tau, tol;
54  integer ipn4;
55  Treal tol2;
56  logical ieee;
57  integer nbig;
58  Treal dmin__, emin, emax;
59  integer kmin, ndiv, iter;
60  Treal qmin, temp, qmax, zmax;
61  integer splt;
62  Treal dmin1, dmin2;
63  integer nfail;
64  Treal desig, trace, sigma;
65  integer iinfo, ttype;
66  Treal deemin;
67  integer iwhila, iwhilb;
68  Treal oldemn, safmin;
69 
70 
71 /* -- LAPACK routine (version 3.2) -- */
72 
73 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
74 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
75 /* -- Berkeley -- */
76 /* -- November 2008 -- */
77 
78 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
79 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
80 
81 /* .. Scalar Arguments .. */
82 /* .. */
83 /* .. Array Arguments .. */
84 /* .. */
85 
86 /* Purpose */
87 /* ======= */
88 
89 /* DLASQ2 computes all the eigenvalues of the symmetric positive */
90 /* definite tridiagonal matrix associated with the qd array Z to high */
91 /* relative accuracy are computed to high relative accuracy, in the */
92 /* absence of denormalization, underflow and overflow. */
93 
94 /* To see the relation of Z to the tridiagonal matrix, let L be a */
95 /* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */
96 /* let U be an upper bidiagonal matrix with 1's above and diagonal */
97 /* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */
98 /* symmetric tridiagonal to which it is similar. */
99 
100 /* Note : DLASQ2 defines a logical variable, IEEE, which is true */
101 /* on machines which follow ieee-754 floating-point standard in their */
102 /* handling of infinities and NaNs, and false otherwise. This variable */
103 /* is passed to DLASQ3. */
104 
105 /* Arguments */
106 /* ========= */
107 
108 /* N (input) INTEGER */
109 /* The number of rows and columns in the matrix. N >= 0. */
110 
111 /* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */
112 /* On entry Z holds the qd array. On exit, entries 1 to N hold */
113 /* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */
114 /* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */
115 /* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */
116 /* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */
117 /* shifts that failed. */
118 
119 /* INFO (output) INTEGER */
120 /* = 0: successful exit */
121 /* < 0: if the i-th argument is a scalar and had an illegal */
122 /* value, then INFO = -i, if the i-th argument is an */
123 /* array and the j-entry had an illegal value, then */
124 /* INFO = -(i*100+j) */
125 /* > 0: the algorithm failed */
126 /* = 1, a split was marked by a positive value in E */
127 /* = 2, current block of Z not diagonalized after 30*N */
128 /* iterations (in inner while loop) */
129 /* = 3, termination criterion of outer while loop not met */
130 /* (program created more than N unreduced blocks) */
131 
132 /* Further Details */
133 /* =============== */
134 /* Local Variables: I0:N0 defines a current unreduced segment of Z. */
135 /* The shifts are accumulated in SIGMA. Iteration count is in ITER. */
136 /* Ping-pong is controlled by PP (alternates between 0 and 1). */
137 
138 /* ===================================================================== */
139 
140 /* .. Parameters .. */
141 /* .. */
142 /* .. Local Scalars .. */
143 /* .. */
144 /* .. External Subroutines .. */
145 /* .. */
146 /* .. External Functions .. */
147 /* .. */
148 /* .. Intrinsic Functions .. */
149 /* .. */
150 /* .. Executable Statements .. */
151 
152 /* Test the input arguments. */
153 /* (in case DLASQ2 is not called by DLASQ1) */
154 
155 /* Table of constant values */
156 
157  integer c__1 = 1;
158  integer c__2 = 2;
159  integer c__10 = 10;
160  integer c__3 = 3;
161  integer c__4 = 4;
162  integer c__11 = 11;
163 
164  /* Parameter adjustments */
165  --z__;
166 
167  /* Function Body */
168  *info = 0;
169  eps = template_lapack_lamch("Precision", (Treal)0);
170  safmin = template_lapack_lamch("Safe minimum", (Treal)0);
171  tol = eps * 100.;
172 /* Computing 2nd power */
173  d__1 = tol;
174  tol2 = d__1 * d__1;
175 
176  if (*n < 0) {
177  *info = -1;
178  template_blas_erbla("DLASQ2", &c__1);
179  return 0;
180  } else if (*n == 0) {
181  return 0;
182  } else if (*n == 1) {
183 
184 /* 1-by-1 case. */
185 
186  if (z__[1] < 0.) {
187  *info = -201;
188  template_blas_erbla("DLASQ2", &c__2);
189  }
190  return 0;
191  } else if (*n == 2) {
192 
193 /* 2-by-2 case. */
194 
195  if (z__[2] < 0. || z__[3] < 0.) {
196  *info = -2;
197  template_blas_erbla("DLASQ2", &c__2);
198  return 0;
199  } else if (z__[3] > z__[1]) {
200  d__ = z__[3];
201  z__[3] = z__[1];
202  z__[1] = d__;
203  }
204  z__[5] = z__[1] + z__[2] + z__[3];
205  if (z__[2] > z__[3] * tol2) {
206  t = (z__[1] - z__[3] + z__[2]) * .5;
207  s = z__[3] * (z__[2] / t);
208  if (s <= t) {
209  s = z__[3] * (z__[2] / (t * (template_blas_sqrt(s / t + 1.) + 1.)));
210  } else {
211  s = z__[3] * (z__[2] / (t + template_blas_sqrt(t) * template_blas_sqrt(t + s)));
212  }
213  t = z__[1] + (s + z__[2]);
214  z__[3] *= z__[1] / t;
215  z__[1] = t;
216  }
217  z__[2] = z__[3];
218  z__[6] = z__[2] + z__[1];
219  return 0;
220  }
221 
222 /* Check for negative data and compute sums of q's and e's. */
223 
224  z__[*n * 2] = 0.;
225  emin = z__[2];
226  qmax = 0.;
227  zmax = 0.;
228  d__ = 0.;
229  e = 0.;
230 
231  i__1 = ( *n - 1 ) << 1;
232  for (k = 1; k <= i__1; k += 2) {
233  if (z__[k] < 0.) {
234  *info = -(k + 200);
235  template_blas_erbla("DLASQ2", &c__2);
236  return 0;
237  } else if (z__[k + 1] < 0.) {
238  *info = -(k + 201);
239  template_blas_erbla("DLASQ2", &c__2);
240  return 0;
241  }
242  d__ += z__[k];
243  e += z__[k + 1];
244 /* Computing MAX */
245  d__1 = qmax, d__2 = z__[k];
246  qmax = maxMACRO(d__1,d__2);
247 /* Computing MIN */
248  d__1 = emin, d__2 = z__[k + 1];
249  emin = minMACRO(d__1,d__2);
250 /* Computing MAX */
251  d__1 = maxMACRO(qmax,zmax), d__2 = z__[k + 1];
252  zmax = maxMACRO(d__1,d__2);
253 /* L10: */
254  }
255  if (z__[(*n << 1) - 1] < 0.) {
256  *info = -((*n << 1) + 199);
257  template_blas_erbla("DLASQ2", &c__2);
258  return 0;
259  }
260  d__ += z__[(*n << 1) - 1];
261 /* Computing MAX */
262  d__1 = qmax, d__2 = z__[(*n << 1) - 1];
263  qmax = maxMACRO(d__1,d__2);
264  zmax = maxMACRO(qmax,zmax);
265 
266 /* Check for diagonality. */
267 
268  if (e == 0.) {
269  i__1 = *n;
270  for (k = 2; k <= i__1; ++k) {
271  z__[k] = z__[(k << 1) - 1];
272 /* L20: */
273  }
274  template_lapack_lasrt("D", n, &z__[1], &iinfo);
275  z__[(*n << 1) - 1] = d__;
276  return 0;
277  }
278 
279  trace = d__ + e;
280 
281 /* Check for zero data. */
282 
283  if (trace == 0.) {
284  z__[(*n << 1) - 1] = 0.;
285  return 0;
286  }
287 
288 /* Check whether the machine is IEEE conformable. */
289 
290  ieee = template_lapack_ilaenv(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1 && template_lapack_ilaenv(&c__11, "DLASQ2", "N", &c__1, &c__2,
291  &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1;
292 
293 /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
294 
295  for (k = *n << 1; k >= 2; k += -2) {
296  z__[k * 2] = 0.;
297  z__[(k << 1) - 1] = z__[k];
298  z__[(k << 1) - 2] = 0.;
299  z__[(k << 1) - 3] = z__[k - 1];
300 /* L30: */
301  }
302 
303  i0 = 1;
304  n0 = *n;
305 
306 /* Reverse the qd-array, if warranted. */
307 
308  if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) {
309  ipn4 = ( i0 + n0 ) << 2;
310  i__1 = ( i0 + n0 - 1 ) << 1;
311  for (i4 = i0 << 2; i4 <= i__1; i4 += 4) {
312  temp = z__[i4 - 3];
313  z__[i4 - 3] = z__[ipn4 - i4 - 3];
314  z__[ipn4 - i4 - 3] = temp;
315  temp = z__[i4 - 1];
316  z__[i4 - 1] = z__[ipn4 - i4 - 5];
317  z__[ipn4 - i4 - 5] = temp;
318 /* L40: */
319  }
320  }
321 
322 /* Initial split checking via dqd and Li's test. */
323 
324  pp = 0;
325 
326  for (k = 1; k <= 2; ++k) {
327 
328  d__ = z__[(n0 << 2) + pp - 3];
329  i__1 = (i0 << 2) + pp;
330  for (i4 = ( ( n0 - 1 ) << 2) + pp; i4 >= i__1; i4 += -4) {
331  if (z__[i4 - 1] <= tol2 * d__) {
332  z__[i4 - 1] = -0.;
333  d__ = z__[i4 - 3];
334  } else {
335  d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
336  }
337 /* L50: */
338  }
339 
340 /* dqd maps Z to ZZ plus Li's test. */
341 
342  emin = z__[(i0 << 2) + pp + 1];
343  d__ = z__[(i0 << 2) + pp - 3];
344  i__1 = ( ( n0 - 1 ) << 2) + pp;
345  for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) {
346  z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1];
347  if (z__[i4 - 1] <= tol2 * d__) {
348  z__[i4 - 1] = -0.;
349  z__[i4 - (pp << 1) - 2] = d__;
350  z__[i4 - (pp << 1)] = 0.;
351  d__ = z__[i4 + 1];
352  } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] &&
353  safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) {
354  temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2];
355  z__[i4 - (pp << 1)] = z__[i4 - 1] * temp;
356  d__ *= temp;
357  } else {
358  z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - (
359  pp << 1) - 2]);
360  d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]);
361  }
362 /* Computing MIN */
363  d__1 = emin, d__2 = z__[i4 - (pp << 1)];
364  emin = minMACRO(d__1,d__2);
365 /* L60: */
366  }
367  z__[(n0 << 2) - pp - 2] = d__;
368 
369 /* Now find qmax. */
370 
371  qmax = z__[(i0 << 2) - pp - 2];
372  i__1 = (n0 << 2) - pp - 2;
373  for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) {
374 /* Computing MAX */
375  d__1 = qmax, d__2 = z__[i4];
376  qmax = maxMACRO(d__1,d__2);
377 /* L70: */
378  }
379 
380 /* Prepare for the next iteration on K. */
381 
382  pp = 1 - pp;
383 /* L80: */
384  }
385 
386 /* Initialise variables to pass to DLASQ3. */
387 
388  ttype = 0;
389  dmin1 = 0.;
390  dmin2 = 0.;
391  dn = 0.;
392  dn1 = 0.;
393  dn2 = 0.;
394  g = 0.;
395  tau = 0.;
396 
397  iter = 2;
398  nfail = 0;
399  ndiv = ( n0 - i0 ) << 1;
400 
401  i__1 = *n + 1;
402  for (iwhila = 1; iwhila <= i__1; ++iwhila) {
403  if (n0 < 1) {
404  goto L170;
405  }
406 
407 /* While array unfinished do */
408 
409 /* E(N0) holds the value of SIGMA when submatrix in I0:N0 */
410 /* splits from the rest of the array, but is negated. */
411 
412  desig = 0.;
413  if (n0 == *n) {
414  sigma = 0.;
415  } else {
416  sigma = -z__[(n0 << 2) - 1];
417  }
418  if (sigma < 0.) {
419  *info = 1;
420  return 0;
421  }
422 
423 /* Find last unreduced submatrix's top index I0, find QMAX and */
424 /* EMIN. Find Gershgorin-type bound if Q's much greater than E's. */
425 
426  emax = 0.;
427  if (n0 > i0) {
428  emin = (d__1 = z__[(n0 << 2) - 5], absMACRO(d__1));
429  } else {
430  emin = 0.;
431  }
432  qmin = z__[(n0 << 2) - 3];
433  qmax = qmin;
434  for (i4 = n0 << 2; i4 >= 8; i4 += -4) {
435  if (z__[i4 - 5] <= 0.) {
436  goto L100;
437  }
438  if (qmin >= emax * 4.) {
439 /* Computing MIN */
440  d__1 = qmin, d__2 = z__[i4 - 3];
441  qmin = minMACRO(d__1,d__2);
442 /* Computing MAX */
443  d__1 = emax, d__2 = z__[i4 - 5];
444  emax = maxMACRO(d__1,d__2);
445  }
446 /* Computing MAX */
447  d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5];
448  qmax = maxMACRO(d__1,d__2);
449 /* Computing MIN */
450  d__1 = emin, d__2 = z__[i4 - 5];
451  emin = minMACRO(d__1,d__2);
452 /* L90: */
453  }
454  i4 = 4;
455 
456 L100:
457  i0 = i4 / 4;
458  pp = 0;
459 
460  if (n0 - i0 > 1) {
461  dee = z__[(i0 << 2) - 3];
462  deemin = dee;
463  kmin = i0;
464  i__2 = (n0 << 2) - 3;
465  for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) {
466  dee = z__[i4] * (dee / (dee + z__[i4 - 2]));
467  if (dee <= deemin) {
468  deemin = dee;
469  kmin = (i4 + 3) / 4;
470  }
471 /* L110: */
472  }
473  if ( ( kmin - i0 ) << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] *
474  .5) {
475  ipn4 = ( i0 + n0 ) << 2;
476  pp = 2;
477  i__2 = ( i0 + n0 - 1 ) << 1;
478  for (i4 = i0 << 2; i4 <= i__2; i4 += 4) {
479  temp = z__[i4 - 3];
480  z__[i4 - 3] = z__[ipn4 - i4 - 3];
481  z__[ipn4 - i4 - 3] = temp;
482  temp = z__[i4 - 2];
483  z__[i4 - 2] = z__[ipn4 - i4 - 2];
484  z__[ipn4 - i4 - 2] = temp;
485  temp = z__[i4 - 1];
486  z__[i4 - 1] = z__[ipn4 - i4 - 5];
487  z__[ipn4 - i4 - 5] = temp;
488  temp = z__[i4];
489  z__[i4] = z__[ipn4 - i4 - 4];
490  z__[ipn4 - i4 - 4] = temp;
491 /* L120: */
492  }
493  }
494  }
495 
496 /* Put -(initial shift) into DMIN. */
497 
498 /* Computing MAX */
499  d__1 = 0., d__2 = qmin - template_blas_sqrt(qmin) * 2. * template_blas_sqrt(emax);
500  dmin__ = -maxMACRO(d__1,d__2);
501 
502 /* Now I0:N0 is unreduced. */
503 /* PP = 0 for ping, PP = 1 for pong. */
504 /* PP = 2 indicates that flipping was applied to the Z array and */
505 /* and that the tests for deflation upon entry in DLASQ3 */
506 /* should not be performed. */
507 
508  nbig = (n0 - i0 + 1) * 30;
509  i__2 = nbig;
510  for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
511  if (i0 > n0) {
512  goto L150;
513  }
514 
515 /* While submatrix unfinished take a good dqds step. */
516 
517  template_lapack_lasq3(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
518  nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, &
519  dn1, &dn2, &g, &tau);
520 
521  pp = 1 - pp;
522 
523 /* When EMIN is very small check for splits. */
524 
525  if (pp == 0 && n0 - i0 >= 3) {
526  if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 *
527  sigma) {
528  splt = i0 - 1;
529  qmax = z__[(i0 << 2) - 3];
530  emin = z__[(i0 << 2) - 1];
531  oldemn = z__[i0 * 4];
532  i__3 = ( n0 - 3 ) << 2;
533  for (i4 = i0 << 2; i4 <= i__3; i4 += 4) {
534  if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <=
535  tol2 * sigma) {
536  z__[i4 - 1] = -sigma;
537  splt = i4 / 4;
538  qmax = 0.;
539  emin = z__[i4 + 3];
540  oldemn = z__[i4 + 4];
541  } else {
542 /* Computing MAX */
543  d__1 = qmax, d__2 = z__[i4 + 1];
544  qmax = maxMACRO(d__1,d__2);
545 /* Computing MIN */
546  d__1 = emin, d__2 = z__[i4 - 1];
547  emin = minMACRO(d__1,d__2);
548 /* Computing MIN */
549  d__1 = oldemn, d__2 = z__[i4];
550  oldemn = minMACRO(d__1,d__2);
551  }
552 /* L130: */
553  }
554  z__[(n0 << 2) - 1] = emin;
555  z__[n0 * 4] = oldemn;
556  i0 = splt + 1;
557  }
558  }
559 
560 /* L140: */
561  }
562 
563  *info = 2;
564  return 0;
565 
566 /* end IWHILB */
567 
568 L150:
569 
570 /* L160: */
571  ;
572  }
573 
574  *info = 3;
575  return 0;
576 
577 /* end IWHILA */
578 
579 L170:
580 
581 /* Move q's to the front. */
582 
583  i__1 = *n;
584  for (k = 2; k <= i__1; ++k) {
585  z__[k] = z__[(k << 2) - 3];
586 /* L180: */
587  }
588 
589 /* Sort and compute sum of eigenvalues. */
590 
591  template_lapack_lasrt("D", n, &z__[1], &iinfo);
592 
593  e = 0.;
594  for (k = *n; k >= 1; --k) {
595  e += z__[k];
596 /* L190: */
597  }
598 
599 /* Store trace, sum(eigenvalues) and information on performance. */
600 
601  z__[(*n << 1) + 1] = trace;
602  z__[(*n << 1) + 2] = e;
603  z__[(*n << 1) + 3] = (Treal) iter;
604 /* Computing 2nd power */
605  i__1 = *n;
606  z__[(*n << 1) + 4] = (Treal) ndiv / (Treal) (i__1 * i__1);
607  z__[(*n << 1) + 5] = nfail * 100. / (Treal) iter;
608  return 0;
609 
610 /* End of DLASQ2 */
611 
612 } /* dlasq2_ */
613 
614 #endif