dlaexc.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. // Copyright ©2016 The Gonum Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package gonum
  5. import (
  6. "math"
  7. "gonum.org/v1/gonum/blas"
  8. "gonum.org/v1/gonum/blas/blas64"
  9. "gonum.org/v1/gonum/lapack"
  10. )
  11. // Dlaexc swaps two adjacent diagonal blocks of order 1 or 2 in an n×n upper
  12. // quasi-triangular matrix T by an orthogonal similarity transformation.
  13. //
  14. // T must be in Schur canonical form, that is, block upper triangular with 1×1
  15. // and 2×2 diagonal blocks; each 2×2 diagonal block has its diagonal elements
  16. // equal and its off-diagonal elements of opposite sign. On return, T will
  17. // contain the updated matrix again in Schur canonical form.
  18. //
  19. // If wantq is true, the transformation is accumulated in the n×n matrix Q,
  20. // otherwise Q is not referenced.
  21. //
  22. // j1 is the index of the first row of the first block. n1 and n2 are the order
  23. // of the first and second block, respectively.
  24. //
  25. // work must have length at least n, otherwise Dlaexc will panic.
  26. //
  27. // If ok is false, the transformed matrix T would be too far from Schur form.
  28. // The blocks are not swapped, and T and Q are not modified.
  29. //
  30. // If n1 and n2 are both equal to 1, Dlaexc will always return true.
  31. //
  32. // Dlaexc is an internal routine. It is exported for testing purposes.
  33. func (impl Implementation) Dlaexc(wantq bool, n int, t []float64, ldt int, q []float64, ldq int, j1, n1, n2 int, work []float64) (ok bool) {
  34. switch {
  35. case n < 0:
  36. panic(nLT0)
  37. case ldt < max(1, n):
  38. panic(badLdT)
  39. case wantq && ldt < max(1, n):
  40. panic(badLdQ)
  41. case j1 < 0 || n <= j1:
  42. panic(badJ1)
  43. case len(work) < n:
  44. panic(shortWork)
  45. case n1 < 0 || 2 < n1:
  46. panic(badN1)
  47. case n2 < 0 || 2 < n2:
  48. panic(badN2)
  49. }
  50. if n == 0 || n1 == 0 || n2 == 0 {
  51. return true
  52. }
  53. switch {
  54. case len(t) < (n-1)*ldt+n:
  55. panic(shortT)
  56. case wantq && len(q) < (n-1)*ldq+n:
  57. panic(shortQ)
  58. }
  59. if j1+n1 >= n {
  60. // TODO(vladimir-ch): Reference LAPACK does this check whether
  61. // the start of the second block is in the matrix T. It returns
  62. // true if it is not and moreover it does not check whether the
  63. // whole second block fits into T. This does not feel
  64. // satisfactory. The only caller of Dlaexc is Dtrexc, so if the
  65. // caller makes sure that this does not happen, we could be
  66. // stricter here.
  67. return true
  68. }
  69. j2 := j1 + 1
  70. j3 := j1 + 2
  71. bi := blas64.Implementation()
  72. if n1 == 1 && n2 == 1 {
  73. // Swap two 1×1 blocks.
  74. t11 := t[j1*ldt+j1]
  75. t22 := t[j2*ldt+j2]
  76. // Determine the transformation to perform the interchange.
  77. cs, sn, _ := impl.Dlartg(t[j1*ldt+j2], t22-t11)
  78. // Apply transformation to the matrix T.
  79. if n-j3 > 0 {
  80. bi.Drot(n-j3, t[j1*ldt+j3:], 1, t[j2*ldt+j3:], 1, cs, sn)
  81. }
  82. if j1 > 0 {
  83. bi.Drot(j1, t[j1:], ldt, t[j2:], ldt, cs, sn)
  84. }
  85. t[j1*ldt+j1] = t22
  86. t[j2*ldt+j2] = t11
  87. if wantq {
  88. // Accumulate transformation in the matrix Q.
  89. bi.Drot(n, q[j1:], ldq, q[j2:], ldq, cs, sn)
  90. }
  91. return true
  92. }
  93. // Swapping involves at least one 2×2 block.
  94. //
  95. // Copy the diagonal block of order n1+n2 to the local array d and
  96. // compute its norm.
  97. nd := n1 + n2
  98. var d [16]float64
  99. const ldd = 4
  100. impl.Dlacpy(blas.All, nd, nd, t[j1*ldt+j1:], ldt, d[:], ldd)
  101. dnorm := impl.Dlange(lapack.MaxAbs, nd, nd, d[:], ldd, work)
  102. // Compute machine-dependent threshold for test for accepting swap.
  103. eps := dlamchP
  104. thresh := math.Max(10*eps*dnorm, dlamchS/eps)
  105. // Solve T11*X - X*T22 = scale*T12 for X.
  106. var x [4]float64
  107. const ldx = 2
  108. scale, _, _ := impl.Dlasy2(false, false, -1, n1, n2, d[:], ldd, d[n1*ldd+n1:], ldd, d[n1:], ldd, x[:], ldx)
  109. // Swap the adjacent diagonal blocks.
  110. switch {
  111. case n1 == 1 && n2 == 2:
  112. // Generate elementary reflector H so that
  113. // ( scale, X11, X12 ) H = ( 0, 0, * )
  114. u := [3]float64{scale, x[0], 1}
  115. _, tau := impl.Dlarfg(3, x[1], u[:2], 1)
  116. t11 := t[j1*ldt+j1]
  117. // Perform swap provisionally on diagonal block in d.
  118. impl.Dlarfx(blas.Left, 3, 3, u[:], tau, d[:], ldd, work)
  119. impl.Dlarfx(blas.Right, 3, 3, u[:], tau, d[:], ldd, work)
  120. // Test whether to reject swap.
  121. if math.Max(math.Abs(d[2*ldd]), math.Max(math.Abs(d[2*ldd+1]), math.Abs(d[2*ldd+2]-t11))) > thresh {
  122. return false
  123. }
  124. // Accept swap: apply transformation to the entire matrix T.
  125. impl.Dlarfx(blas.Left, 3, n-j1, u[:], tau, t[j1*ldt+j1:], ldt, work)
  126. impl.Dlarfx(blas.Right, j2+1, 3, u[:], tau, t[j1:], ldt, work)
  127. t[j3*ldt+j1] = 0
  128. t[j3*ldt+j2] = 0
  129. t[j3*ldt+j3] = t11
  130. if wantq {
  131. // Accumulate transformation in the matrix Q.
  132. impl.Dlarfx(blas.Right, n, 3, u[:], tau, q[j1:], ldq, work)
  133. }
  134. case n1 == 2 && n2 == 1:
  135. // Generate elementary reflector H so that:
  136. // H ( -X11 ) = ( * )
  137. // ( -X21 ) = ( 0 )
  138. // ( scale ) = ( 0 )
  139. u := [3]float64{1, -x[ldx], scale}
  140. _, tau := impl.Dlarfg(3, -x[0], u[1:], 1)
  141. t33 := t[j3*ldt+j3]
  142. // Perform swap provisionally on diagonal block in D.
  143. impl.Dlarfx(blas.Left, 3, 3, u[:], tau, d[:], ldd, work)
  144. impl.Dlarfx(blas.Right, 3, 3, u[:], tau, d[:], ldd, work)
  145. // Test whether to reject swap.
  146. if math.Max(math.Abs(d[ldd]), math.Max(math.Abs(d[2*ldd]), math.Abs(d[0]-t33))) > thresh {
  147. return false
  148. }
  149. // Accept swap: apply transformation to the entire matrix T.
  150. impl.Dlarfx(blas.Right, j3+1, 3, u[:], tau, t[j1:], ldt, work)
  151. impl.Dlarfx(blas.Left, 3, n-j1-1, u[:], tau, t[j1*ldt+j2:], ldt, work)
  152. t[j1*ldt+j1] = t33
  153. t[j2*ldt+j1] = 0
  154. t[j3*ldt+j1] = 0
  155. if wantq {
  156. // Accumulate transformation in the matrix Q.
  157. impl.Dlarfx(blas.Right, n, 3, u[:], tau, q[j1:], ldq, work)
  158. }
  159. default: // n1 == 2 && n2 == 2
  160. // Generate elementary reflectors H_1 and H_2 so that:
  161. // H_2 H_1 ( -X11 -X12 ) = ( * * )
  162. // ( -X21 -X22 ) ( 0 * )
  163. // ( scale 0 ) ( 0 0 )
  164. // ( 0 scale ) ( 0 0 )
  165. u1 := [3]float64{1, -x[ldx], scale}
  166. _, tau1 := impl.Dlarfg(3, -x[0], u1[1:], 1)
  167. temp := -tau1 * (x[1] + u1[1]*x[ldx+1])
  168. u2 := [3]float64{1, -temp * u1[2], scale}
  169. _, tau2 := impl.Dlarfg(3, -temp*u1[1]-x[ldx+1], u2[1:], 1)
  170. // Perform swap provisionally on diagonal block in D.
  171. impl.Dlarfx(blas.Left, 3, 4, u1[:], tau1, d[:], ldd, work)
  172. impl.Dlarfx(blas.Right, 4, 3, u1[:], tau1, d[:], ldd, work)
  173. impl.Dlarfx(blas.Left, 3, 4, u2[:], tau2, d[ldd:], ldd, work)
  174. impl.Dlarfx(blas.Right, 4, 3, u2[:], tau2, d[1:], ldd, work)
  175. // Test whether to reject swap.
  176. m1 := math.Max(math.Abs(d[2*ldd]), math.Abs(d[2*ldd+1]))
  177. m2 := math.Max(math.Abs(d[3*ldd]), math.Abs(d[3*ldd+1]))
  178. if math.Max(m1, m2) > thresh {
  179. return false
  180. }
  181. // Accept swap: apply transformation to the entire matrix T.
  182. j4 := j1 + 3
  183. impl.Dlarfx(blas.Left, 3, n-j1, u1[:], tau1, t[j1*ldt+j1:], ldt, work)
  184. impl.Dlarfx(blas.Right, j4+1, 3, u1[:], tau1, t[j1:], ldt, work)
  185. impl.Dlarfx(blas.Left, 3, n-j1, u2[:], tau2, t[j2*ldt+j1:], ldt, work)
  186. impl.Dlarfx(blas.Right, j4+1, 3, u2[:], tau2, t[j2:], ldt, work)
  187. t[j3*ldt+j1] = 0
  188. t[j3*ldt+j2] = 0
  189. t[j4*ldt+j1] = 0
  190. t[j4*ldt+j2] = 0
  191. if wantq {
  192. // Accumulate transformation in the matrix Q.
  193. impl.Dlarfx(blas.Right, n, 3, u1[:], tau1, q[j1:], ldq, work)
  194. impl.Dlarfx(blas.Right, n, 3, u2[:], tau2, q[j2:], ldq, work)
  195. }
  196. }
  197. if n2 == 2 {
  198. // Standardize new 2×2 block T11.
  199. a, b := t[j1*ldt+j1], t[j1*ldt+j2]
  200. c, d := t[j2*ldt+j1], t[j2*ldt+j2]
  201. var cs, sn float64
  202. t[j1*ldt+j1], t[j1*ldt+j2], t[j2*ldt+j1], t[j2*ldt+j2], _, _, _, _, cs, sn = impl.Dlanv2(a, b, c, d)
  203. if n-j1-2 > 0 {
  204. bi.Drot(n-j1-2, t[j1*ldt+j1+2:], 1, t[j2*ldt+j1+2:], 1, cs, sn)
  205. }
  206. if j1 > 0 {
  207. bi.Drot(j1, t[j1:], ldt, t[j2:], ldt, cs, sn)
  208. }
  209. if wantq {
  210. bi.Drot(n, q[j1:], ldq, q[j2:], ldq, cs, sn)
  211. }
  212. }
  213. if n1 == 2 {
  214. // Standardize new 2×2 block T22.
  215. j3 := j1 + n2
  216. j4 := j3 + 1
  217. a, b := t[j3*ldt+j3], t[j3*ldt+j4]
  218. c, d := t[j4*ldt+j3], t[j4*ldt+j4]
  219. var cs, sn float64
  220. t[j3*ldt+j3], t[j3*ldt+j4], t[j4*ldt+j3], t[j4*ldt+j4], _, _, _, _, cs, sn = impl.Dlanv2(a, b, c, d)
  221. if n-j3-2 > 0 {
  222. bi.Drot(n-j3-2, t[j3*ldt+j3+2:], 1, t[j4*ldt+j3+2:], 1, cs, sn)
  223. }
  224. bi.Drot(j3, t[j3:], ldt, t[j4:], ldt, cs, sn)
  225. if wantq {
  226. bi.Drot(n, q[j3:], ldq, q[j4:], ldq, cs, sn)
  227. }
  228. }
  229. return true
  230. }