gemv.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. // Copyright ©2018 The Gonum Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package gonum
  5. import (
  6. "gonum.org/v1/gonum/blas"
  7. "gonum.org/v1/gonum/internal/asm/f32"
  8. "gonum.org/v1/gonum/internal/asm/f64"
  9. )
  10. // TODO(Kunde21): Merge these methods back into level2double/level2single when Sgemv assembly kernels are merged into f32.
  11. // Dgemv computes
  12. // y = alpha * A * x + beta * y if tA = blas.NoTrans
  13. // y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans
  14. // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
  15. func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
  16. if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  17. panic(badTranspose)
  18. }
  19. if m < 0 {
  20. panic(mLT0)
  21. }
  22. if n < 0 {
  23. panic(nLT0)
  24. }
  25. if lda < max(1, n) {
  26. panic(badLdA)
  27. }
  28. if incX == 0 {
  29. panic(zeroIncX)
  30. }
  31. if incY == 0 {
  32. panic(zeroIncY)
  33. }
  34. // Set up indexes
  35. lenX := m
  36. lenY := n
  37. if tA == blas.NoTrans {
  38. lenX = n
  39. lenY = m
  40. }
  41. // Quick return if possible
  42. if m == 0 || n == 0 {
  43. return
  44. }
  45. if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
  46. panic(shortX)
  47. }
  48. if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
  49. panic(shortY)
  50. }
  51. if len(a) < lda*(m-1)+n {
  52. panic(shortA)
  53. }
  54. // Quick return if possible
  55. if alpha == 0 && beta == 1 {
  56. return
  57. }
  58. if alpha == 0 {
  59. // First form y = beta * y
  60. if incY > 0 {
  61. Implementation{}.Dscal(lenY, beta, y, incY)
  62. } else {
  63. Implementation{}.Dscal(lenY, beta, y, -incY)
  64. }
  65. return
  66. }
  67. // Form y = alpha * A * x + y
  68. if tA == blas.NoTrans {
  69. f64.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
  70. return
  71. }
  72. // Cases where a is transposed.
  73. f64.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
  74. }
  75. // Sgemv computes
  76. // y = alpha * A * x + beta * y if tA = blas.NoTrans
  77. // y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans
  78. // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
  79. //
  80. // Float32 implementations are autogenerated and not directly tested.
  81. func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
  82. if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  83. panic(badTranspose)
  84. }
  85. if m < 0 {
  86. panic(mLT0)
  87. }
  88. if n < 0 {
  89. panic(nLT0)
  90. }
  91. if lda < max(1, n) {
  92. panic(badLdA)
  93. }
  94. if incX == 0 {
  95. panic(zeroIncX)
  96. }
  97. if incY == 0 {
  98. panic(zeroIncY)
  99. }
  100. // Quick return if possible.
  101. if m == 0 || n == 0 {
  102. return
  103. }
  104. // Set up indexes
  105. lenX := m
  106. lenY := n
  107. if tA == blas.NoTrans {
  108. lenX = n
  109. lenY = m
  110. }
  111. if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
  112. panic(shortX)
  113. }
  114. if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
  115. panic(shortY)
  116. }
  117. if len(a) < lda*(m-1)+n {
  118. panic(shortA)
  119. }
  120. // Quick return if possible.
  121. if alpha == 0 && beta == 1 {
  122. return
  123. }
  124. // First form y = beta * y
  125. if incY > 0 {
  126. Implementation{}.Sscal(lenY, beta, y, incY)
  127. } else {
  128. Implementation{}.Sscal(lenY, beta, y, -incY)
  129. }
  130. if alpha == 0 {
  131. return
  132. }
  133. var kx, ky int
  134. if incX < 0 {
  135. kx = -(lenX - 1) * incX
  136. }
  137. if incY < 0 {
  138. ky = -(lenY - 1) * incY
  139. }
  140. // Form y = alpha * A * x + y
  141. if tA == blas.NoTrans {
  142. if incX == 1 && incY == 1 {
  143. for i := 0; i < m; i++ {
  144. y[i] += alpha * f32.DotUnitary(a[lda*i:lda*i+n], x[:n])
  145. }
  146. return
  147. }
  148. iy := ky
  149. for i := 0; i < m; i++ {
  150. y[iy] += alpha * f32.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0)
  151. iy += incY
  152. }
  153. return
  154. }
  155. // Cases where a is transposed.
  156. if incX == 1 && incY == 1 {
  157. for i := 0; i < m; i++ {
  158. tmp := alpha * x[i]
  159. if tmp != 0 {
  160. f32.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y[:n])
  161. }
  162. }
  163. return
  164. }
  165. ix := kx
  166. for i := 0; i < m; i++ {
  167. tmp := alpha * x[ix]
  168. if tmp != 0 {
  169. f32.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky))
  170. }
  171. ix += incX
  172. }
  173. }