12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748 |
- // Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
- // Copyright ©2019 The Gonum Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package gonum
- import (
- cmplx "gonum.org/v1/gonum/internal/cmplx64"
- "gonum.org/v1/gonum/blas"
- "gonum.org/v1/gonum/internal/asm/c64"
- )
- var _ blas.Complex64Level3 = Implementation{}
- // Cgemm performs one of the matrix-matrix operations
- // C = alpha * op(A) * op(B) + beta * C
- // where op(X) is one of
- // op(X) = X or op(X) = Xᵀ or op(X) = Xᴴ,
- // alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix,
- // op(B) a k×n matrix and C an m×n matrix.
- //
- // Complex64 implementations are autogenerated and not directly tested.
- func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
- switch tA {
- default:
- panic(badTranspose)
- case blas.NoTrans, blas.Trans, blas.ConjTrans:
- }
- switch tB {
- default:
- panic(badTranspose)
- case blas.NoTrans, blas.Trans, blas.ConjTrans:
- }
- switch {
- case m < 0:
- panic(mLT0)
- case n < 0:
- panic(nLT0)
- case k < 0:
- panic(kLT0)
- }
- rowA, colA := m, k
- if tA != blas.NoTrans {
- rowA, colA = k, m
- }
- if lda < max(1, colA) {
- panic(badLdA)
- }
- rowB, colB := k, n
- if tB != blas.NoTrans {
- rowB, colB = n, k
- }
- if ldb < max(1, colB) {
- panic(badLdB)
- }
- if ldc < max(1, n) {
- panic(badLdC)
- }
- // Quick return if possible.
- if m == 0 || n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < (rowA-1)*lda+colA {
- panic(shortA)
- }
- if len(b) < (rowB-1)*ldb+colB {
- panic(shortB)
- }
- if len(c) < (m-1)*ldc+n {
- panic(shortC)
- }
- // Quick return if possible.
- if (alpha == 0 || k == 0) && beta == 1 {
- return
- }
- if alpha == 0 {
- if beta == 0 {
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- c[i*ldc+j] = 0
- }
- }
- } else {
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- c[i*ldc+j] *= beta
- }
- }
- }
- return
- }
- switch tA {
- case blas.NoTrans:
- switch tB {
- case blas.NoTrans:
- // Form C = alpha * A * B + beta * C.
- for i := 0; i < m; i++ {
- switch {
- case beta == 0:
- for j := 0; j < n; j++ {
- c[i*ldc+j] = 0
- }
- case beta != 1:
- for j := 0; j < n; j++ {
- c[i*ldc+j] *= beta
- }
- }
- for l := 0; l < k; l++ {
- tmp := alpha * a[i*lda+l]
- for j := 0; j < n; j++ {
- c[i*ldc+j] += tmp * b[l*ldb+j]
- }
- }
- }
- case blas.Trans:
- // Form C = alpha * A * Bᵀ + beta * C.
- for i := 0; i < m; i++ {
- switch {
- case beta == 0:
- for j := 0; j < n; j++ {
- c[i*ldc+j] = 0
- }
- case beta != 1:
- for j := 0; j < n; j++ {
- c[i*ldc+j] *= beta
- }
- }
- for l := 0; l < k; l++ {
- tmp := alpha * a[i*lda+l]
- for j := 0; j < n; j++ {
- c[i*ldc+j] += tmp * b[j*ldb+l]
- }
- }
- }
- case blas.ConjTrans:
- // Form C = alpha * A * Bᴴ + beta * C.
- for i := 0; i < m; i++ {
- switch {
- case beta == 0:
- for j := 0; j < n; j++ {
- c[i*ldc+j] = 0
- }
- case beta != 1:
- for j := 0; j < n; j++ {
- c[i*ldc+j] *= beta
- }
- }
- for l := 0; l < k; l++ {
- tmp := alpha * a[i*lda+l]
- for j := 0; j < n; j++ {
- c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l])
- }
- }
- }
- }
- case blas.Trans:
- switch tB {
- case blas.NoTrans:
- // Form C = alpha * Aᵀ * B + beta * C.
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- var tmp complex64
- for l := 0; l < k; l++ {
- tmp += a[l*lda+i] * b[l*ldb+j]
- }
- if beta == 0 {
- c[i*ldc+j] = alpha * tmp
- } else {
- c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- case blas.Trans:
- // Form C = alpha * Aᵀ * Bᵀ + beta * C.
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- var tmp complex64
- for l := 0; l < k; l++ {
- tmp += a[l*lda+i] * b[j*ldb+l]
- }
- if beta == 0 {
- c[i*ldc+j] = alpha * tmp
- } else {
- c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- case blas.ConjTrans:
- // Form C = alpha * Aᵀ * Bᴴ + beta * C.
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- var tmp complex64
- for l := 0; l < k; l++ {
- tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l])
- }
- if beta == 0 {
- c[i*ldc+j] = alpha * tmp
- } else {
- c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- }
- case blas.ConjTrans:
- switch tB {
- case blas.NoTrans:
- // Form C = alpha * Aᴴ * B + beta * C.
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- var tmp complex64
- for l := 0; l < k; l++ {
- tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j]
- }
- if beta == 0 {
- c[i*ldc+j] = alpha * tmp
- } else {
- c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- case blas.Trans:
- // Form C = alpha * Aᴴ * Bᵀ + beta * C.
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- var tmp complex64
- for l := 0; l < k; l++ {
- tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l]
- }
- if beta == 0 {
- c[i*ldc+j] = alpha * tmp
- } else {
- c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- case blas.ConjTrans:
- // Form C = alpha * Aᴴ * Bᴴ + beta * C.
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- var tmp complex64
- for l := 0; l < k; l++ {
- tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l])
- }
- if beta == 0 {
- c[i*ldc+j] = alpha * tmp
- } else {
- c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- }
- }
- }
- // Chemm performs one of the matrix-matrix operations
- // C = alpha*A*B + beta*C if side == blas.Left
- // C = alpha*B*A + beta*C if side == blas.Right
- // where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B
- // and C are m×n matrices. The imaginary parts of the diagonal elements of A are
- // assumed to be zero.
- //
- // Complex64 implementations are autogenerated and not directly tested.
- func (Implementation) Chemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
- na := m
- if side == blas.Right {
- na = n
- }
- switch {
- case side != blas.Left && side != blas.Right:
- panic(badSide)
- case uplo != blas.Lower && uplo != blas.Upper:
- panic(badUplo)
- case m < 0:
- panic(mLT0)
- case n < 0:
- panic(nLT0)
- case lda < max(1, na):
- panic(badLdA)
- case ldb < max(1, n):
- panic(badLdB)
- case ldc < max(1, n):
- panic(badLdC)
- }
- // Quick return if possible.
- if m == 0 || n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < lda*(na-1)+na {
- panic(shortA)
- }
- if len(b) < ldb*(m-1)+n {
- panic(shortB)
- }
- if len(c) < ldc*(m-1)+n {
- panic(shortC)
- }
- // Quick return if possible.
- if alpha == 0 && beta == 1 {
- return
- }
- if alpha == 0 {
- if beta == 0 {
- for i := 0; i < m; i++ {
- ci := c[i*ldc : i*ldc+n]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < m; i++ {
- ci := c[i*ldc : i*ldc+n]
- c64.ScalUnitary(beta, ci)
- }
- }
- return
- }
- if side == blas.Left {
- // Form C = alpha*A*B + beta*C.
- for i := 0; i < m; i++ {
- atmp := alpha * complex(real(a[i*lda+i]), 0)
- bi := b[i*ldb : i*ldb+n]
- ci := c[i*ldc : i*ldc+n]
- if beta == 0 {
- for j, bij := range bi {
- ci[j] = atmp * bij
- }
- } else {
- for j, bij := range bi {
- ci[j] = atmp*bij + beta*ci[j]
- }
- }
- if uplo == blas.Upper {
- for k := 0; k < i; k++ {
- atmp = alpha * cmplx.Conj(a[k*lda+i])
- c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
- }
- for k := i + 1; k < m; k++ {
- atmp = alpha * a[i*lda+k]
- c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
- }
- } else {
- for k := 0; k < i; k++ {
- atmp = alpha * a[i*lda+k]
- c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
- }
- for k := i + 1; k < m; k++ {
- atmp = alpha * cmplx.Conj(a[k*lda+i])
- c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
- }
- }
- }
- } else {
- // Form C = alpha*B*A + beta*C.
- if uplo == blas.Upper {
- for i := 0; i < m; i++ {
- for j := n - 1; j >= 0; j-- {
- abij := alpha * b[i*ldb+j]
- aj := a[j*lda+j+1 : j*lda+n]
- bi := b[i*ldb+j+1 : i*ldb+n]
- ci := c[i*ldc+j+1 : i*ldc+n]
- var tmp complex64
- for k, ajk := range aj {
- ci[k] += abij * ajk
- tmp += bi[k] * cmplx.Conj(ajk)
- }
- ajj := complex(real(a[j*lda+j]), 0)
- if beta == 0 {
- c[i*ldc+j] = abij*ajj + alpha*tmp
- } else {
- c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- } else {
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- abij := alpha * b[i*ldb+j]
- aj := a[j*lda : j*lda+j]
- bi := b[i*ldb : i*ldb+j]
- ci := c[i*ldc : i*ldc+j]
- var tmp complex64
- for k, ajk := range aj {
- ci[k] += abij * ajk
- tmp += bi[k] * cmplx.Conj(ajk)
- }
- ajj := complex(real(a[j*lda+j]), 0)
- if beta == 0 {
- c[i*ldc+j] = abij*ajj + alpha*tmp
- } else {
- c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- }
- }
- }
- // Cherk performs one of the hermitian rank-k operations
- // C = alpha*A*Aᴴ + beta*C if trans == blas.NoTrans
- // C = alpha*Aᴴ*A + beta*C if trans == blas.ConjTrans
- // where alpha and beta are real scalars, C is an n×n hermitian matrix and A is
- // an n×k matrix in the first case and a k×n matrix in the second case.
- //
- // The imaginary parts of the diagonal elements of C are assumed to be zero, and
- // on return they will be set to zero.
- //
- // Complex64 implementations are autogenerated and not directly tested.
- func (Implementation) Cherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) {
- var rowA, colA int
- switch trans {
- default:
- panic(badTranspose)
- case blas.NoTrans:
- rowA, colA = n, k
- case blas.ConjTrans:
- rowA, colA = k, n
- }
- switch {
- case uplo != blas.Lower && uplo != blas.Upper:
- panic(badUplo)
- case n < 0:
- panic(nLT0)
- case k < 0:
- panic(kLT0)
- case lda < max(1, colA):
- panic(badLdA)
- case ldc < max(1, n):
- panic(badLdC)
- }
- // Quick return if possible.
- if n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < (rowA-1)*lda+colA {
- panic(shortA)
- }
- if len(c) < (n-1)*ldc+n {
- panic(shortC)
- }
- // Quick return if possible.
- if (alpha == 0 || k == 0) && beta == 1 {
- return
- }
- if alpha == 0 {
- if uplo == blas.Upper {
- if beta == 0 {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- ci[0] = complex(beta*real(ci[0]), 0)
- if i != n-1 {
- c64.SscalUnitary(beta, ci[1:])
- }
- }
- }
- } else {
- if beta == 0 {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- if i != 0 {
- c64.SscalUnitary(beta, ci[:i])
- }
- ci[i] = complex(beta*real(ci[i]), 0)
- }
- }
- }
- return
- }
- calpha := complex(alpha, 0)
- if trans == blas.NoTrans {
- // Form C = alpha*A*Aᴴ + beta*C.
- cbeta := complex(beta, 0)
- if uplo == blas.Upper {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- ai := a[i*lda : i*lda+k]
- switch {
- case beta == 0:
- // Handle the i-th diagonal element of C.
- ci[0] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0)
- // Handle the remaining elements on the i-th row of C.
- for jc := range ci[1:] {
- j := i + 1 + jc
- ci[jc+1] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai)
- }
- case beta != 1:
- cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[0]
- ci[0] = complex(real(cii), 0)
- for jc, cij := range ci[1:] {
- j := i + 1 + jc
- ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij
- }
- default:
- cii := calpha*c64.DotcUnitary(ai, ai) + ci[0]
- ci[0] = complex(real(cii), 0)
- for jc, cij := range ci[1:] {
- j := i + 1 + jc
- ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij
- }
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- ai := a[i*lda : i*lda+k]
- switch {
- case beta == 0:
- // Handle the first i-1 elements on the i-th row of C.
- for j := range ci[:i] {
- ci[j] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai)
- }
- // Handle the i-th diagonal element of C.
- ci[i] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0)
- case beta != 1:
- for j, cij := range ci[:i] {
- ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij
- }
- cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[i]
- ci[i] = complex(real(cii), 0)
- default:
- for j, cij := range ci[:i] {
- ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij
- }
- cii := calpha*c64.DotcUnitary(ai, ai) + ci[i]
- ci[i] = complex(real(cii), 0)
- }
- }
- }
- } else {
- // Form C = alpha*Aᴴ*A + beta*C.
- if uplo == blas.Upper {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- switch {
- case beta == 0:
- for jc := range ci {
- ci[jc] = 0
- }
- case beta != 1:
- c64.SscalUnitary(beta, ci)
- ci[0] = complex(real(ci[0]), 0)
- default:
- ci[0] = complex(real(ci[0]), 0)
- }
- for j := 0; j < k; j++ {
- aji := cmplx.Conj(a[j*lda+i])
- if aji != 0 {
- c64.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci)
- }
- }
- c[i*ldc+i] = complex(real(c[i*ldc+i]), 0)
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- switch {
- case beta == 0:
- for j := range ci {
- ci[j] = 0
- }
- case beta != 1:
- c64.SscalUnitary(beta, ci)
- ci[i] = complex(real(ci[i]), 0)
- default:
- ci[i] = complex(real(ci[i]), 0)
- }
- for j := 0; j < k; j++ {
- aji := cmplx.Conj(a[j*lda+i])
- if aji != 0 {
- c64.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci)
- }
- }
- c[i*ldc+i] = complex(real(c[i*ldc+i]), 0)
- }
- }
- }
- }
- // Cher2k performs one of the hermitian rank-2k operations
- // C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C if trans == blas.NoTrans
- // C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C if trans == blas.ConjTrans
- // where alpha and beta are scalars with beta real, C is an n×n hermitian matrix
- // and A and B are n×k matrices in the first case and k×n matrices in the second case.
- //
- // The imaginary parts of the diagonal elements of C are assumed to be zero, and
- // on return they will be set to zero.
- //
- // Complex64 implementations are autogenerated and not directly tested.
- func (Implementation) Cher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) {
- var row, col int
- switch trans {
- default:
- panic(badTranspose)
- case blas.NoTrans:
- row, col = n, k
- case blas.ConjTrans:
- row, col = k, n
- }
- switch {
- case uplo != blas.Lower && uplo != blas.Upper:
- panic(badUplo)
- case n < 0:
- panic(nLT0)
- case k < 0:
- panic(kLT0)
- case lda < max(1, col):
- panic(badLdA)
- case ldb < max(1, col):
- panic(badLdB)
- case ldc < max(1, n):
- panic(badLdC)
- }
- // Quick return if possible.
- if n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < (row-1)*lda+col {
- panic(shortA)
- }
- if len(b) < (row-1)*ldb+col {
- panic(shortB)
- }
- if len(c) < (n-1)*ldc+n {
- panic(shortC)
- }
- // Quick return if possible.
- if (alpha == 0 || k == 0) && beta == 1 {
- return
- }
- if alpha == 0 {
- if uplo == blas.Upper {
- if beta == 0 {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- ci[0] = complex(beta*real(ci[0]), 0)
- if i != n-1 {
- c64.SscalUnitary(beta, ci[1:])
- }
- }
- }
- } else {
- if beta == 0 {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- if i != 0 {
- c64.SscalUnitary(beta, ci[:i])
- }
- ci[i] = complex(beta*real(ci[i]), 0)
- }
- }
- }
- return
- }
- conjalpha := cmplx.Conj(alpha)
- cbeta := complex(beta, 0)
- if trans == blas.NoTrans {
- // Form C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C.
- if uplo == blas.Upper {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i+1 : i*ldc+n]
- ai := a[i*lda : i*lda+k]
- bi := b[i*ldb : i*ldb+k]
- if beta == 0 {
- cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi)
- c[i*ldc+i] = complex(real(cii), 0)
- for jc := range ci {
- j := i + 1 + jc
- ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi)
- }
- } else {
- cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i]
- c[i*ldc+i] = complex(real(cii), 0)
- for jc, cij := range ci {
- j := i + 1 + jc
- ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij
- }
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i]
- ai := a[i*lda : i*lda+k]
- bi := b[i*ldb : i*ldb+k]
- if beta == 0 {
- for j := range ci {
- ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi)
- }
- cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi)
- c[i*ldc+i] = complex(real(cii), 0)
- } else {
- for j, cij := range ci {
- ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij
- }
- cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i]
- c[i*ldc+i] = complex(real(cii), 0)
- }
- }
- }
- } else {
- // Form C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C.
- if uplo == blas.Upper {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- switch {
- case beta == 0:
- for jc := range ci {
- ci[jc] = 0
- }
- case beta != 1:
- c64.SscalUnitary(beta, ci)
- ci[0] = complex(real(ci[0]), 0)
- default:
- ci[0] = complex(real(ci[0]), 0)
- }
- for j := 0; j < k; j++ {
- aji := a[j*lda+i]
- bji := b[j*ldb+i]
- if aji != 0 {
- c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci)
- }
- if bji != 0 {
- c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci)
- }
- }
- ci[0] = complex(real(ci[0]), 0)
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- switch {
- case beta == 0:
- for j := range ci {
- ci[j] = 0
- }
- case beta != 1:
- c64.SscalUnitary(beta, ci)
- ci[i] = complex(real(ci[i]), 0)
- default:
- ci[i] = complex(real(ci[i]), 0)
- }
- for j := 0; j < k; j++ {
- aji := a[j*lda+i]
- bji := b[j*ldb+i]
- if aji != 0 {
- c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci)
- }
- if bji != 0 {
- c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci)
- }
- }
- ci[i] = complex(real(ci[i]), 0)
- }
- }
- }
- }
- // Csymm performs one of the matrix-matrix operations
- // C = alpha*A*B + beta*C if side == blas.Left
- // C = alpha*B*A + beta*C if side == blas.Right
- // where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B
- // and C are m×n matrices.
- //
- // Complex64 implementations are autogenerated and not directly tested.
- func (Implementation) Csymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
- na := m
- if side == blas.Right {
- na = n
- }
- switch {
- case side != blas.Left && side != blas.Right:
- panic(badSide)
- case uplo != blas.Lower && uplo != blas.Upper:
- panic(badUplo)
- case m < 0:
- panic(mLT0)
- case n < 0:
- panic(nLT0)
- case lda < max(1, na):
- panic(badLdA)
- case ldb < max(1, n):
- panic(badLdB)
- case ldc < max(1, n):
- panic(badLdC)
- }
- // Quick return if possible.
- if m == 0 || n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < lda*(na-1)+na {
- panic(shortA)
- }
- if len(b) < ldb*(m-1)+n {
- panic(shortB)
- }
- if len(c) < ldc*(m-1)+n {
- panic(shortC)
- }
- // Quick return if possible.
- if alpha == 0 && beta == 1 {
- return
- }
- if alpha == 0 {
- if beta == 0 {
- for i := 0; i < m; i++ {
- ci := c[i*ldc : i*ldc+n]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < m; i++ {
- ci := c[i*ldc : i*ldc+n]
- c64.ScalUnitary(beta, ci)
- }
- }
- return
- }
- if side == blas.Left {
- // Form C = alpha*A*B + beta*C.
- for i := 0; i < m; i++ {
- atmp := alpha * a[i*lda+i]
- bi := b[i*ldb : i*ldb+n]
- ci := c[i*ldc : i*ldc+n]
- if beta == 0 {
- for j, bij := range bi {
- ci[j] = atmp * bij
- }
- } else {
- for j, bij := range bi {
- ci[j] = atmp*bij + beta*ci[j]
- }
- }
- if uplo == blas.Upper {
- for k := 0; k < i; k++ {
- atmp = alpha * a[k*lda+i]
- c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
- }
- for k := i + 1; k < m; k++ {
- atmp = alpha * a[i*lda+k]
- c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
- }
- } else {
- for k := 0; k < i; k++ {
- atmp = alpha * a[i*lda+k]
- c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
- }
- for k := i + 1; k < m; k++ {
- atmp = alpha * a[k*lda+i]
- c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
- }
- }
- }
- } else {
- // Form C = alpha*B*A + beta*C.
- if uplo == blas.Upper {
- for i := 0; i < m; i++ {
- for j := n - 1; j >= 0; j-- {
- abij := alpha * b[i*ldb+j]
- aj := a[j*lda+j+1 : j*lda+n]
- bi := b[i*ldb+j+1 : i*ldb+n]
- ci := c[i*ldc+j+1 : i*ldc+n]
- var tmp complex64
- for k, ajk := range aj {
- ci[k] += abij * ajk
- tmp += bi[k] * ajk
- }
- if beta == 0 {
- c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp
- } else {
- c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- } else {
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- abij := alpha * b[i*ldb+j]
- aj := a[j*lda : j*lda+j]
- bi := b[i*ldb : i*ldb+j]
- ci := c[i*ldc : i*ldc+j]
- var tmp complex64
- for k, ajk := range aj {
- ci[k] += abij * ajk
- tmp += bi[k] * ajk
- }
- if beta == 0 {
- c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp
- } else {
- c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j]
- }
- }
- }
- }
- }
- }
- // Csyrk performs one of the symmetric rank-k operations
- // C = alpha*A*Aᵀ + beta*C if trans == blas.NoTrans
- // C = alpha*Aᵀ*A + beta*C if trans == blas.Trans
- // where alpha and beta are scalars, C is an n×n symmetric matrix and A is
- // an n×k matrix in the first case and a k×n matrix in the second case.
- //
- // Complex64 implementations are autogenerated and not directly tested.
- func (Implementation) Csyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) {
- var rowA, colA int
- switch trans {
- default:
- panic(badTranspose)
- case blas.NoTrans:
- rowA, colA = n, k
- case blas.Trans:
- rowA, colA = k, n
- }
- switch {
- case uplo != blas.Lower && uplo != blas.Upper:
- panic(badUplo)
- case n < 0:
- panic(nLT0)
- case k < 0:
- panic(kLT0)
- case lda < max(1, colA):
- panic(badLdA)
- case ldc < max(1, n):
- panic(badLdC)
- }
- // Quick return if possible.
- if n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < (rowA-1)*lda+colA {
- panic(shortA)
- }
- if len(c) < (n-1)*ldc+n {
- panic(shortC)
- }
- // Quick return if possible.
- if (alpha == 0 || k == 0) && beta == 1 {
- return
- }
- if alpha == 0 {
- if uplo == blas.Upper {
- if beta == 0 {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- c64.ScalUnitary(beta, ci)
- }
- }
- } else {
- if beta == 0 {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- c64.ScalUnitary(beta, ci)
- }
- }
- }
- return
- }
- if trans == blas.NoTrans {
- // Form C = alpha*A*Aᵀ + beta*C.
- if uplo == blas.Upper {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- ai := a[i*lda : i*lda+k]
- if beta == 0 {
- for jc := range ci {
- j := i + jc
- ci[jc] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k])
- }
- } else {
- for jc, cij := range ci {
- j := i + jc
- ci[jc] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k])
- }
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- ai := a[i*lda : i*lda+k]
- if beta == 0 {
- for j := range ci {
- ci[j] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k])
- }
- } else {
- for j, cij := range ci {
- ci[j] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k])
- }
- }
- }
- }
- } else {
- // Form C = alpha*Aᵀ*A + beta*C.
- if uplo == blas.Upper {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- switch {
- case beta == 0:
- for jc := range ci {
- ci[jc] = 0
- }
- case beta != 1:
- for jc := range ci {
- ci[jc] *= beta
- }
- }
- for j := 0; j < k; j++ {
- aji := a[j*lda+i]
- if aji != 0 {
- c64.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci)
- }
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- switch {
- case beta == 0:
- for j := range ci {
- ci[j] = 0
- }
- case beta != 1:
- for j := range ci {
- ci[j] *= beta
- }
- }
- for j := 0; j < k; j++ {
- aji := a[j*lda+i]
- if aji != 0 {
- c64.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci)
- }
- }
- }
- }
- }
- }
- // Csyr2k performs one of the symmetric rank-2k operations
- // C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C if trans == blas.NoTrans
- // C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C if trans == blas.Trans
- // where alpha and beta are scalars, C is an n×n symmetric matrix and A and B
- // are n×k matrices in the first case and k×n matrices in the second case.
- //
- // Complex64 implementations are autogenerated and not directly tested.
- func (Implementation) Csyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
- var row, col int
- switch trans {
- default:
- panic(badTranspose)
- case blas.NoTrans:
- row, col = n, k
- case blas.Trans:
- row, col = k, n
- }
- switch {
- case uplo != blas.Lower && uplo != blas.Upper:
- panic(badUplo)
- case n < 0:
- panic(nLT0)
- case k < 0:
- panic(kLT0)
- case lda < max(1, col):
- panic(badLdA)
- case ldb < max(1, col):
- panic(badLdB)
- case ldc < max(1, n):
- panic(badLdC)
- }
- // Quick return if possible.
- if n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < (row-1)*lda+col {
- panic(shortA)
- }
- if len(b) < (row-1)*ldb+col {
- panic(shortB)
- }
- if len(c) < (n-1)*ldc+n {
- panic(shortC)
- }
- // Quick return if possible.
- if (alpha == 0 || k == 0) && beta == 1 {
- return
- }
- if alpha == 0 {
- if uplo == blas.Upper {
- if beta == 0 {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- c64.ScalUnitary(beta, ci)
- }
- }
- } else {
- if beta == 0 {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- for j := range ci {
- ci[j] = 0
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- c64.ScalUnitary(beta, ci)
- }
- }
- }
- return
- }
- if trans == blas.NoTrans {
- // Form C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C.
- if uplo == blas.Upper {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- ai := a[i*lda : i*lda+k]
- bi := b[i*ldb : i*ldb+k]
- if beta == 0 {
- for jc := range ci {
- j := i + jc
- ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k])
- }
- } else {
- for jc, cij := range ci {
- j := i + jc
- ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij
- }
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- ai := a[i*lda : i*lda+k]
- bi := b[i*ldb : i*ldb+k]
- if beta == 0 {
- for j := range ci {
- ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k])
- }
- } else {
- for j, cij := range ci {
- ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij
- }
- }
- }
- }
- } else {
- // Form C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C.
- if uplo == blas.Upper {
- for i := 0; i < n; i++ {
- ci := c[i*ldc+i : i*ldc+n]
- switch {
- case beta == 0:
- for jc := range ci {
- ci[jc] = 0
- }
- case beta != 1:
- for jc := range ci {
- ci[jc] *= beta
- }
- }
- for j := 0; j < k; j++ {
- aji := a[j*lda+i]
- bji := b[j*ldb+i]
- if aji != 0 {
- c64.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci)
- }
- if bji != 0 {
- c64.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci)
- }
- }
- }
- } else {
- for i := 0; i < n; i++ {
- ci := c[i*ldc : i*ldc+i+1]
- switch {
- case beta == 0:
- for j := range ci {
- ci[j] = 0
- }
- case beta != 1:
- for j := range ci {
- ci[j] *= beta
- }
- }
- for j := 0; j < k; j++ {
- aji := a[j*lda+i]
- bji := b[j*ldb+i]
- if aji != 0 {
- c64.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci)
- }
- if bji != 0 {
- c64.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci)
- }
- }
- }
- }
- }
- }
- // Ctrmm performs one of the matrix-matrix operations
- // B = alpha * op(A) * B if side == blas.Left,
- // B = alpha * B * op(A) if side == blas.Right,
- // where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit,
- // upper or lower triangular matrix and op(A) is one of
- // op(A) = A if trans == blas.NoTrans,
- // op(A) = Aᵀ if trans == blas.Trans,
- // op(A) = Aᴴ if trans == blas.ConjTrans.
- //
- // Complex64 implementations are autogenerated and not directly tested.
- func (Implementation) Ctrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
- na := m
- if side == blas.Right {
- na = n
- }
- switch {
- case side != blas.Left && side != blas.Right:
- panic(badSide)
- case uplo != blas.Lower && uplo != blas.Upper:
- panic(badUplo)
- case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans:
- panic(badTranspose)
- case diag != blas.Unit && diag != blas.NonUnit:
- panic(badDiag)
- case m < 0:
- panic(mLT0)
- case n < 0:
- panic(nLT0)
- case lda < max(1, na):
- panic(badLdA)
- case ldb < max(1, n):
- panic(badLdB)
- }
- // Quick return if possible.
- if m == 0 || n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < (na-1)*lda+na {
- panic(shortA)
- }
- if len(b) < (m-1)*ldb+n {
- panic(shortB)
- }
- // Quick return if possible.
- if alpha == 0 {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- for j := range bi {
- bi[j] = 0
- }
- }
- return
- }
- noConj := trans != blas.ConjTrans
- noUnit := diag == blas.NonUnit
- if side == blas.Left {
- if trans == blas.NoTrans {
- // Form B = alpha*A*B.
- if uplo == blas.Upper {
- for i := 0; i < m; i++ {
- aii := alpha
- if noUnit {
- aii *= a[i*lda+i]
- }
- bi := b[i*ldb : i*ldb+n]
- for j := range bi {
- bi[j] *= aii
- }
- for ja, aij := range a[i*lda+i+1 : i*lda+m] {
- j := ja + i + 1
- if aij != 0 {
- c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi)
- }
- }
- }
- } else {
- for i := m - 1; i >= 0; i-- {
- aii := alpha
- if noUnit {
- aii *= a[i*lda+i]
- }
- bi := b[i*ldb : i*ldb+n]
- for j := range bi {
- bi[j] *= aii
- }
- for j, aij := range a[i*lda : i*lda+i] {
- if aij != 0 {
- c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi)
- }
- }
- }
- }
- } else {
- // Form B = alpha*Aᵀ*B or B = alpha*Aᴴ*B.
- if uplo == blas.Upper {
- for k := m - 1; k >= 0; k-- {
- bk := b[k*ldb : k*ldb+n]
- for ja, ajk := range a[k*lda+k+1 : k*lda+m] {
- if ajk == 0 {
- continue
- }
- j := k + 1 + ja
- if noConj {
- c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n])
- } else {
- c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n])
- }
- }
- akk := alpha
- if noUnit {
- if noConj {
- akk *= a[k*lda+k]
- } else {
- akk *= cmplx.Conj(a[k*lda+k])
- }
- }
- if akk != 1 {
- c64.ScalUnitary(akk, bk)
- }
- }
- } else {
- for k := 0; k < m; k++ {
- bk := b[k*ldb : k*ldb+n]
- for j, ajk := range a[k*lda : k*lda+k] {
- if ajk == 0 {
- continue
- }
- if noConj {
- c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n])
- } else {
- c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n])
- }
- }
- akk := alpha
- if noUnit {
- if noConj {
- akk *= a[k*lda+k]
- } else {
- akk *= cmplx.Conj(a[k*lda+k])
- }
- }
- if akk != 1 {
- c64.ScalUnitary(akk, bk)
- }
- }
- }
- }
- } else {
- if trans == blas.NoTrans {
- // Form B = alpha*B*A.
- if uplo == blas.Upper {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- for k := n - 1; k >= 0; k-- {
- abik := alpha * bi[k]
- if abik == 0 {
- continue
- }
- bi[k] = abik
- if noUnit {
- bi[k] *= a[k*lda+k]
- }
- c64.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:])
- }
- }
- } else {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- for k := 0; k < n; k++ {
- abik := alpha * bi[k]
- if abik == 0 {
- continue
- }
- bi[k] = abik
- if noUnit {
- bi[k] *= a[k*lda+k]
- }
- c64.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k])
- }
- }
- }
- } else {
- // Form B = alpha*B*Aᵀ or B = alpha*B*Aᴴ.
- if uplo == blas.Upper {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- for j, bij := range bi {
- if noConj {
- if noUnit {
- bij *= a[j*lda+j]
- }
- bij += c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
- } else {
- if noUnit {
- bij *= cmplx.Conj(a[j*lda+j])
- }
- bij += c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
- }
- bi[j] = alpha * bij
- }
- }
- } else {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- for j := n - 1; j >= 0; j-- {
- bij := bi[j]
- if noConj {
- if noUnit {
- bij *= a[j*lda+j]
- }
- bij += c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j])
- } else {
- if noUnit {
- bij *= cmplx.Conj(a[j*lda+j])
- }
- bij += c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j])
- }
- bi[j] = alpha * bij
- }
- }
- }
- }
- }
- }
- // Ctrsm solves one of the matrix equations
- // op(A) * X = alpha * B if side == blas.Left,
- // X * op(A) = alpha * B if side == blas.Right,
- // where alpha is a scalar, X and B are m×n matrices, A is a unit or
- // non-unit, upper or lower triangular matrix and op(A) is one of
- // op(A) = A if transA == blas.NoTrans,
- // op(A) = Aᵀ if transA == blas.Trans,
- // op(A) = Aᴴ if transA == blas.ConjTrans.
- // On return the matrix X is overwritten on B.
- //
- // Complex64 implementations are autogenerated and not directly tested.
- func (Implementation) Ctrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
- na := m
- if side == blas.Right {
- na = n
- }
- switch {
- case side != blas.Left && side != blas.Right:
- panic(badSide)
- case uplo != blas.Lower && uplo != blas.Upper:
- panic(badUplo)
- case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans:
- panic(badTranspose)
- case diag != blas.Unit && diag != blas.NonUnit:
- panic(badDiag)
- case m < 0:
- panic(mLT0)
- case n < 0:
- panic(nLT0)
- case lda < max(1, na):
- panic(badLdA)
- case ldb < max(1, n):
- panic(badLdB)
- }
- // Quick return if possible.
- if m == 0 || n == 0 {
- return
- }
- // For zero matrix size the following slice length checks are trivially satisfied.
- if len(a) < (na-1)*lda+na {
- panic(shortA)
- }
- if len(b) < (m-1)*ldb+n {
- panic(shortB)
- }
- if alpha == 0 {
- for i := 0; i < m; i++ {
- for j := 0; j < n; j++ {
- b[i*ldb+j] = 0
- }
- }
- return
- }
- noConj := transA != blas.ConjTrans
- noUnit := diag == blas.NonUnit
- if side == blas.Left {
- if transA == blas.NoTrans {
- // Form B = alpha*inv(A)*B.
- if uplo == blas.Upper {
- for i := m - 1; i >= 0; i-- {
- bi := b[i*ldb : i*ldb+n]
- if alpha != 1 {
- c64.ScalUnitary(alpha, bi)
- }
- for ka, aik := range a[i*lda+i+1 : i*lda+m] {
- k := i + 1 + ka
- if aik != 0 {
- c64.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi)
- }
- }
- if noUnit {
- c64.ScalUnitary(1/a[i*lda+i], bi)
- }
- }
- } else {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- if alpha != 1 {
- c64.ScalUnitary(alpha, bi)
- }
- for j, aij := range a[i*lda : i*lda+i] {
- if aij != 0 {
- c64.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi)
- }
- }
- if noUnit {
- c64.ScalUnitary(1/a[i*lda+i], bi)
- }
- }
- }
- } else {
- // Form B = alpha*inv(Aᵀ)*B or B = alpha*inv(Aᴴ)*B.
- if uplo == blas.Upper {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- if noUnit {
- if noConj {
- c64.ScalUnitary(1/a[i*lda+i], bi)
- } else {
- c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi)
- }
- }
- for ja, aij := range a[i*lda+i+1 : i*lda+m] {
- if aij == 0 {
- continue
- }
- j := i + 1 + ja
- if noConj {
- c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n])
- } else {
- c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n])
- }
- }
- if alpha != 1 {
- c64.ScalUnitary(alpha, bi)
- }
- }
- } else {
- for i := m - 1; i >= 0; i-- {
- bi := b[i*ldb : i*ldb+n]
- if noUnit {
- if noConj {
- c64.ScalUnitary(1/a[i*lda+i], bi)
- } else {
- c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi)
- }
- }
- for j, aij := range a[i*lda : i*lda+i] {
- if aij == 0 {
- continue
- }
- if noConj {
- c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n])
- } else {
- c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n])
- }
- }
- if alpha != 1 {
- c64.ScalUnitary(alpha, bi)
- }
- }
- }
- }
- } else {
- if transA == blas.NoTrans {
- // Form B = alpha*B*inv(A).
- if uplo == blas.Upper {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- if alpha != 1 {
- c64.ScalUnitary(alpha, bi)
- }
- for j, bij := range bi {
- if bij == 0 {
- continue
- }
- if noUnit {
- bi[j] /= a[j*lda+j]
- }
- c64.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n])
- }
- }
- } else {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- if alpha != 1 {
- c64.ScalUnitary(alpha, bi)
- }
- for j := n - 1; j >= 0; j-- {
- if bi[j] == 0 {
- continue
- }
- if noUnit {
- bi[j] /= a[j*lda+j]
- }
- c64.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j])
- }
- }
- }
- } else {
- // Form B = alpha*B*inv(Aᵀ) or B = alpha*B*inv(Aᴴ).
- if uplo == blas.Upper {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- for j := n - 1; j >= 0; j-- {
- bij := alpha * bi[j]
- if noConj {
- bij -= c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
- if noUnit {
- bij /= a[j*lda+j]
- }
- } else {
- bij -= c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
- if noUnit {
- bij /= cmplx.Conj(a[j*lda+j])
- }
- }
- bi[j] = bij
- }
- }
- } else {
- for i := 0; i < m; i++ {
- bi := b[i*ldb : i*ldb+n]
- for j, bij := range bi {
- bij *= alpha
- if noConj {
- bij -= c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j])
- if noUnit {
- bij /= a[j*lda+j]
- }
- } else {
- bij -= c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j])
- if noUnit {
- bij /= cmplx.Conj(a[j*lda+j])
- }
- }
- bi[j] = bij
- }
- }
- }
- }
- }
- }
|