transport.go 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297
  1. package kafka
  2. import (
  3. "context"
  4. "crypto/tls"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "math/rand"
  9. "net"
  10. "runtime/pprof"
  11. "sort"
  12. "strconv"
  13. "strings"
  14. "sync"
  15. "sync/atomic"
  16. "time"
  17. "github.com/segmentio/kafka-go/protocol"
  18. "github.com/segmentio/kafka-go/protocol/apiversions"
  19. "github.com/segmentio/kafka-go/protocol/createtopics"
  20. "github.com/segmentio/kafka-go/protocol/findcoordinator"
  21. meta "github.com/segmentio/kafka-go/protocol/metadata"
  22. "github.com/segmentio/kafka-go/protocol/saslauthenticate"
  23. "github.com/segmentio/kafka-go/protocol/saslhandshake"
  24. "github.com/segmentio/kafka-go/sasl"
  25. )
  26. // Request is an interface implemented by types that represent messages sent
  27. // from kafka clients to brokers.
  28. type Request = protocol.Message
  29. // Response is an interface implemented by types that represent messages sent
  30. // from kafka brokers in response to client requests.
  31. type Response = protocol.Message
  32. // RoundTripper is an interface implemented by types which support interacting
  33. // with kafka brokers.
  34. type RoundTripper interface {
  35. // RoundTrip sends a request to a kafka broker and returns the response that
  36. // was received, or a non-nil error.
  37. //
  38. // The context passed as first argument can be used to asynchronnously abort
  39. // the call if needed.
  40. RoundTrip(context.Context, net.Addr, Request) (Response, error)
  41. }
  42. // Transport is an implementation of the RoundTripper interface.
  43. //
  44. // Transport values manage a pool of connections and automatically discovers the
  45. // clusters layout to route requests to the appropriate brokers.
  46. //
  47. // Transport values are safe to use concurrently from multiple goroutines.
  48. //
  49. // Note: The intent is for the Transport to become the underlying layer of the
  50. // kafka.Reader and kafka.Writer types.
  51. type Transport struct {
  52. // A function used to establish connections to the kafka cluster.
  53. Dial func(context.Context, string, string) (net.Conn, error)
  54. // Time limit set for establishing connections to the kafka cluster. This
  55. // limit includes all round trips done to establish the connections (TLS
  56. // hadbhaske, SASL negotiation, etc...).
  57. //
  58. // Defaults to 5s.
  59. DialTimeout time.Duration
  60. // Maximum amount of time that connections will remain open and unused.
  61. // The transport will manage to automatically close connections that have
  62. // been idle for too long, and re-open them on demand when the transport is
  63. // used again.
  64. //
  65. // Defaults to 30s.
  66. IdleTimeout time.Duration
  67. // TTL for the metadata cached by this transport. Note that the value
  68. // configured here is an upper bound, the transport randomizes the TTLs to
  69. // avoid getting into states where multiple clients end up synchronized and
  70. // cause bursts of requests to the kafka broker.
  71. //
  72. // Default to 6s.
  73. MetadataTTL time.Duration
  74. // Unique identifier that the transport communicates to the brokers when it
  75. // sends requests.
  76. ClientID string
  77. // An optional configuration for TLS connections established by this
  78. // transport.
  79. //
  80. // If the Server
  81. TLS *tls.Config
  82. // SASL configures the Transfer to use SASL authentication.
  83. SASL sasl.Mechanism
  84. // An optional resolver used to translate broker host names into network
  85. // addresses.
  86. //
  87. // The resolver will be called for every request (not every connection),
  88. // making it possible to implement ACL policies by validating that the
  89. // program is allowed to connect to the kafka broker. This also means that
  90. // the resolver should probably provide a caching layer to avoid storming
  91. // the service discovery backend with requests.
  92. //
  93. // When set, the Dial function is not responsible for performing name
  94. // resolution, and is always called with a pre-resolved address.
  95. Resolver BrokerResolver
  96. // The background context used to control goroutines started internally by
  97. // the transport.
  98. //
  99. // If nil, context.Background() is used instead.
  100. Context context.Context
  101. mutex sync.RWMutex
  102. pools map[networkAddress]*connPool
  103. }
  104. // DefaultTransport is the default transport used by kafka clients in this
  105. // package.
  106. var DefaultTransport RoundTripper = &Transport{
  107. Dial: (&net.Dialer{
  108. Timeout: 3 * time.Second,
  109. DualStack: true,
  110. }).DialContext,
  111. }
  112. // CloseIdleConnections closes all idle connections immediately, and marks all
  113. // connections that are in use to be closed when they become idle again.
  114. func (t *Transport) CloseIdleConnections() {
  115. t.mutex.Lock()
  116. defer t.mutex.Unlock()
  117. for _, pool := range t.pools {
  118. pool.unref()
  119. }
  120. for k := range t.pools {
  121. delete(t.pools, k)
  122. }
  123. }
  124. // RoundTrip sends a request to a kafka cluster and returns the response, or an
  125. // error if no responses were received.
  126. //
  127. // Message types are available in sub-packages of the protocol package. Each
  128. // kafka API is implemented in a different sub-package. For example, the request
  129. // and response types for the Fetch API are available in the protocol/fetch
  130. // package.
  131. //
  132. // The type of the response message will match the type of the request. For
  133. // exmple, if RoundTrip was called with a *fetch.Request as argument, the value
  134. // returned will be of type *fetch.Response. It is safe for the program to do a
  135. // type assertion after checking that no error was returned.
  136. //
  137. // This example illustrates the way this method is expected to be used:
  138. //
  139. // r, err := transport.RoundTrip(ctx, addr, &fetch.Request{ ... })
  140. // if err != nil {
  141. // ...
  142. // } else {
  143. // res := r.(*fetch.Response)
  144. // ...
  145. // }
  146. //
  147. // The transport automatically selects the highest version of the API that is
  148. // supported by both the kafka-go package and the kafka broker. The negotiation
  149. // happens transparently once when connections are established.
  150. //
  151. // This API was introduced in version 0.4 as a way to leverage the lower-level
  152. // features of the kafka protocol, but also provide a more efficient way of
  153. // managing connections to kafka brokers.
  154. func (t *Transport) RoundTrip(ctx context.Context, addr net.Addr, req Request) (Response, error) {
  155. p := t.grabPool(addr)
  156. defer p.unref()
  157. return p.roundTrip(ctx, req)
  158. }
  159. func (t *Transport) dial() func(context.Context, string, string) (net.Conn, error) {
  160. if t.Dial != nil {
  161. return t.Dial
  162. }
  163. return defaultDialer.DialContext
  164. }
  165. func (t *Transport) dialTimeout() time.Duration {
  166. if t.DialTimeout > 0 {
  167. return t.DialTimeout
  168. }
  169. return 5 * time.Second
  170. }
  171. func (t *Transport) idleTimeout() time.Duration {
  172. if t.IdleTimeout > 0 {
  173. return t.IdleTimeout
  174. }
  175. return 30 * time.Second
  176. }
  177. func (t *Transport) metadataTTL() time.Duration {
  178. if t.MetadataTTL > 0 {
  179. return t.MetadataTTL
  180. }
  181. return 6 * time.Second
  182. }
  183. func (t *Transport) grabPool(addr net.Addr) *connPool {
  184. k := networkAddress{
  185. network: addr.Network(),
  186. address: addr.String(),
  187. }
  188. t.mutex.RLock()
  189. p := t.pools[k]
  190. if p != nil {
  191. p.ref()
  192. }
  193. t.mutex.RUnlock()
  194. if p != nil {
  195. return p
  196. }
  197. t.mutex.Lock()
  198. defer t.mutex.Unlock()
  199. if p := t.pools[k]; p != nil {
  200. p.ref()
  201. return p
  202. }
  203. ctx, cancel := context.WithCancel(t.context())
  204. p = &connPool{
  205. refc: 2,
  206. dial: t.dial(),
  207. dialTimeout: t.dialTimeout(),
  208. idleTimeout: t.idleTimeout(),
  209. metadataTTL: t.metadataTTL(),
  210. clientID: t.ClientID,
  211. tls: t.TLS,
  212. sasl: t.SASL,
  213. resolver: t.Resolver,
  214. ready: make(event),
  215. wake: make(chan event),
  216. conns: make(map[int32]*connGroup),
  217. cancel: cancel,
  218. }
  219. p.ctrl = p.newConnGroup(addr)
  220. go p.discover(ctx, p.wake)
  221. if t.pools == nil {
  222. t.pools = make(map[networkAddress]*connPool)
  223. }
  224. t.pools[k] = p
  225. return p
  226. }
  227. func (t *Transport) context() context.Context {
  228. if t.Context != nil {
  229. return t.Context
  230. }
  231. return context.Background()
  232. }
  // event is a one-shot signal: receivers waiting on the channel are released
  // once trigger has been called.
  type event chan struct{}

  // trigger fires the event by closing the channel. Because it closes the
  // channel, calling it a second time on the same event panics.
  func (e event) trigger() {
  	close(e)
  }
  235. type connPool struct {
  236. refc uintptr
  237. // Immutable fields of the connection pool. Connections access these field
  238. // on their parent pool in a ready-only fashion, so no synchronization is
  239. // required.
  240. dial func(context.Context, string, string) (net.Conn, error)
  241. dialTimeout time.Duration
  242. idleTimeout time.Duration
  243. metadataTTL time.Duration
  244. clientID string
  245. tls *tls.Config
  246. sasl sasl.Mechanism
  247. resolver BrokerResolver
  248. // Signaling mechanisms to orchestrate communications between the pool and
  249. // the rest of the program.
  250. once sync.Once // ensure that `ready` is triggered only once
  251. ready event // triggered after the first metadata update
  252. wake chan event // used to force metadata updates
  253. cancel context.CancelFunc
  254. // Mutable fields of the connection pool, access must be synchronized.
  255. mutex sync.RWMutex
  256. conns map[int32]*connGroup // data connections used for produce/fetch/etc...
  257. ctrl *connGroup // control connections used for metadata requests
  258. state atomic.Value // cached cluster state
  259. }
  260. type connPoolState struct {
  261. metadata *meta.Response // last metadata response seen by the pool
  262. err error // last error from metadata requests
  263. layout protocol.Cluster // cluster layout built from metadata response
  264. }
  265. func (p *connPool) grabState() connPoolState {
  266. state, _ := p.state.Load().(connPoolState)
  267. return state
  268. }
  269. func (p *connPool) setState(state connPoolState) {
  270. p.state.Store(state)
  271. }
  272. func (p *connPool) ref() {
  273. atomic.AddUintptr(&p.refc, +1)
  274. }
  275. func (p *connPool) unref() {
  276. if atomic.AddUintptr(&p.refc, ^uintptr(0)) == 0 {
  277. p.mutex.Lock()
  278. defer p.mutex.Unlock()
  279. for _, conns := range p.conns {
  280. conns.closeIdleConns()
  281. }
  282. p.ctrl.closeIdleConns()
  283. p.cancel()
  284. }
  285. }
  286. func (p *connPool) roundTrip(ctx context.Context, req Request) (Response, error) {
  287. // This first select should never block after the first metadata response
  288. // that would mark the pool as `ready`.
  289. select {
  290. case <-p.ready:
  291. case <-ctx.Done():
  292. return nil, ctx.Err()
  293. }
  294. var expectTopics []string
  295. defer func() {
  296. if len(expectTopics) != 0 {
  297. p.refreshMetadata(ctx, expectTopics)
  298. }
  299. }()
  300. state := p.grabState()
  301. var response promise
  302. switch m := req.(type) {
  303. case *meta.Request:
  304. // We serve metadata requests directly from the transport cache.
  305. //
  306. // This reduces the number of round trips to kafka brokers while keeping
  307. // the logic simple when applying partitioning strategies.
  308. if state.err != nil {
  309. return nil, state.err
  310. }
  311. return filterMetadataResponse(m, state.metadata), nil
  312. case *createtopics.Request:
  313. // Force an update of the metadata when adding topics,
  314. // otherwise the cached state would get out of sync.
  315. expectTopics = make([]string, len(m.Topics))
  316. for i := range m.Topics {
  317. expectTopics[i] = m.Topics[i].Name
  318. }
  319. case protocol.Splitter:
  320. // Messages that implement the Splitter interface trigger the creation of
  321. // multiple requests that are all merged back into a single results by
  322. // a merger.
  323. messages, merger, err := m.Split(state.layout)
  324. if err != nil {
  325. return nil, err
  326. }
  327. promises := make([]promise, len(messages))
  328. for i, m := range messages {
  329. promises[i] = p.sendRequest(ctx, m, state)
  330. }
  331. response = join(promises, messages, merger)
  332. }
  333. if response == nil {
  334. response = p.sendRequest(ctx, req, state)
  335. }
  336. return response.await(ctx)
  337. }
  338. // refreshMetadata forces an update of the cached cluster metadata, and waits
  339. // for the given list of topics to appear. This waiting mechanism is necessary
  340. // to account for the fact that topic creation is asynchronous in kafka, and
  341. // causes subsequent requests to fail while the cluster state is propagated to
  342. // all the brokers.
  343. func (p *connPool) refreshMetadata(ctx context.Context, expectTopics []string) {
  344. minBackoff := 100 * time.Millisecond
  345. maxBackoff := 2 * time.Second
  346. cancel := ctx.Done()
  347. for ctx.Err() == nil {
  348. notify := make(event)
  349. select {
  350. case <-cancel:
  351. return
  352. case p.wake <- notify:
  353. select {
  354. case <-notify:
  355. case <-cancel:
  356. return
  357. }
  358. }
  359. state := p.grabState()
  360. found := 0
  361. for _, topic := range expectTopics {
  362. if _, ok := state.layout.Topics[topic]; ok {
  363. found++
  364. }
  365. }
  366. if found == len(expectTopics) {
  367. return
  368. }
  369. if delay := time.Duration(rand.Int63n(int64(minBackoff))); delay > 0 {
  370. timer := time.NewTimer(minBackoff)
  371. select {
  372. case <-cancel:
  373. case <-timer.C:
  374. }
  375. timer.Stop()
  376. if minBackoff *= 2; minBackoff > maxBackoff {
  377. minBackoff = maxBackoff
  378. }
  379. }
  380. }
  381. }
  382. func (p *connPool) setReady() {
  383. p.once.Do(p.ready.trigger)
  384. }
  385. // update is called periodically by the goroutine running the discover method
  386. // to refresh the cluster layout information used by the transport to route
  387. // requests to brokers.
  388. func (p *connPool) update(ctx context.Context, metadata *meta.Response, err error) {
  389. var layout protocol.Cluster
  390. if metadata != nil {
  391. metadata.ThrottleTimeMs = 0
  392. // Normalize the lists so we can apply binary search on them.
  393. sortMetadataBrokers(metadata.Brokers)
  394. sortMetadataTopics(metadata.Topics)
  395. for i := range metadata.Topics {
  396. t := &metadata.Topics[i]
  397. sortMetadataPartitions(t.Partitions)
  398. }
  399. layout = makeLayout(metadata)
  400. }
  401. state := p.grabState()
  402. addBrokers := make(map[int32]struct{})
  403. delBrokers := make(map[int32]struct{})
  404. if err != nil {
  405. // Only update the error on the transport if the cluster layout was
  406. // unknown. This ensures that we prioritize a previously known state
  407. // of the cluster to reduce the impact of transient failures.
  408. if state.metadata != nil {
  409. return
  410. }
  411. state.err = err
  412. } else {
  413. for id, b2 := range layout.Brokers {
  414. if b1, ok := state.layout.Brokers[id]; !ok {
  415. addBrokers[id] = struct{}{}
  416. } else if b1 != b2 {
  417. addBrokers[id] = struct{}{}
  418. delBrokers[id] = struct{}{}
  419. }
  420. }
  421. for id := range state.layout.Brokers {
  422. if _, ok := layout.Brokers[id]; !ok {
  423. delBrokers[id] = struct{}{}
  424. }
  425. }
  426. state.metadata, state.layout = metadata, layout
  427. state.err = nil
  428. }
  429. defer p.setReady()
  430. defer p.setState(state)
  431. if len(addBrokers) != 0 || len(delBrokers) != 0 {
  432. // Only acquire the lock when there is a change of layout. This is an
  433. // infrequent event so we don't risk introducing regular contention on
  434. // the mutex if we were to lock it on every update.
  435. p.mutex.Lock()
  436. defer p.mutex.Unlock()
  437. if ctx.Err() != nil {
  438. return // the pool has been closed, no need to update
  439. }
  440. for id := range delBrokers {
  441. if broker := p.conns[id]; broker != nil {
  442. broker.closeIdleConns()
  443. delete(p.conns, id)
  444. }
  445. }
  446. for id := range addBrokers {
  447. broker := layout.Brokers[id]
  448. p.conns[id] = p.newBrokerConnGroup(Broker{
  449. Rack: broker.Rack,
  450. Host: broker.Host,
  451. Port: int(broker.Port),
  452. ID: int(broker.ID),
  453. })
  454. }
  455. }
  456. }
  457. // discover is the entry point of an internal goroutine for the transport which
  458. // periodically requests updates of the cluster metadata and refreshes the
  459. // transport cached cluster layout.
  460. func (p *connPool) discover(ctx context.Context, wake <-chan event) {
  461. prng := rand.New(rand.NewSource(time.Now().UnixNano()))
  462. metadataTTL := func() time.Duration {
  463. return time.Duration(prng.Int63n(int64(p.metadataTTL)))
  464. }
  465. timer := time.NewTimer(metadataTTL())
  466. defer timer.Stop()
  467. var notify event
  468. done := ctx.Done()
  469. for {
  470. c, err := p.grabClusterConn(ctx)
  471. if err != nil {
  472. p.update(ctx, nil, err)
  473. } else {
  474. res := make(async, 1)
  475. req := &meta.Request{
  476. IncludeClusterAuthorizedOperations: true,
  477. IncludeTopicAuthorizedOperations: true,
  478. }
  479. deadline, cancel := context.WithTimeout(ctx, p.metadataTTL)
  480. c.reqs <- connRequest{
  481. ctx: deadline,
  482. req: req,
  483. res: res,
  484. }
  485. r, err := res.await(deadline)
  486. cancel()
  487. if err != nil && err == ctx.Err() {
  488. return
  489. }
  490. ret, _ := r.(*meta.Response)
  491. p.update(ctx, ret, err)
  492. }
  493. if notify != nil {
  494. notify.trigger()
  495. notify = nil
  496. }
  497. select {
  498. case <-timer.C:
  499. timer.Reset(metadataTTL())
  500. case <-done:
  501. return
  502. case notify = <-wake:
  503. }
  504. }
  505. }
  506. // grabBrokerConn returns a connection to a specific broker represented by the
  507. // broker id passed as argument. If the broker id was not known, an error is
  508. // returned.
  509. func (p *connPool) grabBrokerConn(ctx context.Context, brokerID int32) (*conn, error) {
  510. p.mutex.RLock()
  511. g := p.conns[brokerID]
  512. p.mutex.RUnlock()
  513. if g == nil {
  514. return nil, BrokerNotAvailable
  515. }
  516. return g.grabConnOrConnect(ctx)
  517. }
  518. // grabClusterConn returns the connection to the kafka cluster that the pool is
  519. // configured to connect to.
  520. //
  521. // The transport uses a shared `control` connection to the cluster for any
  522. // requests that aren't supposed to be sent to specific brokers (e.g. Fetch or
  523. // Produce requests). Requests intended to be routed to specific brokers are
  524. // dispatched on a separate pool of connections that the transport maintains.
  525. // This split help avoid head-of-line blocking situations where control requests
  526. // like Metadata would be queued behind large responses from Fetch requests for
  527. // example.
  528. //
  529. // In either cases, the requests are multiplexed so we can keep a minimal number
  530. // of connections open (N+1, where N is the number of brokers in the cluster).
  531. func (p *connPool) grabClusterConn(ctx context.Context) (*conn, error) {
  532. return p.ctrl.grabConnOrConnect(ctx)
  533. }
  534. func (p *connPool) sendRequest(ctx context.Context, req Request, state connPoolState) promise {
  535. brokerID := int32(-1)
  536. switch m := req.(type) {
  537. case protocol.BrokerMessage:
  538. // Some requests are supposed to be sent to specific brokers (e.g. the
  539. // partition leaders). They implement the BrokerMessage interface to
  540. // delegate the routing decision to each message type.
  541. broker, err := m.Broker(state.layout)
  542. if err != nil {
  543. return reject(err)
  544. }
  545. brokerID = broker.ID
  546. case protocol.GroupMessage:
  547. // Some requests are supposed to be sent to a group coordinator,
  548. // look up which broker is currently the coordinator for the group
  549. // so we can get a connection to that broker.
  550. //
  551. // TODO: should we cache the coordinator info?
  552. p := p.sendRequest(ctx, &findcoordinator.Request{Key: m.Group()}, state)
  553. r, err := p.await(ctx)
  554. if err != nil {
  555. return reject(err)
  556. }
  557. brokerID = r.(*findcoordinator.Response).NodeID
  558. }
  559. var c *conn
  560. var err error
  561. if brokerID >= 0 {
  562. c, err = p.grabBrokerConn(ctx, brokerID)
  563. } else {
  564. c, err = p.grabClusterConn(ctx)
  565. }
  566. if err != nil {
  567. return reject(err)
  568. }
  569. res := make(async, 1)
  570. c.reqs <- connRequest{
  571. ctx: ctx,
  572. req: req,
  573. res: res,
  574. }
  575. return res
  576. }
  577. func filterMetadataResponse(req *meta.Request, res *meta.Response) *meta.Response {
  578. ret := *res
  579. if req.TopicNames != nil {
  580. ret.Topics = make([]meta.ResponseTopic, len(req.TopicNames))
  581. for i, topicName := range req.TopicNames {
  582. j, ok := findMetadataTopic(res.Topics, topicName)
  583. if ok {
  584. ret.Topics[i] = res.Topics[j]
  585. } else {
  586. ret.Topics[i] = meta.ResponseTopic{
  587. ErrorCode: int16(UnknownTopicOrPartition),
  588. Name: topicName,
  589. }
  590. }
  591. }
  592. }
  593. return &ret
  594. }
  595. func findMetadataTopic(topics []meta.ResponseTopic, topicName string) (int, bool) {
  596. i := sort.Search(len(topics), func(i int) bool {
  597. return topics[i].Name >= topicName
  598. })
  599. return i, i >= 0 && i < len(topics) && topics[i].Name == topicName
  600. }
  601. func sortMetadataBrokers(brokers []meta.ResponseBroker) {
  602. sort.Slice(brokers, func(i, j int) bool {
  603. return brokers[i].NodeID < brokers[j].NodeID
  604. })
  605. }
  606. func sortMetadataTopics(topics []meta.ResponseTopic) {
  607. sort.Slice(topics, func(i, j int) bool {
  608. return topics[i].Name < topics[j].Name
  609. })
  610. }
  611. func sortMetadataPartitions(partitions []meta.ResponsePartition) {
  612. sort.Slice(partitions, func(i, j int) bool {
  613. return partitions[i].PartitionIndex < partitions[j].PartitionIndex
  614. })
  615. }
  616. func makeLayout(metadataResponse *meta.Response) protocol.Cluster {
  617. layout := protocol.Cluster{
  618. Controller: metadataResponse.ControllerID,
  619. Brokers: make(map[int32]protocol.Broker),
  620. Topics: make(map[string]protocol.Topic),
  621. }
  622. for _, broker := range metadataResponse.Brokers {
  623. layout.Brokers[broker.NodeID] = protocol.Broker{
  624. Rack: broker.Rack,
  625. Host: broker.Host,
  626. Port: broker.Port,
  627. ID: broker.NodeID,
  628. }
  629. }
  630. for _, topic := range metadataResponse.Topics {
  631. if topic.IsInternal {
  632. continue // TODO: do we need to expose those?
  633. }
  634. layout.Topics[topic.Name] = protocol.Topic{
  635. Name: topic.Name,
  636. Error: topic.ErrorCode,
  637. Partitions: makePartitions(topic.Partitions),
  638. }
  639. }
  640. return layout
  641. }
  642. func makePartitions(metadataPartitions []meta.ResponsePartition) map[int32]protocol.Partition {
  643. protocolPartitions := make(map[int32]protocol.Partition, len(metadataPartitions))
  644. numBrokerIDs := 0
  645. for _, p := range metadataPartitions {
  646. numBrokerIDs += len(p.ReplicaNodes) + len(p.IsrNodes) + len(p.OfflineReplicas)
  647. }
  648. // Reduce the memory footprint a bit by allocating a single buffer to write
  649. // all broker ids.
  650. brokerIDs := make([]int32, 0, numBrokerIDs)
  651. for _, p := range metadataPartitions {
  652. var rep, isr, off []int32
  653. brokerIDs, rep = appendBrokerIDs(brokerIDs, p.ReplicaNodes)
  654. brokerIDs, isr = appendBrokerIDs(brokerIDs, p.IsrNodes)
  655. brokerIDs, off = appendBrokerIDs(brokerIDs, p.OfflineReplicas)
  656. protocolPartitions[p.PartitionIndex] = protocol.Partition{
  657. ID: p.PartitionIndex,
  658. Error: p.ErrorCode,
  659. Leader: p.LeaderID,
  660. Replicas: rep,
  661. ISR: isr,
  662. Offline: off,
  663. }
  664. }
  665. return protocolPartitions
  666. }
  // appendBrokerIDs appends brokers to ids and returns the grown slice together
  // with the sub-slice holding the ids that were just appended. The sub-slice
  // has its capacity capped to its length, so appending to it cannot overwrite
  // ids written to the shared buffer afterwards.
  func appendBrokerIDs(ids, brokers []int32) ([]int32, []int32) {
  	start := len(ids)
  	grown := append(ids, brokers...)
  	end := len(grown)
  	return grown, grown[start:end:end]
  }
  672. func (p *connPool) newConnGroup(a net.Addr) *connGroup {
  673. return &connGroup{
  674. addr: a,
  675. pool: p,
  676. broker: Broker{
  677. ID: -1,
  678. },
  679. }
  680. }
  681. func (p *connPool) newBrokerConnGroup(broker Broker) *connGroup {
  682. return &connGroup{
  683. addr: &networkAddress{
  684. network: "tcp",
  685. address: net.JoinHostPort(broker.Host, strconv.Itoa(broker.Port)),
  686. },
  687. pool: p,
  688. broker: broker,
  689. }
  690. }
  691. type connRequest struct {
  692. ctx context.Context
  693. req Request
  694. res async
  695. }
  696. // The promise interface is used as a message passing abstraction to coordinate
  697. // between goroutines that handle requests and responses.
  698. type promise interface {
  699. // Waits until the promise is resolved, rejected, or the context canceled.
  700. await(context.Context) (Response, error)
  701. }
  702. // async is an implementation of the promise interface which supports resolving
  703. // or rejecting the await call asynchronously.
  704. type async chan interface{}
  705. func (p async) await(ctx context.Context) (Response, error) {
  706. select {
  707. case x := <-p:
  708. switch v := x.(type) {
  709. case nil:
  710. return nil, nil // A nil response is ok (e.g. when RequiredAcks is None)
  711. case Response:
  712. return v, nil
  713. case error:
  714. return nil, v
  715. default:
  716. panic(fmt.Errorf("BUG: promise resolved with impossible value of type %T", v))
  717. }
  718. case <-ctx.Done():
  719. return nil, ctx.Err()
  720. }
  721. }
  722. func (p async) resolve(res Response) { p <- res }
  723. func (p async) reject(err error) { p <- err }
  724. // rejected is an implementation of the promise interface which is always
  725. // returns an error. Values of this type are constructed using the reject
  726. // function.
  727. type rejected struct{ err error }
  728. func reject(err error) promise { return &rejected{err: err} }
  729. func (p *rejected) await(ctx context.Context) (Response, error) {
  730. return nil, p.err
  731. }
  732. // joined is an implementation of the promise interface which merges results
  733. // from multiple promises into one await call using a merger.
  734. type joined struct {
  735. promises []promise
  736. requests []Request
  737. merger protocol.Merger
  738. }
  739. func join(promises []promise, requests []Request, merger protocol.Merger) promise {
  740. return &joined{
  741. promises: promises,
  742. requests: requests,
  743. merger: merger,
  744. }
  745. }
  746. func (p *joined) await(ctx context.Context) (Response, error) {
  747. results := make([]interface{}, len(p.promises))
  748. for i, sub := range p.promises {
  749. m, err := sub.await(ctx)
  750. if err != nil {
  751. results[i] = err
  752. } else {
  753. results[i] = m
  754. }
  755. }
  756. return p.merger.Merge(p.requests, results)
  757. }
  758. // Default dialer used by the transport connections when no Dial function
  759. // was configured by the program.
  760. var defaultDialer = net.Dialer{
  761. Timeout: 3 * time.Second,
  762. DualStack: true,
  763. }
// connGroup represents a logical connection group to a kafka broker. The
// actual network connections are lazily open before sending requests, and
// closed if they are unused for longer than the idle timeout.
type connGroup struct {
	// addr is the address handed to connect when no idle connection can be
	// reused.
	addr net.Addr
	// broker is passed to the pool's resolver (when one is configured) to
	// look up the IP addresses of idle connections worth reusing.
	broker Broker
	// Immutable state of the connection group: the pool supplies the
	// resolver, dial function, timeouts, TLS and SASL settings read by the
	// methods of this type.
	pool *connPool
	// Shared mutable state. The methods of connGroup only read or write the
	// fields below while holding mutex.
	mutex sync.Mutex
	// closed is set by closeIdleConns; once true, releaseConn refuses to
	// put connections back on the idle stack.
	closed    bool
	idleConns []*conn // stack of idle connections
}
  779. func (g *connGroup) closeIdleConns() {
  780. g.mutex.Lock()
  781. conns := g.idleConns
  782. g.idleConns = nil
  783. g.closed = true
  784. g.mutex.Unlock()
  785. for _, c := range conns {
  786. c.close()
  787. }
  788. }
  789. func (g *connGroup) grabConnOrConnect(ctx context.Context) (*conn, error) {
  790. rslv := g.pool.resolver
  791. addr := g.addr
  792. var c *conn
  793. if rslv == nil {
  794. c = g.grabConn()
  795. } else {
  796. var err error
  797. broker := g.broker
  798. if broker.ID < 0 {
  799. host, port, err := net.SplitHostPort(addr.String())
  800. if err != nil {
  801. return nil, fmt.Errorf("%s: %w", addr, err)
  802. }
  803. portNumber, err := strconv.Atoi(port)
  804. if err != nil {
  805. return nil, fmt.Errorf("%s: %w", addr, err)
  806. }
  807. broker.Host = host
  808. broker.Port = portNumber
  809. }
  810. ipAddrs, err := rslv.LookupBrokerIPAddr(ctx, broker)
  811. if err != nil {
  812. return nil, err
  813. }
  814. for _, ipAddr := range ipAddrs {
  815. network := addr.Network()
  816. address := net.JoinHostPort(ipAddr.String(), strconv.Itoa(broker.Port))
  817. if c = g.grabConnTo(network, address); c != nil {
  818. break
  819. }
  820. }
  821. }
  822. if c == nil {
  823. connChan := make(chan *conn)
  824. errChan := make(chan error)
  825. go func() {
  826. c, err := g.connect(ctx, addr)
  827. if err != nil {
  828. select {
  829. case errChan <- err:
  830. case <-ctx.Done():
  831. }
  832. } else {
  833. select {
  834. case connChan <- c:
  835. case <-ctx.Done():
  836. if !g.releaseConn(c) {
  837. c.close()
  838. }
  839. }
  840. }
  841. }()
  842. select {
  843. case c = <-connChan:
  844. case err := <-errChan:
  845. return nil, err
  846. case <-ctx.Done():
  847. return nil, ctx.Err()
  848. }
  849. }
  850. return c, nil
  851. }
  852. func (g *connGroup) grabConnTo(network, address string) *conn {
  853. g.mutex.Lock()
  854. defer g.mutex.Unlock()
  855. for i := len(g.idleConns) - 1; i >= 0; i-- {
  856. c := g.idleConns[i]
  857. if c.network == network && c.address == address {
  858. copy(g.idleConns[i:], g.idleConns[i+1:])
  859. n := len(g.idleConns) - 1
  860. g.idleConns[n] = nil
  861. g.idleConns = g.idleConns[:n]
  862. if c.timer != nil {
  863. c.timer.Stop()
  864. }
  865. return c
  866. }
  867. }
  868. return nil
  869. }
  870. func (g *connGroup) grabConn() *conn {
  871. g.mutex.Lock()
  872. defer g.mutex.Unlock()
  873. if len(g.idleConns) == 0 {
  874. return nil
  875. }
  876. n := len(g.idleConns) - 1
  877. c := g.idleConns[n]
  878. g.idleConns[n] = nil
  879. g.idleConns = g.idleConns[:n]
  880. if c.timer != nil {
  881. c.timer.Stop()
  882. }
  883. return c
  884. }
  885. func (g *connGroup) removeConn(c *conn) bool {
  886. g.mutex.Lock()
  887. defer g.mutex.Unlock()
  888. if c.timer != nil {
  889. c.timer.Stop()
  890. }
  891. for i, x := range g.idleConns {
  892. if x == c {
  893. copy(g.idleConns[i:], g.idleConns[i+1:])
  894. n := len(g.idleConns) - 1
  895. g.idleConns[n] = nil
  896. g.idleConns = g.idleConns[:n]
  897. return true
  898. }
  899. }
  900. return false
  901. }
  902. func (g *connGroup) releaseConn(c *conn) bool {
  903. idleTimeout := g.pool.idleTimeout
  904. g.mutex.Lock()
  905. defer g.mutex.Unlock()
  906. if g.closed {
  907. return false
  908. }
  909. if c.timer != nil {
  910. c.timer.Reset(idleTimeout)
  911. } else {
  912. c.timer = time.AfterFunc(idleTimeout, func() {
  913. if g.removeConn(c) {
  914. c.close()
  915. }
  916. })
  917. }
  918. g.idleConns = append(g.idleConns, c)
  919. return true
  920. }
  921. func (g *connGroup) connect(ctx context.Context, addr net.Addr) (*conn, error) {
  922. deadline := time.Now().Add(g.pool.dialTimeout)
  923. ctx, cancel := context.WithDeadline(ctx, deadline)
  924. defer cancel()
  925. network := strings.Split(addr.Network(), ",")
  926. address := strings.Split(addr.String(), ",")
  927. var netConn net.Conn
  928. var netAddr net.Addr
  929. var err error
  930. if len(address) > 1 {
  931. // Shuffle the list of addresses to randomize the order in which
  932. // connections are attempted. This prevents routing all connections
  933. // to the first broker (which will usually succeed).
  934. rand.Shuffle(len(address), func(i, j int) {
  935. network[i], network[j] = network[j], network[i]
  936. address[i], address[j] = address[j], address[i]
  937. })
  938. }
  939. for i := range address {
  940. netConn, err = g.pool.dial(ctx, network[i], address[i])
  941. if err == nil {
  942. netAddr = &networkAddress{
  943. network: network[i],
  944. address: address[i],
  945. }
  946. break
  947. }
  948. }
  949. if err != nil {
  950. return nil, err
  951. }
  952. defer func() {
  953. if netConn != nil {
  954. netConn.Close()
  955. }
  956. }()
  957. if tlsConfig := g.pool.tls; tlsConfig != nil {
  958. if tlsConfig.ServerName == "" && !tlsConfig.InsecureSkipVerify {
  959. host, _, _ := net.SplitHostPort(netAddr.String())
  960. tlsConfig = tlsConfig.Clone()
  961. tlsConfig.ServerName = host
  962. }
  963. netConn = tls.Client(netConn, tlsConfig)
  964. }
  965. pc := protocol.NewConn(netConn, g.pool.clientID)
  966. pc.SetDeadline(deadline)
  967. r, err := pc.RoundTrip(new(apiversions.Request))
  968. if err != nil {
  969. return nil, err
  970. }
  971. res := r.(*apiversions.Response)
  972. ver := make(map[protocol.ApiKey]int16, len(res.ApiKeys))
  973. if res.ErrorCode != 0 {
  974. return nil, fmt.Errorf("negotating API versions with kafka broker at %s: %w", g.addr, Error(res.ErrorCode))
  975. }
  976. for _, r := range res.ApiKeys {
  977. apiKey := protocol.ApiKey(r.ApiKey)
  978. ver[apiKey] = apiKey.SelectVersion(r.MinVersion, r.MaxVersion)
  979. }
  980. pc.SetVersions(ver)
  981. pc.SetDeadline(time.Time{})
  982. if g.pool.sasl != nil {
  983. if err := authenticateSASL(ctx, pc, g.pool.sasl); err != nil {
  984. return nil, err
  985. }
  986. }
  987. reqs := make(chan connRequest)
  988. c := &conn{
  989. network: netAddr.Network(),
  990. address: netAddr.String(),
  991. reqs: reqs,
  992. group: g,
  993. }
  994. go c.run(pc, reqs)
  995. netConn = nil
  996. return c, nil
  997. }
  998. type conn struct {
  999. reqs chan<- connRequest
  1000. network string
  1001. address string
  1002. once sync.Once
  1003. group *connGroup
  1004. timer *time.Timer
  1005. }
  1006. func (c *conn) close() {
  1007. c.once.Do(func() { close(c.reqs) })
  1008. }
  1009. func (c *conn) run(pc *protocol.Conn, reqs <-chan connRequest) {
  1010. defer pc.Close()
  1011. for cr := range reqs {
  1012. r, err := c.roundTrip(cr.ctx, pc, cr.req)
  1013. if err != nil {
  1014. cr.res.reject(err)
  1015. if !errors.Is(err, protocol.ErrNoRecord) {
  1016. break
  1017. }
  1018. } else {
  1019. cr.res.resolve(r)
  1020. }
  1021. if !c.group.releaseConn(c) {
  1022. break
  1023. }
  1024. }
  1025. }
  1026. func (c *conn) roundTrip(ctx context.Context, pc *protocol.Conn, req Request) (Response, error) {
  1027. pprof.SetGoroutineLabels(ctx)
  1028. defer pprof.SetGoroutineLabels(context.Background())
  1029. if deadline, hasDeadline := ctx.Deadline(); hasDeadline {
  1030. pc.SetDeadline(deadline)
  1031. defer pc.SetDeadline(time.Time{})
  1032. }
  1033. return pc.RoundTrip(req)
  1034. }
  1035. // authenticateSASL performs all of the required requests to authenticate this
  1036. // connection. If any step fails, this function returns with an error. A nil
  1037. // error indicates successful authentication.
  1038. func authenticateSASL(ctx context.Context, pc *protocol.Conn, mechanism sasl.Mechanism) error {
  1039. if err := saslHandshakeRoundTrip(pc, mechanism.Name()); err != nil {
  1040. return err
  1041. }
  1042. sess, state, err := mechanism.Start(ctx)
  1043. if err != nil {
  1044. return err
  1045. }
  1046. for completed := false; !completed; {
  1047. challenge, err := saslAuthenticateRoundTrip(pc, state)
  1048. switch err {
  1049. case nil:
  1050. case io.EOF:
  1051. // the broker may communicate a failed exchange by closing the
  1052. // connection (esp. in the case where we're passing opaque sasl
  1053. // data over the wire since there's no protocol info).
  1054. return SASLAuthenticationFailed
  1055. default:
  1056. return err
  1057. }
  1058. completed, state, err = sess.Next(ctx, challenge)
  1059. if err != nil {
  1060. return err
  1061. }
  1062. }
  1063. return nil
  1064. }
  1065. // saslHandshake sends the SASL handshake message. This will determine whether
  1066. // the Mechanism is supported by the cluster. If it's not, this function will
  1067. // error out with UnsupportedSASLMechanism.
  1068. //
  1069. // If the mechanism is unsupported, the handshake request will reply with the
  1070. // list of the cluster's configured mechanisms, which could potentially be used
  1071. // to facilitate negotiation. At the moment, we are not negotiating the
  1072. // mechanism as we believe that brokers are usually known to the client, and
  1073. // therefore the client should already know which mechanisms are supported.
  1074. //
  1075. // See http://kafka.apache.org/protocol.html#The_Messages_SaslHandshake
  1076. func saslHandshakeRoundTrip(pc *protocol.Conn, mechanism string) error {
  1077. msg, err := pc.RoundTrip(&saslhandshake.Request{
  1078. Mechanism: mechanism,
  1079. })
  1080. if err != nil {
  1081. return err
  1082. }
  1083. res := msg.(*saslhandshake.Response)
  1084. if res.ErrorCode != 0 {
  1085. err = Error(res.ErrorCode)
  1086. }
  1087. return err
  1088. }
  1089. // saslAuthenticate sends the SASL authenticate message. This function must
  1090. // be immediately preceded by a successful saslHandshake.
  1091. //
  1092. // See http://kafka.apache.org/protocol.html#The_Messages_SaslAuthenticate
  1093. func saslAuthenticateRoundTrip(pc *protocol.Conn, data []byte) ([]byte, error) {
  1094. msg, err := pc.RoundTrip(&saslauthenticate.Request{
  1095. AuthBytes: data,
  1096. })
  1097. if err != nil {
  1098. return nil, err
  1099. }
  1100. res := msg.(*saslauthenticate.Response)
  1101. if res.ErrorCode != 0 {
  1102. err = makeError(res.ErrorCode, res.ErrorMessage)
  1103. }
  1104. return res.AuthBytes, err
  1105. }
// Compile-time assertion that *Transport implements the RoundTripper
// interface.
var _ RoundTripper = (*Transport)(nil)