enc_fast.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656
  1. // Copyright 2019+ Klaus Post. All rights reserved.
  2. // License information can be found in the LICENSE file.
  3. // Based on work by Yann Collet, released under BSD License.
  4. package zstd
  5. import (
  6. "math/bits"
  7. "github.com/klauspost/compress/zstd/internal/xxhash"
  8. )
const (
	tableBits = 15                 // Bits used in the table
	tableSize = 1 << tableBits     // Size of the table
	tableMask = tableSize - 1      // Mask for table indices. Redundant, but can eliminate bounds checks.
	// maxMatchLength caps a single match. 131074 = 1<<17 + 2; presumably
	// derived from the maximum block size plus slack — TODO confirm.
	maxMatchLength = 131074
)
// tableEntry is one hash-table slot: the 4 bytes that were hashed (val)
// and the absolute position they were read from (offset, in e.cur space).
type tableEntry struct {
	val    uint32
	offset int32
}
// fastEncoder is the fastest zstd encoder, mirroring zstd_fast.c.
// It uses a single hash table of recent 4-byte values to find matches.
type fastEncoder struct {
	o encParams
	// cur is the offset at the start of hist
	cur int32
	// maximum offset. Should be at least 2x block size.
	maxMatchOff int32
	// hist holds the uncompressed history window; input is appended here.
	hist []byte
	// crc accumulates the frame checksum of all encoded input.
	crc *xxhash.Digest
	// table maps hash6(4 bytes) -> most recent occurrence.
	table [tableSize]tableEntry
	// tmp is scratch space for CRC serialization (avoids an allocation).
	tmp [8]byte
	// blk is the block currently being built.
	blk *blockEnc
}
// CRC returns the underlying CRC writer.
func (e *fastEncoder) CRC() *xxhash.Digest {
	return e.crc
}
  35. // AppendCRC will append the CRC to the destination slice and return it.
  36. func (e *fastEncoder) AppendCRC(dst []byte) []byte {
  37. crc := e.crc.Sum(e.tmp[:0])
  38. dst = append(dst, crc[7], crc[6], crc[5], crc[4])
  39. return dst
  40. }
  41. // WindowSize returns the window size of the encoder,
  42. // or a window size small enough to contain the input size, if > 0.
  43. func (e *fastEncoder) WindowSize(size int) int32 {
  44. if size > 0 && size < int(e.maxMatchOff) {
  45. b := int32(1) << uint(bits.Len(uint(size)))
  46. // Keep minimum window.
  47. if b < 1024 {
  48. b = 1024
  49. }
  50. return b
  51. }
  52. return e.maxMatchOff
  53. }
// Block returns the current block.
func (e *fastEncoder) Block() *blockEnc {
	return e.blk
}
// Encode mimics functionality in zstd_fast.c.
// It appends src to the history buffer, then emits sequences and literals
// describing src into blk. Recent offsets are carried across blocks via
// blk.recentOffsets.
func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// The last inputMargin bytes are never hashed, so 8-byte loads stay in bounds.
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	for e.cur > (1<<30)+e.maxMatchOff {
		if len(e.hist) == 0 {
			// No history: a full table reset is cheapest.
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
	}

	// s is the position of src inside e.hist after appending.
	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		// Too small to search: store everything as raw literals.
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	stepSize := int32(e.o.targetLength)
	if stepSize == 0 {
		stepSize++
	}
	stepSize++

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)
	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals copies pending literals [nextEmit:until) into the block
	// and records their length on the sequence.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When existing the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches
		canRepeat := len(blk.sequences) > 2

		for {
			if debug && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			// Probe two positions (s and s+1) per iteration.
			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			// Store current positions before comparing, so later matches can find them.
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				lenght := 4 + e.matchlen(s+6, repIndex+4, src)
				seq.matchLen = uint32(lenght - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				// lenght was measured from s+2, so advance past the full match.
				s += lenght + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, lenght)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debug && s <= t {
					panic("s <= t")
				}
				if debug && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debug && s <= t {
					panic("s <= t")
				}
				if debug && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debug && t < 0 {
					panic("t<0")
				}
				break
			}
			// No match: skip ahead, accelerating as the literal run grows.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t
		if debug && s <= t {
			panic("s <= t")
		}
		if debug && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	// Flush any trailing literals that never became part of a sequence.
	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	// Persist recent offsets for the next block.
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		// The last inputMargin bytes are never hashed, so 8-byte loads stay in bounds.
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debug {
		if len(src) > maxBlockSize {
			panic("src too big")
		}
	}
	// Protect against e.cur wraparound.
	if e.cur > (1<<30)+e.maxMatchOff {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	// src is used directly; positions are relative to its start.
	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		// Too small to search: store everything as raw literals.
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)
	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	// addLiterals copies pending literals [nextEmit:until) into the block
	// and records their length on the sequence.
	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When existing the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches

		for {
			// Probe two positions (s and s+1) per iteration.
			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			// Store current positions before comparing, so later matches can find them.
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			// NOTE(review): unlike Encode, there is no repIndex >= 0 guard here;
			// presumably repIndex cannot go negative once len(blk.sequences) > 2 — confirm.
			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				// lenght := 4 + e.matchlen(s+6, repIndex+4, src)
				lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
				seq.matchLen = uint32(lenght - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				// No maxMatchLength cap needed: a single block cannot exceed it.
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				// lenght was measured from s+2, so advance past the full match.
				s += lenght + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, lenght)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debug && s <= t {
					panic("s <= t")
				}
				if debug && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debug && s <= t {
					panic("s <= t")
				}
				if debug && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debug && t < 0 {
					panic("t<0")
				}
				break
			}
			// No match: skip ahead, accelerating as the literal run grows.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t
		if debug && s <= t {
			panic("s <= t")
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlenNoHist(s+4, t+4, src) + 4
		l := int32(matchLen(src[s+4:], src[t+4:])) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
			l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	// Flush any trailing literals. Recent offsets are not persisted:
	// there are no following blocks.
	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
// addBlock appends src to the history buffer, sliding the window down
// first if there is not enough capacity. It returns the position of src
// within e.hist.
func (e *fastEncoder) addBlock(src []byte) int32 {
	// check if we have space already
	if len(e.hist)+len(src) > cap(e.hist) {
		if cap(e.hist) == 0 {
			l := e.maxMatchOff * 2
			// Make it at least 1MB.
			if l < 1<<20 {
				l = 1 << 20
			}
			e.hist = make([]byte, 0, l)
		} else {
			if cap(e.hist) < int(e.maxMatchOff*2) {
				panic("unexpected buffer size")
			}
			// Move down
			// Keep only the last maxMatchOff bytes of history; bump e.cur by
			// the amount discarded so table offsets stay consistent.
			offset := int32(len(e.hist)) - e.maxMatchOff
			copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
			e.cur += offset
			e.hist = e.hist[:e.maxMatchOff]
		}
	}
	s := int32(len(e.hist))
	e.hist = append(e.hist, src...)
	return s
}
// UseBlock will replace the block with the provided one,
// but transfer recent offsets from the previous.
func (e *fastEncoder) UseBlock(enc *blockEnc) {
	enc.reset(e.blk)
	e.blk = enc
}
// matchlenNoHist returns the number of matching bytes at s and t in src,
// without the maxMatchLength cap applied by matchlen.
func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 {
	// Extend the match to be as long as possible.
	return int32(matchLen(src[s:], src[t:]))
}
  541. func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 {
  542. if debug {
  543. if s < 0 {
  544. panic("s<0")
  545. }
  546. if t < 0 {
  547. panic("t<0")
  548. }
  549. if s-t > e.maxMatchOff {
  550. panic(s - t)
  551. }
  552. }
  553. s1 := int(s) + maxMatchLength - 4
  554. if s1 > len(src) {
  555. s1 = len(src)
  556. }
  557. // Extend the match to be as long as possible.
  558. return int32(matchLen(src[s:s1], src[t:]))
  559. }
  560. // Reset the encoding table.
  561. func (e *fastEncoder) Reset() {
  562. if e.blk == nil {
  563. e.blk = &blockEnc{}
  564. e.blk.init()
  565. } else {
  566. e.blk.reset(nil)
  567. }
  568. e.blk.initNewEncode()
  569. if e.crc == nil {
  570. e.crc = xxhash.New()
  571. } else {
  572. e.crc.Reset()
  573. }
  574. if cap(e.hist) < int(e.maxMatchOff*2) {
  575. l := e.maxMatchOff * 2
  576. // Make it at least 1MB.
  577. if l < 1<<20 {
  578. l = 1 << 20
  579. }
  580. e.hist = make([]byte, 0, l)
  581. }
  582. // We offset current position so everything will be out of reach
  583. e.cur += e.maxMatchOff + int32(len(e.hist))
  584. e.hist = e.hist[:0]
  585. }