reader.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. package lz4
  2. import (
  3. "encoding/binary"
  4. "fmt"
  5. "io"
  6. "io/ioutil"
  7. "github.com/pierrec/lz4/internal/xxh32"
  8. )
  9. // Reader implements the LZ4 frame decoder.
  10. // The Header is set after the first call to Read().
  11. // The Header may change between Read() calls in case of concatenated frames.
  12. type Reader struct {
  13. Header
  14. buf [8]byte // Scrap buffer.
  15. pos int64 // Current position in src.
  16. src io.Reader // Source.
  17. zdata []byte // Compressed data.
  18. data []byte // Uncompressed data.
  19. idx int // Index of unread bytes into data.
  20. checksum xxh32.XXHZero // Frame hash.
  21. }
  22. // NewReader returns a new LZ4 frame decoder.
  23. // No access to the underlying io.Reader is performed.
  24. func NewReader(src io.Reader) *Reader {
  25. r := &Reader{src: src}
  26. return r
  27. }
  28. // readHeader checks the frame magic number and parses the frame descriptoz.
  29. // Skippable frames are supported even as a first frame although the LZ4
  30. // specifications recommends skippable frames not to be used as first frames.
  31. func (z *Reader) readHeader(first bool) error {
  32. defer z.checksum.Reset()
  33. buf := z.buf[:]
  34. for {
  35. magic, err := z.readUint32()
  36. if err != nil {
  37. z.pos += 4
  38. if !first && err == io.ErrUnexpectedEOF {
  39. return io.EOF
  40. }
  41. return err
  42. }
  43. if magic == frameMagic {
  44. break
  45. }
  46. if magic>>8 != frameSkipMagic>>8 {
  47. return ErrInvalid
  48. }
  49. skipSize, err := z.readUint32()
  50. if err != nil {
  51. return err
  52. }
  53. z.pos += 4
  54. m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
  55. if err != nil {
  56. return err
  57. }
  58. z.pos += m
  59. }
  60. // Header.
  61. if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
  62. return err
  63. }
  64. z.pos += 8
  65. b := buf[0]
  66. if v := b >> 6; v != Version {
  67. return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
  68. }
  69. if b>>5&1 == 0 {
  70. return fmt.Errorf("lz4: block dependency not supported")
  71. }
  72. z.BlockChecksum = b>>4&1 > 0
  73. frameSize := b>>3&1 > 0
  74. z.NoChecksum = b>>2&1 == 0
  75. bmsID := buf[1] >> 4 & 0x7
  76. bSize, ok := bsMapID[bmsID]
  77. if !ok {
  78. return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
  79. }
  80. z.BlockMaxSize = bSize
  81. // Allocate the compressed/uncompressed buffers.
  82. // The compressed buffer cannot exceed the uncompressed one.
  83. if n := 2 * bSize; cap(z.zdata) < n {
  84. z.zdata = make([]byte, n, n)
  85. }
  86. if debugFlag {
  87. debug("header block max size id=%d size=%d", bmsID, bSize)
  88. }
  89. z.zdata = z.zdata[:bSize]
  90. z.data = z.zdata[:cap(z.zdata)][bSize:]
  91. z.idx = len(z.data)
  92. z.checksum.Write(buf[0:2])
  93. if frameSize {
  94. buf := buf[:8]
  95. if _, err := io.ReadFull(z.src, buf); err != nil {
  96. return err
  97. }
  98. z.Size = binary.LittleEndian.Uint64(buf)
  99. z.pos += 8
  100. z.checksum.Write(buf)
  101. }
  102. // Header checksum.
  103. if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
  104. return err
  105. }
  106. z.pos++
  107. if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
  108. return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
  109. }
  110. z.Header.done = true
  111. if debugFlag {
  112. debug("header read: %v", z.Header)
  113. }
  114. return nil
  115. }
  116. // Read decompresses data from the underlying source into the supplied buffer.
  117. //
  118. // Since there can be multiple streams concatenated, Header values may
  119. // change between calls to Read(). If that is the case, no data is actually read from
  120. // the underlying io.Reader, to allow for potential input buffer resizing.
  121. func (z *Reader) Read(buf []byte) (int, error) {
  122. if debugFlag {
  123. debug("Read buf len=%d", len(buf))
  124. }
  125. if !z.Header.done {
  126. if err := z.readHeader(true); err != nil {
  127. return 0, err
  128. }
  129. if debugFlag {
  130. debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
  131. len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
  132. }
  133. }
  134. if len(buf) == 0 {
  135. return 0, nil
  136. }
  137. if z.idx == len(z.data) {
  138. // No data ready for reading, process the next block.
  139. if debugFlag {
  140. debug("reading block from writer")
  141. }
  142. // Block length: 0 = end of frame, highest bit set: uncompressed.
  143. bLen, err := z.readUint32()
  144. if err != nil {
  145. return 0, err
  146. }
  147. z.pos += 4
  148. if bLen == 0 {
  149. // End of frame reached.
  150. if !z.NoChecksum {
  151. // Validate the frame checksum.
  152. checksum, err := z.readUint32()
  153. if err != nil {
  154. return 0, err
  155. }
  156. if debugFlag {
  157. debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
  158. }
  159. z.pos += 4
  160. if h := z.checksum.Sum32(); checksum != h {
  161. return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
  162. }
  163. }
  164. // Get ready for the next concatenated frame and keep the position.
  165. pos := z.pos
  166. z.Reset(z.src)
  167. z.pos = pos
  168. // Since multiple frames can be concatenated, check for more.
  169. return 0, z.readHeader(false)
  170. }
  171. if debugFlag {
  172. debug("raw block size %d", bLen)
  173. }
  174. if bLen&compressedBlockFlag > 0 {
  175. // Uncompressed block.
  176. bLen &= compressedBlockMask
  177. if debugFlag {
  178. debug("uncompressed block size %d", bLen)
  179. }
  180. if int(bLen) > cap(z.data) {
  181. return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
  182. }
  183. z.data = z.data[:bLen]
  184. if _, err := io.ReadFull(z.src, z.data); err != nil {
  185. return 0, err
  186. }
  187. z.pos += int64(bLen)
  188. if z.BlockChecksum {
  189. checksum, err := z.readUint32()
  190. if err != nil {
  191. return 0, err
  192. }
  193. z.pos += 4
  194. if h := xxh32.ChecksumZero(z.data); h != checksum {
  195. return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
  196. }
  197. }
  198. } else {
  199. // Compressed block.
  200. if debugFlag {
  201. debug("compressed block size %d", bLen)
  202. }
  203. if int(bLen) > cap(z.data) {
  204. return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
  205. }
  206. zdata := z.zdata[:bLen]
  207. if _, err := io.ReadFull(z.src, zdata); err != nil {
  208. return 0, err
  209. }
  210. z.pos += int64(bLen)
  211. if z.BlockChecksum {
  212. checksum, err := z.readUint32()
  213. if err != nil {
  214. return 0, err
  215. }
  216. z.pos += 4
  217. if h := xxh32.ChecksumZero(zdata); h != checksum {
  218. return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
  219. }
  220. }
  221. n, err := UncompressBlock(zdata, z.data)
  222. if err != nil {
  223. return 0, err
  224. }
  225. z.data = z.data[:n]
  226. }
  227. if !z.NoChecksum {
  228. z.checksum.Write(z.data)
  229. if debugFlag {
  230. debug("current frame checksum %x", z.checksum.Sum32())
  231. }
  232. }
  233. z.idx = 0
  234. }
  235. n := copy(buf, z.data[z.idx:])
  236. z.idx += n
  237. if debugFlag {
  238. debug("copied %d bytes to input", n)
  239. }
  240. return n, nil
  241. }
  242. // Reset discards the Reader's state and makes it equivalent to the
  243. // result of its original state from NewReader, but reading from r instead.
  244. // This permits reusing a Reader rather than allocating a new one.
  245. func (z *Reader) Reset(r io.Reader) {
  246. z.Header = Header{}
  247. z.pos = 0
  248. z.src = r
  249. z.zdata = z.zdata[:0]
  250. z.data = z.data[:0]
  251. z.idx = 0
  252. z.checksum.Reset()
  253. }
  254. // readUint32 reads an uint32 into the supplied buffer.
  255. // The idea is to make use of the already allocated buffers avoiding additional allocations.
  256. func (z *Reader) readUint32() (uint32, error) {
  257. buf := z.buf[:4]
  258. _, err := io.ReadFull(z.src, buf)
  259. x := binary.LittleEndian.Uint32(buf)
  260. return x, err
  261. }