internal.js 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. "use strict"
  2. var Buffer = require("safer-buffer").Buffer
  3. // Export Node.js internal encodings.
  4. module.exports = {
  5. // Encodings
  6. utf8: { type: "_internal", bomAware: true },
  7. cesu8: { type: "_internal", bomAware: true },
  8. unicode11utf8: "utf8",
  9. ucs2: { type: "_internal", bomAware: true },
  10. utf16le: "ucs2",
  11. binary: { type: "_internal" },
  12. base64: { type: "_internal" },
  13. hex: { type: "_internal" },
  14. // Codec.
  15. _internal: InternalCodec
  16. }
  17. // ------------------------------------------------------------------------------
  // Codec for the encodings Node.js implements itself.
  //
  // codecOptions.encodingName selects the encoding and codecOptions.bomAware is
  // copied onto the codec unchanged. Encodings without a dedicated case fall
  // back to the prototype-level encoder/decoder assigned after the constructor.
  function InternalCodec (codecOptions, iconv) {
    this.enc = codecOptions.encodingName
    this.bomAware = codecOptions.bomAware

    switch (this.enc) {
      case "base64":
        this.encoder = InternalEncoderBase64
        break

      case "utf8":
        this.encoder = InternalEncoderUtf8
        break

      case "cesu8":
        // Decoding is done with the utf8 name; only encoding needs CESU-8 rules.
        this.enc = "utf8"
        this.encoder = InternalEncoderCesu8

        // Probe the runtime: if its utf8 decoder does not turn the CESU-8
        // surrogate-pair bytes into the expected character, install our own
        // decoder together with the replacement character it emits.
        var probe = Buffer.from("eda0bdedb2a9", "hex").toString()
        if (probe !== "💩") {
          this.decoder = InternalDecoderCesu8
          this.defaultCharUnicode = iconv.defaultCharUnicode
        }
        break
    }
  }

  // Defaults shared by every encoding that has no special case above.
  InternalCodec.prototype.encoder = InternalEncoder
  InternalCodec.prototype.decoder = InternalDecoder
  33. // ------------------------------------------------------------------------------
  34. // We use node.js internal decoder. Its signature is the same as ours.
  35. var StringDecoder = require("string_decoder").StringDecoder
  // Decoder that hands all work to a Node.js StringDecoder created for
  // codec.enc. The `options` argument is accepted but not read.
  function InternalDecoder (options, codec) {
    this.decoder = new StringDecoder(codec.enc)
  }

  // Decodes one chunk and returns the resulting string. Anything that is not
  // already a Buffer is first converted with Buffer.from.
  InternalDecoder.prototype.write = function (buf) {
    var chunk = Buffer.isBuffer(buf) ? buf : Buffer.from(buf)
    return this.decoder.write(chunk)
  }

  // Finishes decoding and returns whatever the StringDecoder's end() yields.
  InternalDecoder.prototype.end = function () {
    var remainder = this.decoder.end()
    return remainder
  }
  48. // ------------------------------------------------------------------------------
  49. // Encoder is mostly trivial
  // Stateless encoder: remembers the codec's encoding name and lets
  // Buffer.from do the conversion. The `options` argument is not read.
  function InternalEncoder (options, codec) {
    this.enc = codec.enc
  }

  // Converts one string chunk into a Buffer using the stored encoding name.
  InternalEncoder.prototype.write = function (str) {
    var encodingName = this.enc
    return Buffer.from(str, encodingName)
  }

  // No state is kept between chunks, so there is nothing to flush.
  InternalEncoder.prototype.end = function () {
    return undefined
  }
  58. // ------------------------------------------------------------------------------
  59. // Except base64 encoder, which must keep its state.
  // Base64 "encoder" (base64 text in, raw bytes out). Base64 is processed in
  // groups of four characters, so characters that do not yet complete a group
  // are held in `prevStr` until a later chunk (or end) arrives.
  function InternalEncoderBase64 (options, codec) {
    this.prevStr = ""
  }

  // Converts as many complete four-character groups as are available and
  // stores the remaining zero to three characters for the next call.
  InternalEncoderBase64.prototype.write = function (str) {
    var pending = this.prevStr + str
    var leftover = pending.length % 4
    var splitAt = pending.length - leftover

    this.prevStr = pending.slice(splitAt)
    return Buffer.from(pending.slice(0, splitAt), "base64")
  }

  // Converts whatever characters are still held back.
  InternalEncoderBase64.prototype.end = function () {
    var remaining = this.prevStr
    return Buffer.from(remaining, "base64")
  }
  73. // ------------------------------------------------------------------------------
  74. // CESU-8 encoder is also special.
  // CESU-8 encoder. Keeps no state; both constructor arguments are unused.
  function InternalEncoderCesu8 (options, codec) {
  }

  // Encodes every UTF-16 code unit of `str` on its own as a 1-, 2- or 3-byte
  // sequence. Surrogate halves are therefore written as two separate 3-byte
  // sequences rather than one 4-byte sequence.
  InternalEncoderCesu8.prototype.write = function (str) {
    var unitCount = str.length
    // A single code unit never needs more than three bytes.
    var out = Buffer.alloc(unitCount * 3)
    var pos = 0

    for (var k = 0; k < unitCount; k++) {
      var unit = str.charCodeAt(k)

      if (unit >= 0x800) {
        // Three bytes; charCodeAt never returns a value above 0xFFFF.
        out[pos] = 0xE0 + (unit >>> 12)
        out[pos + 1] = 0x80 + ((unit >>> 6) & 0x3f)
        out[pos + 2] = 0x80 + (unit & 0x3f)
        pos += 3
      } else if (unit >= 0x80) {
        // Two bytes.
        out[pos] = 0xC0 + (unit >>> 6)
        out[pos + 1] = 0x80 + (unit & 0x3f)
        pos += 2
      } else {
        // Plain ASCII.
        out[pos] = unit
        pos += 1
      }
    }

    // Drop the unused tail of the worst-case allocation.
    return out.slice(0, pos)
  }

  // Nothing is buffered between chunks, so there is nothing to flush.
  InternalEncoderCesu8.prototype.end = function () {
  }
  96. // ------------------------------------------------------------------------------
  97. // CESU-8 decoder is not implemented in Node v4.0+
  // Streaming CESU-8 decoder.
  //
  // State carried between write() calls:
  //   acc       - bits gathered so far for the sequence in progress
  //   contBytes - continuation bytes still expected for that sequence
  //   accBytes  - bytes consumed so far for that sequence
  // codec.defaultCharUnicode is emitted in place of every invalid sequence.
  // The `options` argument is not read.
  function InternalDecoderCesu8 (options, codec) {
    this.acc = 0
    this.contBytes = 0
    this.accBytes = 0
    this.defaultCharUnicode = codec.defaultCharUnicode
  }

  // Decodes one chunk of bytes and returns the characters completed by it.
  // A sequence cut off at the end of the chunk is kept in the decoder state
  // and finished by the next write().
  InternalDecoderCesu8.prototype.write = function (buf) {
    var acc = this.acc
    var contBytes = this.contBytes
    var accBytes = this.accBytes
    var res = ""

    for (var i = 0; i < buf.length; i++) {
      var curByte = buf[i]

      if ((curByte & 0xC0) !== 0x80) { // Leading byte
        if (contBytes > 0) { // Previous sequence was cut short
          res += this.defaultCharUnicode
          contBytes = 0
        }

        if (curByte < 0x80) { // Single-byte code
          res += String.fromCharCode(curByte)
        } else if (curByte < 0xE0) { // Two-byte code
          acc = curByte & 0x1F
          contBytes = 1
          accBytes = 1
        } else if (curByte < 0xF0) { // Three-byte code
          acc = curByte & 0x0F
          contBytes = 2
          accBytes = 1
        } else { // Four or more bytes are not supported for CESU-8.
          res += this.defaultCharUnicode
        }
      } else if (contBytes > 0) { // Continuation byte we were waiting for
        acc = (acc << 6) | (curByte & 0x3f)
        contBytes--
        accBytes++

        if (contBytes === 0) {
          // Reject overlong encodings, but accept Modified UTF-8's C0 80 as NULL.
          if (accBytes === 2 && acc < 0x80 && acc > 0) {
            res += this.defaultCharUnicode
          } else if (accBytes === 3 && acc < 0x800) {
            res += this.defaultCharUnicode
          } else {
            res += String.fromCharCode(acc)
          }
        }
      } else { // Unexpected continuation byte
        res += this.defaultCharUnicode
      }
    }

    this.acc = acc
    this.contBytes = contBytes
    this.accBytes = accBytes
    return res
  }

  // Finishes decoding. Returns the replacement character when the input ended
  // in the middle of a multi-byte sequence, otherwise an empty string.
  InternalDecoderCesu8.prototype.end = function () {
    // Must start from a string: starting from the number 0 would yield 0 or
    // "0" + replacement character instead of text.
    var res = ""
    if (this.contBytes > 0) {
      res += this.defaultCharUnicode
      // The truncated sequence has now been reported; do not report it again.
      this.contBytes = 0
    }
    return res
  }
  153. // ------------------------------------------------------------------------------
  154. // check the chunk boundaries for surrogate pair
  // UTF-8 encoder that copes with a surrogate pair split across two chunks.
  // A high surrogate at the very end of a chunk is held back in
  // `highSurrogate` and prepended to the next chunk. Neither constructor
  // argument is read.
  function InternalEncoderUtf8 (options, codec) {
    this.highSurrogate = ""
  }

  // Encodes one chunk to a UTF-8 Buffer, withholding a trailing high surrogate.
  InternalEncoderUtf8.prototype.write = function (str) {
    if (this.highSurrogate) {
      str = this.highSurrogate + str
      this.highSurrogate = ""
    }

    if (str.length > 0) {
      var lastIdx = str.length - 1
      var charCode = str.charCodeAt(lastIdx)
      if (charCode >= 0xd800 && charCode < 0xdc00) {
        // Its low surrogate may arrive with the next chunk; wait for it.
        this.highSurrogate = str[lastIdx]
        str = str.slice(0, lastIdx)
      }
    }

    // The encoding is named explicitly: this encoder never stores an `enc`
    // property, so reading one would only work through Buffer.from's default.
    return Buffer.from(str, "utf8")
  }

  // Flushes a withheld high surrogate, encoded on its own. Returns undefined
  // when nothing is pending.
  InternalEncoderUtf8.prototype.end = function () {
    if (this.highSurrogate) {
      var str = this.highSurrogate
      this.highSurrogate = ""
      return Buffer.from(str, "utf8")
    }
  }