utf7.js 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. "use strict"
  2. var Buffer = require("safer-buffer").Buffer
  3. // UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
  4. // See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
  /**
   * Codec descriptor for standard UTF-7.
   *
   * @param {Object} codecOptions - Accepted for signature compatibility; not read here.
   * @param {Object} iconv - Kept on the instance as `this.iconv`; the encoder and
   *   decoder constructors read it back from the codec they are given.
   */
  function Utf7Codec (codecOptions, iconv) {
    this.iconv = iconv
  }

  // Stream constructors and the BOM flag are published on the prototype.
  Utf7Codec.prototype.encoder = Utf7Encoder
  Utf7Codec.prototype.decoder = Utf7Decoder
  Utf7Codec.prototype.bomAware = true

  // Exported names: "utf7" is the codec constructor, "unicode11utf7" is a
  // string alias that points back at "utf7".
  exports.utf7 = Utf7Codec
  exports.unicode11utf7 = "utf7" // Alias UNICODE-1-1-UTF-7
  13. // -- Encoding
  // Matches every run of characters that is NOT written directly: anything other
  // than letters, digits, ' ( ) , - . / : ? space, LF, CR and TAB.
  // (The ",-\." part is a range that covers exactly ",", "-" and ".".)
  // The escapes inside the class are redundant, hence the lint suppression.
  // eslint-disable-next-line no-useless-escape
  var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g

  /**
   * Streaming UTF-7 encoder.
   *
   * @param {Object} options - Encoder options; not read here.
   * @param {Object} codec - Owning codec; only `codec.iconv` is read.
   */
  function Utf7Encoder (options, codec) {
    this.iconv = codec.iconv
  }

  /**
   * Encode a string chunk. Each run of non-direct characters becomes
   * "+<unpadded base64 of its UTF-16BE bytes>-"; a lone "+" becomes "+-".
   *
   * @param {string} str - Text to encode.
   * @returns {Buffer} Encoded bytes for this chunk.
   */
  Utf7Encoder.prototype.write = function (str) {
    var iconv = this.iconv

    function shiftRun (run) {
      if (run === "+") {
        return "+-"
      }
      var payload = iconv.encode(run, "utf16-be").toString("base64")
      return "+" + payload.replace(/=+$/, "") + "-"
    }

    var encoded = str.replace(nonDirectChars, shiftRun)
    return Buffer.from(encoded)
  }

  /**
   * Nothing is buffered between writes, so there is nothing to flush.
   *
   * @returns {undefined}
   */
  Utf7Encoder.prototype.end = function () {
  }
  32. // -- Decoding
  /**
   * Streaming decoder for standard UTF-7.
   *
   * State carried between write() calls:
   *  - inBase64:    true while inside a section opened by "+".
   *  - base64Accum: base64 characters received but not yet decoded.
   *  - base64Seen:  true once at least one base64 character has been consumed
   *                 since the opening "+". Needed to tell the literal-plus
   *                 sequence "+-" apart from a base64 run whose terminating "-"
   *                 arrives as the first byte of a later chunk.
   *
   * @param {Object} options - Decoder options; not read here.
   * @param {Object} codec - Owning codec; only `codec.iconv` is read.
   */
  function Utf7Decoder (options, codec) {
    this.iconv = codec.iconv
    this.inBase64 = false
    this.base64Accum = ""
    this.base64Seen = false
  }

  // The escaped "/" is redundant inside a character class, hence the lint suppression.
  // eslint-disable-next-line no-useless-escape
  var base64Regex = /[A-Za-z0-9\/+]/
  // Lookup table: base64Chars[byte] is true when the byte is a base64 alphabet character.
  var base64Chars = []
  for (var i = 0; i < 256; i++) { base64Chars[i] = base64Regex.test(String.fromCharCode(i)) }
  var plusChar = "+".charCodeAt(0)
  var minusChar = "-".charCodeAt(0)
  var andChar = "&".charCodeAt(0)

  /**
   * Decode one chunk of UTF-7 bytes.
   *
   * @param {Buffer} buf - Next chunk of encoded input.
   * @returns {string} Text that could be decoded so far; an incomplete base64
   *   tail is kept in `this.base64Accum` for the next write() or end().
   */
  Utf7Decoder.prototype.write = function (buf) {
    var res = ""
    var lastI = 0
    var inBase64 = this.inBase64
    var base64Accum = this.base64Accum
    var base64Seen = this.base64Seen
    var b64str

    // The decoder is more involved as we must handle chunks in stream.
    for (var i = 0; i < buf.length; i++) {
      if (!inBase64) { // We're in direct mode.
        // Write direct chars until '+'
        if (buf[i] === plusChar) {
          res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
          lastI = i + 1
          inBase64 = true
          base64Seen = false
        }
      } else if (!base64Chars[buf[i]]) { // Base64 ended.
        // "+-" -> "+", but only when no base64 character followed the "+".
        // `i === lastI` alone is not enough: lastI restarts at 0 for every chunk,
        // so a "-" at the start of a chunk may be closing a run from earlier chunks.
        if (i === lastI && !base64Seen && buf[i] === minusChar) {
          res += "+"
        } else {
          b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii")
          res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
        }

        // Minus is absorbed after base64; any other byte is re-read in direct mode.
        if (buf[i] !== minusChar) { i-- }

        lastI = i + 1
        inBase64 = false
        base64Accum = ""
        base64Seen = false
      }
    }

    if (!inBase64) {
      res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
    } else {
      // Every byte from lastI to the end of the chunk is a base64 character.
      if (lastI < buf.length) { base64Seen = true }

      b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii")

      var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
      base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
      b64str = b64str.slice(0, canBeDecoded)

      res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
    }

    this.inBase64 = inBase64
    this.base64Accum = base64Accum
    this.base64Seen = base64Seen

    return res
  }

  /**
   * Finish decoding: flush any base64 still pending and reset the state.
   *
   * @returns {string} Text decoded from the pending base64, or "" if none.
   */
  Utf7Decoder.prototype.end = function () {
    var res = ""
    if (this.inBase64 && this.base64Accum.length > 0) {
      res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be")
    }

    this.inBase64 = false
    this.base64Accum = ""
    this.base64Seen = false
    return res
  }
  95. // UTF-7-IMAP codec.
  96. // RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
  97. // Differences:
  98. // * Base64 part is started by "&" instead of "+"
  99. // * Direct characters are 0x20-0x7E, except "&" (0x26)
  100. // * In Base64, "," is used instead of "/"
  101. // * Base64 must not be used to represent direct characters.
  102. // * No implicit shift back from Base64 (should always end with '-')
  103. // * String must end in non-shifted position.
  104. // * "-&" while in base64 is not allowed.
  /**
   * Codec descriptor for the IMAP variant of UTF-7.
   *
   * @param {Object} codecOptions - Accepted for signature compatibility; not read here.
   * @param {Object} iconv - Kept on the instance as `this.iconv`; the encoder and
   *   decoder constructors read it back from the codec they are given.
   */
  function Utf7IMAPCodec (codecOptions, iconv) {
    this.iconv = iconv
  }

  // Stream constructors and the BOM flag are published on the prototype.
  Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder
  Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder
  Utf7IMAPCodec.prototype.bomAware = true

  // Exported under the name "utf7imap".
  exports.utf7imap = Utf7IMAPCodec
  112. // -- Encoding
  /**
   * Streaming encoder for the IMAP variant of UTF-7.
   *
   * State carried between write() calls:
   *  - inBase64:       true while a "&" section is open.
   *  - base64Accum:    6-byte scratch buffer holding UTF-16BE bytes not yet emitted.
   *  - base64AccumIdx: number of valid bytes currently in base64Accum.
   *
   * @param {Object} options - Encoder options; not read here.
   * @param {Object} codec - Owning codec; only `codec.iconv` is read.
   */
  function Utf7IMAPEncoder (options, codec) {
    this.iconv = codec.iconv
    this.inBase64 = false
    this.base64Accum = Buffer.alloc(6)
    this.base64AccumIdx = 0
  }

  // Render the first `byteCount` bytes of `bytes` as base64 with "/" swapped
  // for "," and any "=" padding removed.
  function toModifiedBase64 (bytes, byteCount) {
    return bytes.slice(0, byteCount).toString("base64").replace(/\//g, ",").replace(/=+$/, "")
  }

  /**
   * Encode one chunk of text.
   *
   * Code units 0x20-0x7E are written as-is ("&" is followed by "-"); every other
   * UTF-16 code unit is collected as two big-endian bytes and emitted as modified
   * base64 inside "&...-".
   *
   * @param {string} str - Text to encode.
   * @returns {Buffer} Encoded bytes for this chunk; an open base64 section is
   *   closed by a later write() or by end().
   */
  Utf7IMAPEncoder.prototype.write = function (str) {
    var shifted = this.inBase64
    var pending = this.base64Accum
    var pendingLen = this.base64AccumIdx
    var out = Buffer.alloc(str.length * 5 + 10)
    var outLen = 0

    for (var pos = 0; pos < str.length; pos++) {
      var unit = str.charCodeAt(pos)
      var isDirect = unit >= 0x20 && unit <= 0x7E

      if (!isDirect) {
        // Open a base64 section if needed, then queue the code unit.
        if (!shifted) {
          out[outLen++] = andChar
          shifted = true
        }
        pending[pendingLen++] = unit >> 8
        pending[pendingLen++] = unit & 0xFF

        // A full scratch buffer (6 bytes) encodes to 8 characters without padding.
        if (pendingLen == pending.length) {
          outLen += out.write(toModifiedBase64(pending, pendingLen), outLen)
          pendingLen = 0
        }
        continue
      }

      // Direct character: first close any open base64 section.
      if (shifted) {
        if (pendingLen > 0) {
          outLen += out.write(toModifiedBase64(pending, pendingLen), outLen)
          pendingLen = 0
        }
        out[outLen++] = minusChar
        shifted = false
      }

      out[outLen++] = unit
      if (unit === andChar) { // Ampersand -> '&-'
        out[outLen++] = minusChar
      }
    }

    this.inBase64 = shifted
    this.base64AccumIdx = pendingLen

    return out.slice(0, outLen)
  }

  /**
   * Finish encoding: emit any queued bytes and the closing "-" when a base64
   * section is still open.
   *
   * @returns {Buffer} Trailing bytes (empty when already in direct mode).
   */
  Utf7IMAPEncoder.prototype.end = function () {
    var out = Buffer.alloc(10)
    var outLen = 0

    if (!this.inBase64) {
      return out.slice(0, outLen)
    }

    if (this.base64AccumIdx > 0) {
      outLen += out.write(toModifiedBase64(this.base64Accum, this.base64AccumIdx), outLen)
      this.base64AccumIdx = 0
    }
    out[outLen++] = minusChar // Back to direct mode.
    this.inBase64 = false

    return out.slice(0, outLen)
  }
  171. // -- Decoding
  /**
   * Streaming decoder for the IMAP variant of UTF-7.
   *
   * State carried between write() calls:
   *  - inBase64:    true while inside a section opened by "&".
   *  - base64Accum: base64 characters (already mapped "," -> "/") not yet decoded.
   *  - base64Seen:  true once at least one base64 character has been consumed
   *                 since the opening "&". Needed to tell the literal-ampersand
   *                 sequence "&-" apart from a base64 run whose terminating "-"
   *                 arrives as the first byte of a later chunk.
   *
   * @param {Object} options - Decoder options; not read here.
   * @param {Object} codec - Owning codec; only `codec.iconv` is read.
   */
  function Utf7IMAPDecoder (options, codec) {
    this.iconv = codec.iconv
    this.inBase64 = false
    this.base64Accum = ""
    this.base64Seen = false
  }

  // Lookup table: base64IMAPChars[byte] is true for the bytes accepted inside a
  // "&...-" section: the regular base64 alphabet (including "/") plus ",".
  var base64IMAPChars = (function () {
    // eslint-disable-next-line no-useless-escape
    var alphabet = /[A-Za-z0-9\/+,]/
    var table = []
    for (var code = 0; code < 256; code++) {
      table[code] = alphabet.test(String.fromCharCode(code))
    }
    return table
  })()

  /**
   * Decode one chunk of UTF-7-IMAP bytes.
   *
   * @param {Buffer} buf - Next chunk of encoded input.
   * @returns {string} Text that could be decoded so far; an incomplete base64
   *   tail is kept in `this.base64Accum` for the next write() or end().
   */
  Utf7IMAPDecoder.prototype.write = function (buf) {
    var res = ""
    var lastI = 0
    var inBase64 = this.inBase64
    var base64Accum = this.base64Accum
    var base64Seen = this.base64Seen
    var b64str

    // The decoder is more involved as we must handle chunks in stream.
    // It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
    for (var i = 0; i < buf.length; i++) {
      if (!inBase64) { // We're in direct mode.
        // Write direct chars until '&'
        if (buf[i] === andChar) {
          res += this.iconv.decode(buf.slice(lastI, i), "ascii") // Write direct chars.
          lastI = i + 1
          inBase64 = true
          base64Seen = false
        }
      } else if (!base64IMAPChars[buf[i]]) { // Base64 ended.
        // "&-" -> "&", but only when no base64 character followed the "&".
        // `i === lastI` alone is not enough: lastI restarts at 0 for every chunk,
        // so a "-" at the start of a chunk may be closing a run from earlier chunks.
        if (i === lastI && !base64Seen && buf[i] === minusChar) {
          res += "&"
        } else {
          b64str = base64Accum + this.iconv.decode(buf.slice(lastI, i), "ascii").replace(/,/g, "/")
          res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
        }

        // Minus may be absorbed after base64; any other byte is re-read in direct mode.
        if (buf[i] !== minusChar) { i-- }

        lastI = i + 1
        inBase64 = false
        base64Accum = ""
        base64Seen = false
      }
    }

    if (!inBase64) {
      res += this.iconv.decode(buf.slice(lastI), "ascii") // Write direct chars.
    } else {
      // Every byte from lastI to the end of the chunk is a base64 character.
      if (lastI < buf.length) { base64Seen = true }

      b64str = base64Accum + this.iconv.decode(buf.slice(lastI), "ascii").replace(/,/g, "/")

      var canBeDecoded = b64str.length - (b64str.length % 8) // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
      base64Accum = b64str.slice(canBeDecoded) // The rest will be decoded in future.
      b64str = b64str.slice(0, canBeDecoded)

      res += this.iconv.decode(Buffer.from(b64str, "base64"), "utf16-be")
    }

    this.inBase64 = inBase64
    this.base64Accum = base64Accum
    this.base64Seen = base64Seen

    return res
  }

  /**
   * Finish decoding: flush any base64 still pending and reset the state.
   *
   * @returns {string} Text decoded from the pending base64, or "" if none.
   */
  Utf7IMAPDecoder.prototype.end = function () {
    var res = ""
    if (this.inBase64 && this.base64Accum.length > 0) {
      res = this.iconv.decode(Buffer.from(this.base64Accum, "base64"), "utf16-be")
    }

    this.inBase64 = false
    this.base64Accum = ""
    this.base64Seen = false
    return res
  }