123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218 |
- "use strict"
- var Buffer = require("safer-buffer").Buffer
- // Export Node.js internal encodings.
- module.exports = {
- // Encodings
- utf8: { type: "_internal", bomAware: true },
- cesu8: { type: "_internal", bomAware: true },
- unicode11utf8: "utf8",
- ucs2: { type: "_internal", bomAware: true },
- utf16le: "ucs2",
- binary: { type: "_internal" },
- base64: { type: "_internal" },
- hex: { type: "_internal" },
- // Codec.
- _internal: InternalCodec
- }
- // ------------------------------------------------------------------------------
/**
 * Codec constructor shared by every "_internal" encoding.
 *
 * Records the encoding name and BOM flag from the options and, for the
 * encodings that need special handling, installs a dedicated encoder
 * (and possibly decoder) directly on the instance.
 *
 * @param {Object} codecOptions - reads `encodingName` and `bomAware`.
 * @param {Object} iconv - reads `defaultCharUnicode` (CESU-8 fallback only).
 */
function InternalCodec (codecOptions, iconv) {
  this.enc = codecOptions.encodingName
  this.bomAware = codecOptions.bomAware

  switch (this.enc) {
    case "base64":
      this.encoder = InternalEncoderBase64
      break

    case "utf8":
      this.encoder = InternalEncoderUtf8
      break

    case "cesu8":
      // Use utf8 for decoding.
      this.enc = "utf8"
      this.encoder = InternalEncoderCesu8

      // Probe the runtime: if Node does not decode this CESU-8 surrogate
      // pair into the expected character, plug in our own decoder.
      var probe = Buffer.from("eda0bdedb2a9", "hex").toString()
      if (probe !== "💩") {
        this.decoder = InternalDecoderCesu8
        this.defaultCharUnicode = iconv.defaultCharUnicode
      }
      break
  }
}
// Defaults shared through the prototype: any codec instance that did not
// get its own encoder/decoder assigned in the constructor uses these.
InternalCodec.prototype.encoder = InternalEncoder
InternalCodec.prototype.decoder = InternalDecoder
- // ------------------------------------------------------------------------------
- // We use node.js internal decoder. Its signature is the same as ours.
- var StringDecoder = require("string_decoder").StringDecoder
/**
 * Decoder that delegates to Node's StringDecoder.
 *
 * @param {Object} options - not used.
 * @param {Object} codec - `codec.enc` is the encoding given to StringDecoder.
 */
function InternalDecoder (options, codec) {
  this.decoder = new StringDecoder(codec.enc)
}

/**
 * Decode one chunk.
 *
 * @param {Buffer|*} buf - input; anything that is not a Buffer is first
 *   converted with `Buffer.from`.
 * @returns {string} whatever the underlying StringDecoder returns for the chunk.
 */
InternalDecoder.prototype.write = function (buf) {
  var chunk = Buffer.isBuffer(buf) ? buf : Buffer.from(buf)
  return this.decoder.write(chunk)
}

/**
 * Finish decoding.
 *
 * @returns {string} the result of the underlying StringDecoder's `end()`.
 */
InternalDecoder.prototype.end = function () {
  return this.decoder.end()
}
- // ------------------------------------------------------------------------------
- // Encoder is mostly trivial
/**
 * Stateless encoder that hands the work to `Buffer.from`.
 *
 * @param {Object} options - not used.
 * @param {Object} codec - `codec.enc` is remembered as the target encoding.
 */
function InternalEncoder (options, codec) {
  this.enc = codec.enc
}

/**
 * Encode one chunk.
 *
 * @param {string} str - text to encode.
 * @returns {Buffer} `str` converted with the stored encoding.
 */
InternalEncoder.prototype.write = function (str) {
  var encoding = this.enc
  return Buffer.from(str, encoding)
}

/**
 * Nothing is buffered between writes, so there is nothing to flush.
 *
 * @returns {undefined}
 */
InternalEncoder.prototype.end = function () {
}
- // ------------------------------------------------------------------------------
- // Except base64 encoder, which must keep its state.
/**
 * Base64 "encoder": turns base64 text into the bytes it represents.
 * It is stateful because a chunk may end in the middle of a 4-character
 * group; the incomplete tail is kept in `prevStr` until more text arrives.
 *
 * @param {Object} options - not used.
 * @param {Object} codec - not used.
 */
function InternalEncoderBase64 (options, codec) {
  this.prevStr = ""
}

/**
 * Convert as many complete 4-character groups as are available.
 *
 * @param {string} str - next piece of base64 text.
 * @returns {Buffer} bytes for the complete groups; the remainder is held back.
 */
InternalEncoderBase64.prototype.write = function (str) {
  var pending = this.prevStr + str
  var leftover = pending.length % 4
  var cut = pending.length - leftover

  this.prevStr = pending.slice(cut)
  return Buffer.from(pending.slice(0, cut), "base64")
}

/**
 * Convert whatever tail is still held back.
 *
 * @returns {Buffer} bytes decoded from the remaining (possibly empty) text.
 */
InternalEncoderBase64.prototype.end = function () {
  var tail = this.prevStr
  return Buffer.from(tail, "base64")
}
- // ------------------------------------------------------------------------------
- // CESU-8 encoder is also special.
/**
 * CESU-8 encoder. Keeps no state between writes.
 *
 * @param {Object} options - not used.
 * @param {Object} codec - not used.
 */
function InternalEncoderCesu8 (options, codec) {
}

/**
 * Encode a string by emitting every UTF-16 code unit on its own as a
 * 1-, 2- or 3-byte sequence. Surrogate halves are therefore written as two
 * separate 3-byte sequences rather than one 4-byte sequence.
 *
 * @param {string} str - text to encode.
 * @returns {Buffer} the encoded bytes.
 */
InternalEncoderCesu8.prototype.write = function (str) {
  // A code unit never needs more than three bytes.
  var out = Buffer.alloc(str.length * 3)
  var pos = 0

  for (var idx = 0; idx < str.length; idx++) {
    var unit = str.charCodeAt(idx)

    if (unit < 0x80) {
      out[pos++] = unit
      continue
    }

    if (unit < 0x800) {
      out[pos++] = 0xC0 | (unit >>> 6)
      out[pos++] = 0x80 | (unit & 0x3f)
      continue
    }

    // charCodeAt yields UTF-16 code units, so the value is below 0x10000.
    out[pos++] = 0xE0 | (unit >>> 12)
    out[pos++] = 0x80 | ((unit >>> 6) & 0x3f)
    out[pos++] = 0x80 | (unit & 0x3f)
  }

  return out.slice(0, pos)
}

/**
 * Nothing is buffered, so there is nothing to flush.
 *
 * @returns {undefined}
 */
InternalEncoderCesu8.prototype.end = function () {
}
- // ------------------------------------------------------------------------------
- // CESU-8 decoder is not implemented in Node v4.0+
/**
 * Streaming CESU-8 decoder, used when the runtime's own UTF-8 decoder does
 * not handle CESU-8 surrogate sequences.
 *
 * State carried between writes:
 *   acc       - bits of the code unit collected so far
 *   contBytes - continuation bytes still expected
 *   accBytes  - bytes consumed for the current sequence (for overlong checks)
 *
 * @param {Object} options - not used.
 * @param {Object} codec - `codec.defaultCharUnicode` is the replacement
 *   text emitted for every invalid or truncated sequence.
 */
function InternalDecoderCesu8 (options, codec) {
  this.acc = 0
  this.contBytes = 0
  this.accBytes = 0
  this.defaultCharUnicode = codec.defaultCharUnicode
}

/**
 * Decode one chunk of bytes. A sequence split across chunks is completed by
 * a later call.
 *
 * @param {Buffer} buf - bytes to decode (indexed byte by byte).
 * @returns {string} the text decoded from the sequences completed so far.
 */
InternalDecoderCesu8.prototype.write = function (buf) {
  var acc = this.acc
  var contBytes = this.contBytes
  var accBytes = this.accBytes
  var res = ""

  for (var i = 0; i < buf.length; i++) {
    var curByte = buf[i]

    if ((curByte & 0xC0) !== 0x80) { // Leading byte
      if (contBytes > 0) { // Previous sequence was cut short: it is invalid.
        res += this.defaultCharUnicode
        contBytes = 0
      }

      if (curByte < 0x80) { // Single-byte code
        res += String.fromCharCode(curByte)
      } else if (curByte < 0xE0) { // Two-byte code
        acc = curByte & 0x1F
        contBytes = 1
        accBytes = 1
      } else if (curByte < 0xF0) { // Three-byte code
        acc = curByte & 0x0F
        contBytes = 2
        accBytes = 1
      } else { // Four or more are not supported for CESU-8.
        res += this.defaultCharUnicode
      }
    } else if (contBytes > 0) { // Continuation byte we were waiting for
      acc = (acc << 6) | (curByte & 0x3f)
      contBytes--
      accBytes++

      if (contBytes === 0) {
        // Check for overlong encoding, but support Modified UTF-8
        // (encoding NULL as C0 80).
        if (accBytes === 2 && acc < 0x80 && acc > 0) {
          res += this.defaultCharUnicode
        } else if (accBytes === 3 && acc < 0x800) {
          res += this.defaultCharUnicode
        } else {
          // Actually add character.
          res += String.fromCharCode(acc)
        }
      }
    } else { // Unexpected continuation byte
      res += this.defaultCharUnicode
    }
  }

  this.acc = acc
  this.contBytes = contBytes
  this.accBytes = accBytes
  return res
}

/**
 * Finish decoding.
 *
 * @returns {string} the replacement text if the input stopped in the middle
 *   of a multi-byte sequence, otherwise an empty string.
 */
InternalDecoderCesu8.prototype.end = function () {
  // Must start from an empty string: starting from the number 0 made this
  // return 0, or "0" followed by the replacement text.
  var res = ""
  if (this.contBytes > 0) {
    res += this.defaultCharUnicode
  }
  return res
}
- // ------------------------------------------------------------------------------
- // check the chunk boundaries for surrogate pair
/**
 * UTF-8 encoder that is safe across chunk boundaries: a high surrogate at
 * the very end of a chunk is held back so it can be encoded together with
 * the low surrogate that may open the next chunk.
 *
 * @param {Object} options - not used.
 * @param {Object} codec - not used.
 */
function InternalEncoderUtf8 (options, codec) {
  this.highSurrogate = ""
}

/**
 * Encode one chunk.
 *
 * @param {string} str - next piece of text.
 * @returns {Buffer} UTF-8 bytes for the text, minus a trailing high
 *   surrogate (if any), which is kept for the next call.
 */
InternalEncoderUtf8.prototype.write = function (str) {
  var text = str

  if (this.highSurrogate) {
    text = this.highSurrogate + text
    this.highSurrogate = ""
  }

  if (text.length > 0) {
    var lastIdx = text.length - 1
    var lastUnit = text.charCodeAt(lastIdx)
    if (lastUnit >= 0xd800 && lastUnit < 0xdc00) {
      this.highSurrogate = text[lastIdx]
      text = text.slice(0, lastIdx)
    }
  }

  // The encoding is spelled out: this encoder never sets `this.enc`, so
  // passing it relied on Buffer.from treating `undefined` as UTF-8.
  return Buffer.from(text, "utf8")
}

/**
 * Flush a held-back high surrogate, if there is one.
 *
 * @returns {Buffer|undefined} the encoded leftover surrogate, or undefined
 *   when nothing was pending.
 */
InternalEncoderUtf8.prototype.end = function () {
  if (this.highSurrogate) {
    var leftover = this.highSurrogate
    this.highSurrogate = ""
    return Buffer.from(leftover, "utf8")
  }
}
|