unicode.js 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790
  1. /**
  2. * unicode.js - east asian width and surrogate pairs
  3. * Copyright (c) 2013-2015, Christopher Jeffrey and contributors (MIT License).
  4. * https://github.com/chjj/blessed
  5. * Borrowed from vangie/east-asian-width, komagata/eastasianwidth,
  6. * and mathiasbynens/String.prototype.codePointAt. Licenses below.
  7. */
  8. // east-asian-width
  9. //
  10. // Copyright (c) 2015 Vangie Du
  11. // https://github.com/vangie/east-asian-width
  12. //
  13. // Permission is hereby granted, free of charge, to any person
  14. // obtaining a copy of this software and associated documentation
  15. // files (the "Software"), to deal in the Software without
  16. // restriction, including without limitation the rights to use,
  17. // copy, modify, merge, publish, distribute, sublicense, and/or sell
  18. // copies of the Software, and to permit persons to whom the
  19. // Software is furnished to do so, subject to the following
  20. // conditions:
  21. //
  22. // The above copyright notice and this permission notice shall be
  23. // included in all copies or substantial portions of the Software.
  24. //
  25. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  27. // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  29. // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  30. // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  31. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  32. // OTHER DEALINGS IN THE SOFTWARE.
  33. // eastasianwidth
  34. //
  35. // Copyright (c) 2013, Masaki Komagata
  36. // https://github.com/komagata/eastasianwidth
  37. //
  38. // Permission is hereby granted, free of charge, to any person obtaining a copy
  39. // of this software and associated documentation files (the "Software"), to deal
  40. // in the Software without restriction, including without limitation the rights
  41. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  42. // copies of the Software, and to permit persons to whom the Software is
  43. // furnished to do so, subject to the following conditions:
  44. //
  45. // The above copyright notice and this permission notice shall be included in
  46. // all copies or substantial portions of the Software.
  47. //
  48. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  49. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  50. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  51. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  52. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  53. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  54. // THE SOFTWARE.
  55. // String.prototype.codePointAt
  56. //
  57. // Copyright Mathias Bynens <https://mathiasbynens.be/>
  58. // https://github.com/mathiasbynens/String.prototype.codePointAt
  59. //
  60. // Permission is hereby granted, free of charge, to any person obtaining
  61. // a copy of this software and associated documentation files (the
  62. // "Software"), to deal in the Software without restriction, including
  63. // without limitation the rights to use, copy, modify, merge, publish,
  64. // distribute, sublicense, and/or sell copies of the Software, and to
  65. // permit persons to whom the Software is furnished to do so, subject to
  66. // the following conditions:
  67. //
  68. // The above copyright notice and this permission notice shall be
  69. // included in all copies or substantial portions of the Software.
  70. //
  71. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  72. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  73. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  74. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  75. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  76. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  77. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  78. // String.fromCodePoint
  79. //
  80. // Copyright Mathias Bynens <https://mathiasbynens.be/>
  81. // https://github.com/mathiasbynens/String.fromCodePoint
  82. //
  83. // Permission is hereby granted, free of charge, to any person obtaining
  84. // a copy of this software and associated documentation files (the
  85. // "Software"), to deal in the Software without restriction, including
  86. // without limitation the rights to use, copy, modify, merge, publish,
  87. // distribute, sublicense, and/or sell copies of the Software, and to
  88. // permit persons to whom the Software is furnished to do so, subject to
  89. // the following conditions:
  90. //
  91. // The above copyright notice and this permission notice shall be
  92. // included in all copies or substantial portions of the Software.
  93. //
  94. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  95. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  96. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  97. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  98. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  99. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  100. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  // Local aliases for the built-ins used by the manual `fromCodePoint`
  // fallback further down in this file.
  101. var stringFromCharCode = String.fromCharCode;
  102. var floor = Math.floor;
  103. /**
  104. * Wide, Surrogates, and Combining
  105. */
  106. exports.charWidth = function(str, i) {
  107. var point = typeof str !== 'number'
  108. ? exports.codePointAt(str, i || 0)
  109. : str;
  110. // nul
  111. if (point === 0) return 0;
  112. // tab
  113. if (point === 0x09) {
  114. if (!exports.blessed) {
  115. exports.blessed = require('../');
  116. }
  117. return exports.blessed.screen.global
  118. ? exports.blessed.screen.global.tabc.length
  119. : 8;
  120. }
  121. // 8-bit control characters (2-width according to unicode??)
  122. if (point < 32 || (point >= 0x7f && point < 0xa0)) {
  123. return 0;
  124. }
  125. // search table of non-spacing characters
  126. // is ucs combining or C0/C1 control character
  127. if (exports.combining[point]) {
  128. return 0;
  129. }
  130. // check for double-wide
  131. // if (point >= 0x1100
  132. // && (point <= 0x115f // Hangul Jamo init. consonants
  133. // || point === 0x2329 || point === 0x232a
  134. // || (point >= 0x2e80 && point <= 0xa4cf
  135. // && point !== 0x303f) // CJK ... Yi
  136. // || (point >= 0xac00 && point <= 0xd7a3) // Hangul Syllables
  137. // || (point >= 0xf900 && point <= 0xfaff) // CJK Compatibility Ideographs
  138. // || (point >= 0xfe10 && point <= 0xfe19) // Vertical forms
  139. // || (point >= 0xfe30 && point <= 0xfe6f) // CJK Compatibility Forms
  140. // || (point >= 0xff00 && point <= 0xff60) // Fullwidth Forms
  141. // || (point >= 0xffe0 && point <= 0xffe6)
  142. // || (point >= 0x20000 && point <= 0x2fffd)
  143. // || (point >= 0x30000 && point <= 0x3fffd))) {
  144. // return 2;
  145. // }
  146. // check for double-wide
  147. if ((0x3000 === point)
  148. || (0xFF01 <= point && point <= 0xFF60)
  149. || (0xFFE0 <= point && point <= 0xFFE6)) {
  150. return 2;
  151. }
  152. if ((0x1100 <= point && point <= 0x115F)
  153. || (0x11A3 <= point && point <= 0x11A7)
  154. || (0x11FA <= point && point <= 0x11FF)
  155. || (0x2329 <= point && point <= 0x232A)
  156. || (0x2E80 <= point && point <= 0x2E99)
  157. || (0x2E9B <= point && point <= 0x2EF3)
  158. || (0x2F00 <= point && point <= 0x2FD5)
  159. || (0x2FF0 <= point && point <= 0x2FFB)
  160. || (0x3001 <= point && point <= 0x303E)
  161. || (0x3041 <= point && point <= 0x3096)
  162. || (0x3099 <= point && point <= 0x30FF)
  163. || (0x3105 <= point && point <= 0x312D)
  164. || (0x3131 <= point && point <= 0x318E)
  165. || (0x3190 <= point && point <= 0x31BA)
  166. || (0x31C0 <= point && point <= 0x31E3)
  167. || (0x31F0 <= point && point <= 0x321E)
  168. || (0x3220 <= point && point <= 0x3247)
  169. || (0x3250 <= point && point <= 0x32FE)
  170. || (0x3300 <= point && point <= 0x4DBF)
  171. || (0x4E00 <= point && point <= 0xA48C)
  172. || (0xA490 <= point && point <= 0xA4C6)
  173. || (0xA960 <= point && point <= 0xA97C)
  174. || (0xAC00 <= point && point <= 0xD7A3)
  175. || (0xD7B0 <= point && point <= 0xD7C6)
  176. || (0xD7CB <= point && point <= 0xD7FB)
  177. || (0xF900 <= point && point <= 0xFAFF)
  178. || (0xFE10 <= point && point <= 0xFE19)
  179. || (0xFE30 <= point && point <= 0xFE52)
  180. || (0xFE54 <= point && point <= 0xFE66)
  181. || (0xFE68 <= point && point <= 0xFE6B)
  182. || (0x1B000 <= point && point <= 0x1B001)
  183. || (0x1F200 <= point && point <= 0x1F202)
  184. || (0x1F210 <= point && point <= 0x1F23A)
  185. || (0x1F240 <= point && point <= 0x1F248)
  186. || (0x1F250 <= point && point <= 0x1F251)
  187. || (0x20000 <= point && point <= 0x2F73F)
  188. || (0x2B740 <= point && point <= 0x2FFFD)
  189. || (0x30000 <= point && point <= 0x3FFFD)) {
  190. return 2;
  191. }
  192. // CJK Ambiguous
  193. // http://www.unicode.org/reports/tr11/
  194. // http://www.unicode.org/reports/tr11/#Ambiguous
  195. if (process.env.NCURSES_CJK_WIDTH) {
  196. if ((0x00A1 === point)
  197. || (0x00A4 === point)
  198. || (0x00A7 <= point && point <= 0x00A8)
  199. || (0x00AA === point)
  200. || (0x00AD <= point && point <= 0x00AE)
  201. || (0x00B0 <= point && point <= 0x00B4)
  202. || (0x00B6 <= point && point <= 0x00BA)
  203. || (0x00BC <= point && point <= 0x00BF)
  204. || (0x00C6 === point)
  205. || (0x00D0 === point)
  206. || (0x00D7 <= point && point <= 0x00D8)
  207. || (0x00DE <= point && point <= 0x00E1)
  208. || (0x00E6 === point)
  209. || (0x00E8 <= point && point <= 0x00EA)
  210. || (0x00EC <= point && point <= 0x00ED)
  211. || (0x00F0 === point)
  212. || (0x00F2 <= point && point <= 0x00F3)
  213. || (0x00F7 <= point && point <= 0x00FA)
  214. || (0x00FC === point)
  215. || (0x00FE === point)
  216. || (0x0101 === point)
  217. || (0x0111 === point)
  218. || (0x0113 === point)
  219. || (0x011B === point)
  220. || (0x0126 <= point && point <= 0x0127)
  221. || (0x012B === point)
  222. || (0x0131 <= point && point <= 0x0133)
  223. || (0x0138 === point)
  224. || (0x013F <= point && point <= 0x0142)
  225. || (0x0144 === point)
  226. || (0x0148 <= point && point <= 0x014B)
  227. || (0x014D === point)
  228. || (0x0152 <= point && point <= 0x0153)
  229. || (0x0166 <= point && point <= 0x0167)
  230. || (0x016B === point)
  231. || (0x01CE === point)
  232. || (0x01D0 === point)
  233. || (0x01D2 === point)
  234. || (0x01D4 === point)
  235. || (0x01D6 === point)
  236. || (0x01D8 === point)
  237. || (0x01DA === point)
  238. || (0x01DC === point)
  239. || (0x0251 === point)
  240. || (0x0261 === point)
  241. || (0x02C4 === point)
  242. || (0x02C7 === point)
  243. || (0x02C9 <= point && point <= 0x02CB)
  244. || (0x02CD === point)
  245. || (0x02D0 === point)
  246. || (0x02D8 <= point && point <= 0x02DB)
  247. || (0x02DD === point)
  248. || (0x02DF === point)
  249. || (0x0300 <= point && point <= 0x036F)
  250. || (0x0391 <= point && point <= 0x03A1)
  251. || (0x03A3 <= point && point <= 0x03A9)
  252. || (0x03B1 <= point && point <= 0x03C1)
  253. || (0x03C3 <= point && point <= 0x03C9)
  254. || (0x0401 === point)
  255. || (0x0410 <= point && point <= 0x044F)
  256. || (0x0451 === point)
  257. || (0x2010 === point)
  258. || (0x2013 <= point && point <= 0x2016)
  259. || (0x2018 <= point && point <= 0x2019)
  260. || (0x201C <= point && point <= 0x201D)
  261. || (0x2020 <= point && point <= 0x2022)
  262. || (0x2024 <= point && point <= 0x2027)
  263. || (0x2030 === point)
  264. || (0x2032 <= point && point <= 0x2033)
  265. || (0x2035 === point)
  266. || (0x203B === point)
  267. || (0x203E === point)
  268. || (0x2074 === point)
  269. || (0x207F === point)
  270. || (0x2081 <= point && point <= 0x2084)
  271. || (0x20AC === point)
  272. || (0x2103 === point)
  273. || (0x2105 === point)
  274. || (0x2109 === point)
  275. || (0x2113 === point)
  276. || (0x2116 === point)
  277. || (0x2121 <= point && point <= 0x2122)
  278. || (0x2126 === point)
  279. || (0x212B === point)
  280. || (0x2153 <= point && point <= 0x2154)
  281. || (0x215B <= point && point <= 0x215E)
  282. || (0x2160 <= point && point <= 0x216B)
  283. || (0x2170 <= point && point <= 0x2179)
  284. || (0x2189 === point)
  285. || (0x2190 <= point && point <= 0x2199)
  286. || (0x21B8 <= point && point <= 0x21B9)
  287. || (0x21D2 === point)
  288. || (0x21D4 === point)
  289. || (0x21E7 === point)
  290. || (0x2200 === point)
  291. || (0x2202 <= point && point <= 0x2203)
  292. || (0x2207 <= point && point <= 0x2208)
  293. || (0x220B === point)
  294. || (0x220F === point)
  295. || (0x2211 === point)
  296. || (0x2215 === point)
  297. || (0x221A === point)
  298. || (0x221D <= point && point <= 0x2220)
  299. || (0x2223 === point)
  300. || (0x2225 === point)
  301. || (0x2227 <= point && point <= 0x222C)
  302. || (0x222E === point)
  303. || (0x2234 <= point && point <= 0x2237)
  304. || (0x223C <= point && point <= 0x223D)
  305. || (0x2248 === point)
  306. || (0x224C === point)
  307. || (0x2252 === point)
  308. || (0x2260 <= point && point <= 0x2261)
  309. || (0x2264 <= point && point <= 0x2267)
  310. || (0x226A <= point && point <= 0x226B)
  311. || (0x226E <= point && point <= 0x226F)
  312. || (0x2282 <= point && point <= 0x2283)
  313. || (0x2286 <= point && point <= 0x2287)
  314. || (0x2295 === point)
  315. || (0x2299 === point)
  316. || (0x22A5 === point)
  317. || (0x22BF === point)
  318. || (0x2312 === point)
  319. || (0x2460 <= point && point <= 0x24E9)
  320. || (0x24EB <= point && point <= 0x254B)
  321. || (0x2550 <= point && point <= 0x2573)
  322. || (0x2580 <= point && point <= 0x258F)
  323. || (0x2592 <= point && point <= 0x2595)
  324. || (0x25A0 <= point && point <= 0x25A1)
  325. || (0x25A3 <= point && point <= 0x25A9)
  326. || (0x25B2 <= point && point <= 0x25B3)
  327. || (0x25B6 <= point && point <= 0x25B7)
  328. || (0x25BC <= point && point <= 0x25BD)
  329. || (0x25C0 <= point && point <= 0x25C1)
  330. || (0x25C6 <= point && point <= 0x25C8)
  331. || (0x25CB === point)
  332. || (0x25CE <= point && point <= 0x25D1)
  333. || (0x25E2 <= point && point <= 0x25E5)
  334. || (0x25EF === point)
  335. || (0x2605 <= point && point <= 0x2606)
  336. || (0x2609 === point)
  337. || (0x260E <= point && point <= 0x260F)
  338. || (0x2614 <= point && point <= 0x2615)
  339. || (0x261C === point)
  340. || (0x261E === point)
  341. || (0x2640 === point)
  342. || (0x2642 === point)
  343. || (0x2660 <= point && point <= 0x2661)
  344. || (0x2663 <= point && point <= 0x2665)
  345. || (0x2667 <= point && point <= 0x266A)
  346. || (0x266C <= point && point <= 0x266D)
  347. || (0x266F === point)
  348. || (0x269E <= point && point <= 0x269F)
  349. || (0x26BE <= point && point <= 0x26BF)
  350. || (0x26C4 <= point && point <= 0x26CD)
  351. || (0x26CF <= point && point <= 0x26E1)
  352. || (0x26E3 === point)
  353. || (0x26E8 <= point && point <= 0x26FF)
  354. || (0x273D === point)
  355. || (0x2757 === point)
  356. || (0x2776 <= point && point <= 0x277F)
  357. || (0x2B55 <= point && point <= 0x2B59)
  358. || (0x3248 <= point && point <= 0x324F)
  359. || (0xE000 <= point && point <= 0xF8FF)
  360. || (0xFE00 <= point && point <= 0xFE0F)
  361. || (0xFFFD === point)
  362. || (0x1F100 <= point && point <= 0x1F10A)
  363. || (0x1F110 <= point && point <= 0x1F12D)
  364. || (0x1F130 <= point && point <= 0x1F169)
  365. || (0x1F170 <= point && point <= 0x1F19A)
  366. || (0xE0100 <= point && point <= 0xE01EF)
  367. || (0xF0000 <= point && point <= 0xFFFFD)
  368. || (0x100000 <= point && point <= 0x10FFFD)) {
  369. return +process.env.NCURSES_CJK_WIDTH || 1;
  370. }
  371. }
  372. return 1;
  373. };
  374. exports.strWidth = function(str) {
  375. var width = 0;
  376. for (var i = 0; i < str.length; i++) {
  377. width += exports.charWidth(str, i);
  378. if (exports.isSurrogate(str, i)) i++;
  379. }
  380. return width;
  381. };
  382. exports.isSurrogate = function(str, i) {
  383. var point = typeof str !== 'number'
  384. ? exports.codePointAt(str, i || 0)
  385. : str;
  386. return point > 0x00ffff;
  387. };
  // Inclusive [first, last] code point ranges of the characters this module
  // treats as combining (charWidth reports them as zero width). The table is
  // expanded into the `exports.combining` lookup object and is also used to
  // build the `exports.chars.combining` regex.
  388. exports.combiningTable = [
  389. [0x0300, 0x036F], [0x0483, 0x0486], [0x0488, 0x0489],
  390. [0x0591, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2],
  391. [0x05C4, 0x05C5], [0x05C7, 0x05C7], [0x0600, 0x0603],
  392. [0x0610, 0x0615], [0x064B, 0x065E], [0x0670, 0x0670],
  393. [0x06D6, 0x06E4], [0x06E7, 0x06E8], [0x06EA, 0x06ED],
  394. [0x070F, 0x070F], [0x0711, 0x0711], [0x0730, 0x074A],
  395. [0x07A6, 0x07B0], [0x07EB, 0x07F3], [0x0901, 0x0902],
  396. [0x093C, 0x093C], [0x0941, 0x0948], [0x094D, 0x094D],
  397. [0x0951, 0x0954], [0x0962, 0x0963], [0x0981, 0x0981],
  398. [0x09BC, 0x09BC], [0x09C1, 0x09C4], [0x09CD, 0x09CD],
  399. [0x09E2, 0x09E3], [0x0A01, 0x0A02], [0x0A3C, 0x0A3C],
  400. [0x0A41, 0x0A42], [0x0A47, 0x0A48], [0x0A4B, 0x0A4D],
  401. [0x0A70, 0x0A71], [0x0A81, 0x0A82], [0x0ABC, 0x0ABC],
  402. [0x0AC1, 0x0AC5], [0x0AC7, 0x0AC8], [0x0ACD, 0x0ACD],
  403. [0x0AE2, 0x0AE3], [0x0B01, 0x0B01], [0x0B3C, 0x0B3C],
  404. [0x0B3F, 0x0B3F], [0x0B41, 0x0B43], [0x0B4D, 0x0B4D],
  405. [0x0B56, 0x0B56], [0x0B82, 0x0B82], [0x0BC0, 0x0BC0],
  406. [0x0BCD, 0x0BCD], [0x0C3E, 0x0C40], [0x0C46, 0x0C48],
  407. [0x0C4A, 0x0C4D], [0x0C55, 0x0C56], [0x0CBC, 0x0CBC],
  408. [0x0CBF, 0x0CBF], [0x0CC6, 0x0CC6], [0x0CCC, 0x0CCD],
  409. [0x0CE2, 0x0CE3], [0x0D41, 0x0D43], [0x0D4D, 0x0D4D],
  410. [0x0DCA, 0x0DCA], [0x0DD2, 0x0DD4], [0x0DD6, 0x0DD6],
  411. [0x0E31, 0x0E31], [0x0E34, 0x0E3A], [0x0E47, 0x0E4E],
  412. [0x0EB1, 0x0EB1], [0x0EB4, 0x0EB9], [0x0EBB, 0x0EBC],
  413. [0x0EC8, 0x0ECD], [0x0F18, 0x0F19], [0x0F35, 0x0F35],
  414. [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F71, 0x0F7E],
  415. [0x0F80, 0x0F84], [0x0F86, 0x0F87], [0x0F90, 0x0F97],
  416. [0x0F99, 0x0FBC], [0x0FC6, 0x0FC6], [0x102D, 0x1030],
  417. [0x1032, 0x1032], [0x1036, 0x1037], [0x1039, 0x1039],
  418. [0x1058, 0x1059], [0x1160, 0x11FF], [0x135F, 0x135F],
  419. [0x1712, 0x1714], [0x1732, 0x1734], [0x1752, 0x1753],
  420. [0x1772, 0x1773], [0x17B4, 0x17B5], [0x17B7, 0x17BD],
  421. [0x17C6, 0x17C6], [0x17C9, 0x17D3], [0x17DD, 0x17DD],
  422. [0x180B, 0x180D], [0x18A9, 0x18A9], [0x1920, 0x1922],
  423. [0x1927, 0x1928], [0x1932, 0x1932], [0x1939, 0x193B],
  424. [0x1A17, 0x1A18], [0x1B00, 0x1B03], [0x1B34, 0x1B34],
  425. [0x1B36, 0x1B3A], [0x1B3C, 0x1B3C], [0x1B42, 0x1B42],
  426. [0x1B6B, 0x1B73], [0x1DC0, 0x1DCA], [0x1DFE, 0x1DFF],
  427. [0x200B, 0x200F], [0x202A, 0x202E], [0x2060, 0x2063],
  428. [0x206A, 0x206F], [0x20D0, 0x20EF], [0x302A, 0x302F],
  429. [0x3099, 0x309A], [0xA806, 0xA806], [0xA80B, 0xA80B],
  430. [0xA825, 0xA826], [0xFB1E, 0xFB1E], [0xFE00, 0xFE0F],
  431. [0xFE20, 0xFE23], [0xFEFF, 0xFEFF], [0xFFF9, 0xFFFB],
  432. [0x10A01, 0x10A03], [0x10A05, 0x10A06], [0x10A0C, 0x10A0F],
  433. [0x10A38, 0x10A3A], [0x10A3F, 0x10A3F], [0x1D167, 0x1D169],
  434. [0x1D173, 0x1D182], [0x1D185, 0x1D18B], [0x1D1AA, 0x1D1AD],
  435. [0x1D242, 0x1D244], [0xE0001, 0xE0001], [0xE0020, 0xE007F],
  436. [0xE0100, 0xE01EF]
  437. ];
  438. exports.combining = exports.combiningTable.reduce(function(out, row) {
  439. for (var i = row[0]; i <= row[1]; i++) {
  440. out[i] = true;
  441. }
  442. return out;
  443. }, {});
  444. exports.isCombining = function(str, i) {
  445. var point = typeof str !== 'number'
  446. ? exports.codePointAt(str, i || 0)
  447. : str;
  448. return exports.combining[point] === true;
  449. };
  450. /**
  451. * Code Point Helpers
  452. */
  453. exports.codePointAt = function(str, position) {
  454. if (str == null) {
  455. throw TypeError();
  456. }
  457. var string = String(str);
  458. if (string.codePointAt) {
  459. return string.codePointAt(position);
  460. }
  461. var size = string.length;
  462. // `ToInteger`
  463. var index = position ? Number(position) : 0;
  464. if (index !== index) { // better `isNaN`
  465. index = 0;
  466. }
  467. // Account for out-of-bounds indices:
  468. if (index < 0 || index >= size) {
  469. return undefined;
  470. }
  471. // Get the first code unit
  472. var first = string.charCodeAt(index);
  473. var second;
  474. if ( // check if it’s the start of a surrogate pair
  475. first >= 0xD800 && first <= 0xDBFF && // high surrogate
  476. size > index + 1 // there is a next code unit
  477. ) {
  478. second = string.charCodeAt(index + 1);
  479. if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate
  480. // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  481. return (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000;
  482. }
  483. }
  484. return first;
  485. };
  486. // exports.codePointAt = function(str, position) {
  487. // position = +position || 0;
  488. // var x = str.charCodeAt(position);
  489. // var y = str.length > 1 ? str.charCodeAt(position + 1) : 0;
  490. // var point = x;
  491. // if ((0xD800 <= x && x <= 0xDBFF) && (0xDC00 <= y && y <= 0xDFFF)) {
  492. // x &= 0x3FF;
  493. // y &= 0x3FF;
  494. // point = (x << 10) | y;
  495. // point += 0x10000;
  496. // }
  497. // return point;
  498. // };
  499. exports.fromCodePoint = function() {
  500. if (String.fromCodePoint) {
  501. return String.fromCodePoint.apply(String, arguments);
  502. }
  503. var MAX_SIZE = 0x4000;
  504. var codeUnits = [];
  505. var highSurrogate;
  506. var lowSurrogate;
  507. var index = -1;
  508. var length = arguments.length;
  509. if (!length) {
  510. return '';
  511. }
  512. var result = '';
  513. while (++index < length) {
  514. var codePoint = Number(arguments[index]);
  515. if (
  516. !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
  517. codePoint < 0 || // not a valid Unicode code point
  518. codePoint > 0x10FFFF || // not a valid Unicode code point
  519. floor(codePoint) !== codePoint // not an integer
  520. ) {
  521. throw RangeError('Invalid code point: ' + codePoint);
  522. }
  523. if (codePoint <= 0xFFFF) { // BMP code point
  524. codeUnits.push(codePoint);
  525. } else { // Astral code point; split in surrogate halves
  526. // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
  527. codePoint -= 0x10000;
  528. highSurrogate = (codePoint >> 10) + 0xD800;
  529. lowSurrogate = (codePoint % 0x400) + 0xDC00;
  530. codeUnits.push(highSurrogate, lowSurrogate);
  531. }
  532. if (index + 1 === length || codeUnits.length > MAX_SIZE) {
  533. result += stringFromCharCode.apply(null, codeUnits);
  534. codeUnits.length = 0;
  535. }
  536. }
  537. return result;
  538. };
  539. /**
  540. * Regexes
  541. */
  // Container for the regexes exported by this module.
  exports.chars = {};

  // Double-width characters that are _not_ surrogate pairs (BMP only).
  // NOTE (kept from the original): 0x20000 - 0x2fffd and 0x30000 - 0x3fffd
  // are not needed here. This regex is used to put a blank char after wide
  // chars to be eaten; for a surrogate pair, parseContent already accounts
  // for the extra char because its length is 2 instead of 1.
  exports.chars.wide = new RegExp('([' + [
    '\\u1100-\\u115f', // Hangul Jamo init. consonants
    '\\u2329\\u232a',
    '\\u2e80-\\u303e\\u3040-\\ua4cf', // CJK ... Yi
    '\\uac00-\\ud7a3', // Hangul Syllables
    '\\uf900-\\ufaff', // CJK Compatibility Ideographs
    '\\ufe10-\\ufe19', // Vertical forms
    '\\ufe30-\\ufe6f', // CJK Compatibility Forms
    '\\uff00-\\uff60', // Fullwidth Forms
    '\\uffe0-\\uffe6'
  ].join('') + '])', 'g');
  // All surrogate pair wide chars: U+20000 - U+2FFFD and U+30000 - U+3FFFD.
  //
  // Each lead surrogate covers 0x400 code points, so every lead surrogate
  // except the last one of a plane must accept the full trail range
  // DC00-DFFF. Only the final lead surrogate (D87F / D8BF) is capped at DFFD,
  // which leaves out the two code points xFFFE and xFFFF that end each plane.
  // The pattern stays wrapped in a single pair of parentheses because
  // `exports.chars.all` strips them with `source.slice(1, -1)`.
  exports.chars.swide = new RegExp('('
    // 0x20000 - 0x2fffd:
    + '[\\ud840-\\ud87e][\\udc00-\\udfff]|\\ud87f[\\udc00-\\udffd]'
    + '|'
    // 0x30000 - 0x3fffd:
    + '[\\ud880-\\ud8be][\\udc00-\\udfff]|\\ud8bf[\\udc00-\\udffd]'
    + ')', 'g');
  // All wide chars including surrogate pairs: the bodies of `swide` and
  // `wide` (outer parentheses stripped) joined into one alternation.
  exports.chars.all = new RegExp(
    '('
      + [exports.chars.swide, exports.chars.wide].map(function(re) {
        return re.source.slice(1, -1);
      }).join('|')
      + ')',
    'g'
  );
  // Regex to detect a surrogate pair: a high surrogate code unit followed
  // by a low surrogate code unit.
  exports.chars.surrogate = new RegExp(
    '[\\ud800-\\udbff]' + '[\\udc00-\\udfff]',
    'g'
  );
  // Regex to find combining characters, generated from
  // `exports.combiningTable`. BMP ranges are collected in one character
  // class; each astral range becomes its own `[lead][trail]` alternative.
  exports.chars.combining = (function() {
    // Hex forms of the two code units that encode an astral code point.
    function surrogateUnits(point) {
      var pair = exports.fromCodePoint(point);
      return [hexify(pair.charCodeAt(0)), hexify(pair.charCodeAt(1))];
    }

    var source = '[';
    var hasAlternative = false;

    exports.combiningTable.forEach(function(row) {
      if (row[0] > 0x00ffff) {
        var from = surrogateUnits(row[0]);
        var to = surrogateUnits(row[1]);
        // Close the BMP character class before the first alternative.
        if (!hasAlternative) {
          source += ']';
          hasAlternative = true;
        }
        source += '|'
          + '[\\u' + from[0] + '-' + '\\u' + to[0] + ']'
          + '[\\u' + from[1] + '-' + '\\u' + to[1] + ']';
      } else {
        source += '\\u' + hexify(row[0]) + '-' + '\\u' + hexify(row[1]);
      }
    });

    return new RegExp(source, 'g');
  })();
  /**
   * Lower-case hexadecimal form of a number, left-padded with zeros to at
   * least four digits (longer values are returned unpadded).
   *
   * @param {number} n - Value to format.
   * @returns {string} Hex digits, e.g. `0x300` -> `'0300'`.
   */
  function hexify(n) {
    var digits = n.toString(16);
    for (var missing = 4 - digits.length; missing > 0; missing--) {
      digits = '0' + digits;
    }
    return digits;
  }
  608. /*
  609. exports.chars.combining = new RegExp(
  610. '['
  611. + '\\u0300-\\u036f'
  612. + '\\u0483-\\u0486'
  613. + '\\u0488-\\u0489'
  614. + '\\u0591-\\u05bd'
  615. + '\\u05bf-\\u05bf'
  616. + '\\u05c1-\\u05c2'
  617. + '\\u05c4-\\u05c5'
  618. + '\\u05c7-\\u05c7'
  619. + '\\u0600-\\u0603'
  620. + '\\u0610-\\u0615'
  621. + '\\u064b-\\u065e'
  622. + '\\u0670-\\u0670'
  623. + '\\u06d6-\\u06e4'
  624. + '\\u06e7-\\u06e8'
  625. + '\\u06ea-\\u06ed'
  626. + '\\u070f-\\u070f'
  627. + '\\u0711-\\u0711'
  628. + '\\u0730-\\u074a'
  629. + '\\u07a6-\\u07b0'
  630. + '\\u07eb-\\u07f3'
  631. + '\\u0901-\\u0902'
  632. + '\\u093c-\\u093c'
  633. + '\\u0941-\\u0948'
  634. + '\\u094d-\\u094d'
  635. + '\\u0951-\\u0954'
  636. + '\\u0962-\\u0963'
  637. + '\\u0981-\\u0981'
  638. + '\\u09bc-\\u09bc'
  639. + '\\u09c1-\\u09c4'
  640. + '\\u09cd-\\u09cd'
  641. + '\\u09e2-\\u09e3'
  642. + '\\u0a01-\\u0a02'
  643. + '\\u0a3c-\\u0a3c'
  644. + '\\u0a41-\\u0a42'
  645. + '\\u0a47-\\u0a48'
  646. + '\\u0a4b-\\u0a4d'
  647. + '\\u0a70-\\u0a71'
  648. + '\\u0a81-\\u0a82'
  649. + '\\u0abc-\\u0abc'
  650. + '\\u0ac1-\\u0ac5'
  651. + '\\u0ac7-\\u0ac8'
  652. + '\\u0acd-\\u0acd'
  653. + '\\u0ae2-\\u0ae3'
  654. + '\\u0b01-\\u0b01'
  655. + '\\u0b3c-\\u0b3c'
  656. + '\\u0b3f-\\u0b3f'
  657. + '\\u0b41-\\u0b43'
  658. + '\\u0b4d-\\u0b4d'
  659. + '\\u0b56-\\u0b56'
  660. + '\\u0b82-\\u0b82'
  661. + '\\u0bc0-\\u0bc0'
  662. + '\\u0bcd-\\u0bcd'
  663. + '\\u0c3e-\\u0c40'
  664. + '\\u0c46-\\u0c48'
  665. + '\\u0c4a-\\u0c4d'
  666. + '\\u0c55-\\u0c56'
  667. + '\\u0cbc-\\u0cbc'
  668. + '\\u0cbf-\\u0cbf'
  669. + '\\u0cc6-\\u0cc6'
  670. + '\\u0ccc-\\u0ccd'
  671. + '\\u0ce2-\\u0ce3'
  672. + '\\u0d41-\\u0d43'
  673. + '\\u0d4d-\\u0d4d'
  674. + '\\u0dca-\\u0dca'
  675. + '\\u0dd2-\\u0dd4'
  676. + '\\u0dd6-\\u0dd6'
  677. + '\\u0e31-\\u0e31'
  678. + '\\u0e34-\\u0e3a'
  679. + '\\u0e47-\\u0e4e'
  680. + '\\u0eb1-\\u0eb1'
  681. + '\\u0eb4-\\u0eb9'
  682. + '\\u0ebb-\\u0ebc'
  683. + '\\u0ec8-\\u0ecd'
  684. + '\\u0f18-\\u0f19'
  685. + '\\u0f35-\\u0f35'
  686. + '\\u0f37-\\u0f37'
  687. + '\\u0f39-\\u0f39'
  688. + '\\u0f71-\\u0f7e'
  689. + '\\u0f80-\\u0f84'
  690. + '\\u0f86-\\u0f87'
  691. + '\\u0f90-\\u0f97'
  692. + '\\u0f99-\\u0fbc'
  693. + '\\u0fc6-\\u0fc6'
  694. + '\\u102d-\\u1030'
  695. + '\\u1032-\\u1032'
  696. + '\\u1036-\\u1037'
  697. + '\\u1039-\\u1039'
  698. + '\\u1058-\\u1059'
  699. + '\\u1160-\\u11ff'
  700. + '\\u135f-\\u135f'
  701. + '\\u1712-\\u1714'
  702. + '\\u1732-\\u1734'
  703. + '\\u1752-\\u1753'
  704. + '\\u1772-\\u1773'
  705. + '\\u17b4-\\u17b5'
  706. + '\\u17b7-\\u17bd'
  707. + '\\u17c6-\\u17c6'
  708. + '\\u17c9-\\u17d3'
  709. + '\\u17dd-\\u17dd'
  710. + '\\u180b-\\u180d'
  711. + '\\u18a9-\\u18a9'
  712. + '\\u1920-\\u1922'
  713. + '\\u1927-\\u1928'
  714. + '\\u1932-\\u1932'
  715. + '\\u1939-\\u193b'
  716. + '\\u1a17-\\u1a18'
  717. + '\\u1b00-\\u1b03'
  718. + '\\u1b34-\\u1b34'
  719. + '\\u1b36-\\u1b3a'
  720. + '\\u1b3c-\\u1b3c'
  721. + '\\u1b42-\\u1b42'
  722. + '\\u1b6b-\\u1b73'
  723. + '\\u1dc0-\\u1dca'
  724. + '\\u1dfe-\\u1dff'
  725. + '\\u200b-\\u200f'
  726. + '\\u202a-\\u202e'
  727. + '\\u2060-\\u2063'
  728. + '\\u206a-\\u206f'
  729. + '\\u20d0-\\u20ef'
  730. + '\\u302a-\\u302f'
  731. + '\\u3099-\\u309a'
  732. + '\\ua806-\\ua806'
  733. + '\\ua80b-\\ua80b'
  734. + '\\ua825-\\ua826'
  735. + '\\ufb1e-\\ufb1e'
  736. + '\\ufe00-\\ufe0f'
  737. + '\\ufe20-\\ufe23'
  738. + '\\ufeff-\\ufeff'
  739. + '\\ufff9-\\ufffb'
  740. + ']'
  741. + '|[\\ud802-\\ud802][\\ude01-\\ude03]'
  742. + '|[\\ud802-\\ud802][\\ude05-\\ude06]'
  743. + '|[\\ud802-\\ud802][\\ude0c-\\ude0f]'
  744. + '|[\\ud802-\\ud802][\\ude38-\\ude3a]'
  745. + '|[\\ud802-\\ud802][\\ude3f-\\ude3f]'
  746. + '|[\\ud834-\\ud834][\\udd67-\\udd69]'
  747. + '|[\\ud834-\\ud834][\\udd73-\\udd82]'
  748. + '|[\\ud834-\\ud834][\\udd85-\\udd8b]'
  749. + '|[\\ud834-\\ud834][\\uddaa-\\uddad]'
  750. + '|[\\ud834-\\ud834][\\ude42-\\ude44]'
  751. + '|[\\udb40-\\udb40][\\udc01-\\udc01]'
  752. + '|[\\udb40-\\udb40][\\udc20-\\udc7f]'
  753. + '|[\\udb40-\\udb40][\\udd00-\\uddef]'
  754. , 'g');
  755. */