cabac.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239
  1. /*****************************************************************************
  2. * cabac.c: cabac bitstream writing
  3. *****************************************************************************
  4. * Copyright (C) 2003-2018 x264 project
  5. *
  6. * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  7. * Loren Merritt <lorenm@u.washington.edu>
  8. * Fiona Glaser <fiona@x264.com>
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; either version 2 of the License, or
  13. * (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public License
  21. * along with this program; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  23. *
  24. * This program is also available under a commercial proprietary license.
  25. * For more information, contact us at licensing@x264.com.
  26. *****************************************************************************/
  27. #include "common/common.h"
  28. #include "macroblock.h"
  /* Default RDO_SKIP_BS to 0 unless it was already defined before this point.
   * The #if RDO_SKIP_BS / #if !RDO_SKIP_BS sections in this file select
   * between two variants of the code based on this value. */
  #if !defined( RDO_SKIP_BS )
  #define RDO_SKIP_BS 0
  #endif
  32. static inline void cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
  33. int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
  34. {
  35. if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
  36. {
  37. x264_cabac_encode_decision_noup( cb, ctx0, 0 );
  38. }
  39. #if !RDO_SKIP_BS
  40. else if( i_mb_type == I_PCM )
  41. {
  42. x264_cabac_encode_decision_noup( cb, ctx0, 1 );
  43. x264_cabac_encode_flush( h, cb );
  44. }
  45. #endif
  46. else
  47. {
  48. int i_pred = x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode];
  49. x264_cabac_encode_decision_noup( cb, ctx0, 1 );
  50. x264_cabac_encode_terminal( cb );
  51. x264_cabac_encode_decision_noup( cb, ctx1, !!h->mb.i_cbp_luma );
  52. if( h->mb.i_cbp_chroma == 0 )
  53. x264_cabac_encode_decision_noup( cb, ctx2, 0 );
  54. else
  55. {
  56. x264_cabac_encode_decision( cb, ctx2, 1 );
  57. x264_cabac_encode_decision_noup( cb, ctx3, h->mb.i_cbp_chroma>>1 );
  58. }
  59. x264_cabac_encode_decision( cb, ctx4, i_pred>>1 );
  60. x264_cabac_encode_decision_noup( cb, ctx5, i_pred&1 );
  61. }
  62. }
  63. #if !RDO_SKIP_BS
  64. static void cabac_field_decoding_flag( x264_t *h, x264_cabac_t *cb )
  65. {
  66. int ctx = 0;
  67. ctx += h->mb.field_decoding_flag & !!h->mb.i_mb_x;
  68. ctx += (h->mb.i_mb_top_mbpair_xy >= 0
  69. && h->mb.slice_table[h->mb.i_mb_top_mbpair_xy] == h->sh.i_first_mb
  70. && h->mb.field[h->mb.i_mb_top_mbpair_xy]);
  71. x264_cabac_encode_decision_noup( cb, 70 + ctx, MB_INTERLACED );
  72. h->mb.field_decoding_flag = MB_INTERLACED;
  73. }
  74. #endif
  75. static void cabac_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int i_mode )
  76. {
  77. if( i_pred == i_mode )
  78. x264_cabac_encode_decision( cb, 68, 1 );
  79. else
  80. {
  81. x264_cabac_encode_decision( cb, 68, 0 );
  82. if( i_mode > i_pred )
  83. i_mode--;
  84. x264_cabac_encode_decision( cb, 69, (i_mode )&0x01 );
  85. x264_cabac_encode_decision( cb, 69, (i_mode >> 1)&0x01 );
  86. x264_cabac_encode_decision( cb, 69, (i_mode >> 2) );
  87. }
  88. }
  89. static void cabac_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
  90. {
  91. int i_mode = x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode];
  92. int ctx = 0;
  93. /* No need to test for I4x4 or I_16x16 as cache_save handle that */
  94. if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy[0]] != 0 )
  95. ctx++;
  96. if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
  97. ctx++;
  98. x264_cabac_encode_decision_noup( cb, 64 + ctx, i_mode > 0 );
  99. if( i_mode > 0 )
  100. {
  101. x264_cabac_encode_decision( cb, 64 + 3, i_mode > 1 );
  102. if( i_mode > 1 )
  103. x264_cabac_encode_decision_noup( cb, 64 + 3, i_mode > 2 );
  104. }
  105. }
  106. static void cabac_cbp_luma( x264_t *h, x264_cabac_t *cb )
  107. {
  108. int cbp = h->mb.i_cbp_luma;
  109. int cbp_l = h->mb.cache.i_cbp_left;
  110. int cbp_t = h->mb.cache.i_cbp_top;
  111. x264_cabac_encode_decision ( cb, 76 - ((cbp_l >> 1) & 1) - ((cbp_t >> 1) & 2), (cbp >> 0) & 1 );
  112. x264_cabac_encode_decision ( cb, 76 - ((cbp >> 0) & 1) - ((cbp_t >> 2) & 2), (cbp >> 1) & 1 );
  113. x264_cabac_encode_decision ( cb, 76 - ((cbp_l >> 3) & 1) - ((cbp << 1) & 2), (cbp >> 2) & 1 );
  114. x264_cabac_encode_decision_noup( cb, 76 - ((cbp >> 2) & 1) - ((cbp >> 0) & 2), (cbp >> 3) & 1 );
  115. }
  116. static void cabac_cbp_chroma( x264_t *h, x264_cabac_t *cb )
  117. {
  118. int cbp_a = h->mb.cache.i_cbp_left & 0x30;
  119. int cbp_b = h->mb.cache.i_cbp_top & 0x30;
  120. int ctx = 0;
  121. if( cbp_a && h->mb.cache.i_cbp_left != -1 ) ctx++;
  122. if( cbp_b && h->mb.cache.i_cbp_top != -1 ) ctx+=2;
  123. if( h->mb.i_cbp_chroma == 0 )
  124. x264_cabac_encode_decision_noup( cb, 77 + ctx, 0 );
  125. else
  126. {
  127. x264_cabac_encode_decision_noup( cb, 77 + ctx, 1 );
  128. ctx = 4;
  129. if( cbp_a == 0x20 ) ctx++;
  130. if( cbp_b == 0x20 ) ctx += 2;
  131. x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma >> 1 );
  132. }
  133. }
  134. static void cabac_qp_delta( x264_t *h, x264_cabac_t *cb )
  135. {
  136. int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
  137. int ctx;
  138. /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely
  139. * flat background area. Don't do this if it would raise the quantizer, since that could
  140. * cause unexpected deblocking artifacts. */
  141. if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] && h->mb.i_qp > h->mb.i_last_qp )
  142. {
  143. #if !RDO_SKIP_BS
  144. h->mb.i_qp = h->mb.i_last_qp;
  145. #endif
  146. i_dqp = 0;
  147. }
  148. ctx = h->mb.i_last_dqp && (h->mb.type[h->mb.i_mb_prev_xy] == I_16x16 || (h->mb.cbp[h->mb.i_mb_prev_xy]&0x3f));
  149. if( i_dqp != 0 )
  150. {
  151. /* Faster than (i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp-1)).
  152. * If you so much as sneeze on these lines, gcc will compile this suboptimally. */
  153. i_dqp *= 2;
  154. int val = 1 - i_dqp;
  155. if( val < 0 ) val = i_dqp;
  156. val--;
  157. /* dqp is interpreted modulo (QP_MAX_SPEC+1) */
  158. if( val >= QP_MAX_SPEC && val != QP_MAX_SPEC+1 )
  159. val = 2*QP_MAX_SPEC+1 - val;
  160. do
  161. {
  162. x264_cabac_encode_decision( cb, 60 + ctx, 1 );
  163. ctx = 2+(ctx>>1);
  164. } while( --val );
  165. }
  166. x264_cabac_encode_decision_noup( cb, 60 + ctx, 0 );
  167. }
  168. #if !RDO_SKIP_BS
  169. void x264_cabac_mb_skip( x264_t *h, int b_skip )
  170. {
  171. int ctx = h->mb.cache.i_neighbour_skip + 11;
  172. if( h->sh.i_type != SLICE_TYPE_P )
  173. ctx += 13;
  174. x264_cabac_encode_decision( &h->cabac, ctx, b_skip );
  175. }
  176. #endif
  177. static inline void cabac_subpartition_p( x264_cabac_t *cb, int i_sub )
  178. {
  179. if( i_sub == D_L0_8x8 )
  180. {
  181. x264_cabac_encode_decision( cb, 21, 1 );
  182. return;
  183. }
  184. x264_cabac_encode_decision( cb, 21, 0 );
  185. if( i_sub == D_L0_8x4 )
  186. x264_cabac_encode_decision( cb, 22, 0 );
  187. else
  188. {
  189. x264_cabac_encode_decision( cb, 22, 1 );
  190. x264_cabac_encode_decision( cb, 23, i_sub == D_L0_4x8 );
  191. }
  192. }
  193. static ALWAYS_INLINE void cabac_subpartition_b( x264_cabac_t *cb, int i_sub )
  194. {
  195. if( i_sub == D_DIRECT_8x8 )
  196. {
  197. x264_cabac_encode_decision( cb, 36, 0 );
  198. return;
  199. }
  200. x264_cabac_encode_decision( cb, 36, 1 );
  201. if( i_sub == D_BI_8x8 )
  202. {
  203. x264_cabac_encode_decision( cb, 37, 1 );
  204. x264_cabac_encode_decision( cb, 38, 0 );
  205. x264_cabac_encode_decision( cb, 39, 0 );
  206. x264_cabac_encode_decision( cb, 39, 0 );
  207. return;
  208. }
  209. x264_cabac_encode_decision( cb, 37, 0 );
  210. x264_cabac_encode_decision( cb, 39, i_sub == D_L1_8x8 );
  211. }
  212. static ALWAYS_INLINE void cabac_transform_size( x264_t *h, x264_cabac_t *cb )
  213. {
  214. int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
  215. x264_cabac_encode_decision_noup( cb, ctx, h->mb.b_transform_8x8 );
  216. }
  217. static ALWAYS_INLINE void cabac_ref_internal( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int bframe )
  218. {
  219. const int i8 = x264_scan8[idx];
  220. const int i_refa = h->mb.cache.ref[i_list][i8 - 1];
  221. const int i_refb = h->mb.cache.ref[i_list][i8 - 8];
  222. int ctx = 0;
  223. if( i_refa > 0 && (!bframe || !h->mb.cache.skip[i8 - 1]) )
  224. ctx++;
  225. if( i_refb > 0 && (!bframe || !h->mb.cache.skip[i8 - 8]) )
  226. ctx += 2;
  227. for( int i_ref = h->mb.cache.ref[i_list][i8]; i_ref > 0; i_ref-- )
  228. {
  229. x264_cabac_encode_decision( cb, 54 + ctx, 1 );
  230. ctx = (ctx>>2)+4;
  231. }
  232. x264_cabac_encode_decision( cb, 54 + ctx, 0 );
  233. }
  234. static NOINLINE void cabac_ref_p( x264_t *h, x264_cabac_t *cb, int idx )
  235. {
  236. cabac_ref_internal( h, cb, 0, idx, 0 );
  237. }
  238. static NOINLINE void cabac_ref_b( x264_t *h, x264_cabac_t *cb, int i_list, int idx )
  239. {
  240. cabac_ref_internal( h, cb, i_list, idx, 1 );
  241. }
  242. static ALWAYS_INLINE int cabac_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx )
  243. {
  244. int ctxbase = l ? 47 : 40;
  245. if( mvd == 0 )
  246. {
  247. x264_cabac_encode_decision( cb, ctxbase + ctx, 0 );
  248. return 0;
  249. }
  250. int i_abs = abs( mvd );
  251. x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
  252. #if RDO_SKIP_BS
  253. if( i_abs <= 3 )
  254. {
  255. for( int i = 1; i < i_abs; i++ )
  256. x264_cabac_encode_decision( cb, ctxbase + i + 2, 1 );
  257. x264_cabac_encode_decision( cb, ctxbase + i_abs + 2, 0 );
  258. x264_cabac_encode_bypass( cb, mvd >> 31 );
  259. }
  260. else
  261. {
  262. x264_cabac_encode_decision( cb, ctxbase + 3, 1 );
  263. x264_cabac_encode_decision( cb, ctxbase + 4, 1 );
  264. x264_cabac_encode_decision( cb, ctxbase + 5, 1 );
  265. if( i_abs < 9 )
  266. {
  267. cb->f8_bits_encoded += x264_cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]];
  268. cb->state[ctxbase+6] = x264_cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]];
  269. }
  270. else
  271. {
  272. cb->f8_bits_encoded += cabac_size_5ones[cb->state[ctxbase+6]];
  273. cb->state[ctxbase+6] = cabac_transition_5ones[cb->state[ctxbase+6]];
  274. x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
  275. }
  276. }
  277. #else
  278. static const uint8_t ctxes[8] = { 3,4,5,6,6,6,6,6 };
  279. if( i_abs < 9 )
  280. {
  281. for( int i = 1; i < i_abs; i++ )
  282. x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
  283. x264_cabac_encode_decision( cb, ctxbase + ctxes[i_abs-1], 0 );
  284. }
  285. else
  286. {
  287. for( int i = 1; i < 9; i++ )
  288. x264_cabac_encode_decision( cb, ctxbase + ctxes[i-1], 1 );
  289. x264_cabac_encode_ue_bypass( cb, 3, i_abs - 9 );
  290. }
  291. x264_cabac_encode_bypass( cb, mvd >> 31 );
  292. #endif
  293. /* Since we don't need to keep track of MVDs larger than 66, just cap the value.
  294. * This lets us store MVDs as 8-bit values instead of 16-bit. */
  295. return X264_MIN( i_abs, 66 );
  296. }
  297. static NOINLINE uint16_t cabac_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width )
  298. {
  299. ALIGNED_4( int16_t mvp[2] );
  300. int mdx, mdy;
  301. /* Calculate mvd */
  302. x264_mb_predict_mv( h, i_list, idx, width, mvp );
  303. mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0];
  304. mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1];
  305. uint16_t amvd = x264_cabac_mvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1],
  306. h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]);
  307. /* encode */
  308. mdx = cabac_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFF );
  309. mdy = cabac_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>8 );
  310. return pack8to16(mdx,mdy);
  311. }
  /* From here on, cabac_mvd( h, cb, i_list, idx, width, height ) encodes the
   * mvd through the five-argument cabac_mvd() function and then stores the
   * returned packed value in the mvd cache for the width x height area at
   * block idx. The inner cabac_mvd call is not re-expanded by the
   * preprocessor, so it resolves to the function. */
  #define cabac_mvd(h,cb,i_list,idx,width,height)\
  do\
  {\
      uint16_t packed_mvd = cabac_mvd(h,cb,i_list,idx,width);\
      x264_macroblock_cache_mvd( h, block_idx_x[idx], block_idx_y[idx], width, height, i_list, packed_mvd );\
  } while( 0 )
  318. static inline void cabac_8x8_mvd( x264_t *h, x264_cabac_t *cb, int i )
  319. {
  320. switch( h->mb.i_sub_partition[i] )
  321. {
  322. case D_L0_8x8:
  323. cabac_mvd( h, cb, 0, 4*i, 2, 2 );
  324. break;
  325. case D_L0_8x4:
  326. cabac_mvd( h, cb, 0, 4*i+0, 2, 1 );
  327. cabac_mvd( h, cb, 0, 4*i+2, 2, 1 );
  328. break;
  329. case D_L0_4x8:
  330. cabac_mvd( h, cb, 0, 4*i+0, 1, 2 );
  331. cabac_mvd( h, cb, 0, 4*i+1, 1, 2 );
  332. break;
  333. case D_L0_4x4:
  334. cabac_mvd( h, cb, 0, 4*i+0, 1, 1 );
  335. cabac_mvd( h, cb, 0, 4*i+1, 1, 1 );
  336. cabac_mvd( h, cb, 0, 4*i+2, 1, 1 );
  337. cabac_mvd( h, cb, 0, 4*i+3, 1, 1 );
  338. break;
  339. default:
  340. assert(0);
  341. }
  342. }
  343. static ALWAYS_INLINE void cabac_mb_header_i( x264_t *h, x264_cabac_t *cb, int i_mb_type, int slice_type, int chroma )
  344. {
  345. if( slice_type == SLICE_TYPE_I )
  346. {
  347. int ctx = 0;
  348. if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != I_4x4 )
  349. ctx++;
  350. if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
  351. ctx++;
  352. cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
  353. }
  354. else if( slice_type == SLICE_TYPE_P )
  355. {
  356. /* prefix */
  357. x264_cabac_encode_decision_noup( cb, 14, 1 );
  358. /* suffix */
  359. cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
  360. }
  361. else if( slice_type == SLICE_TYPE_B )
  362. {
  363. /* prefix */
  364. x264_cabac_encode_decision_noup( cb, 27+3, 1 );
  365. x264_cabac_encode_decision_noup( cb, 27+4, 1 );
  366. x264_cabac_encode_decision( cb, 27+5, 1 );
  367. x264_cabac_encode_decision( cb, 27+5, 0 );
  368. x264_cabac_encode_decision( cb, 27+5, 1 );
  369. /* suffix */
  370. cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
  371. }
  372. if( i_mb_type == I_PCM )
  373. return;
  374. if( i_mb_type != I_16x16 )
  375. {
  376. if( h->pps->b_transform_8x8_mode )
  377. cabac_transform_size( h, cb );
  378. int di = h->mb.b_transform_8x8 ? 4 : 1;
  379. for( int i = 0; i < 16; i += di )
  380. {
  381. const int i_pred = x264_mb_predict_intra4x4_mode( h, i );
  382. const int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );
  383. cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
  384. }
  385. }
  386. if( chroma )
  387. cabac_intra_chroma_pred_mode( h, cb );
  388. }
  389. static ALWAYS_INLINE void cabac_mb_header_p( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
  390. {
  391. if( i_mb_type == P_L0 )
  392. {
  393. x264_cabac_encode_decision_noup( cb, 14, 0 );
  394. if( h->mb.i_partition == D_16x16 )
  395. {
  396. x264_cabac_encode_decision_noup( cb, 15, 0 );
  397. x264_cabac_encode_decision_noup( cb, 16, 0 );
  398. if( h->mb.pic.i_fref[0] > 1 )
  399. cabac_ref_p( h, cb, 0 );
  400. cabac_mvd( h, cb, 0, 0, 4, 4 );
  401. }
  402. else if( h->mb.i_partition == D_16x8 )
  403. {
  404. x264_cabac_encode_decision_noup( cb, 15, 1 );
  405. x264_cabac_encode_decision_noup( cb, 17, 1 );
  406. if( h->mb.pic.i_fref[0] > 1 )
  407. {
  408. cabac_ref_p( h, cb, 0 );
  409. cabac_ref_p( h, cb, 8 );
  410. }
  411. cabac_mvd( h, cb, 0, 0, 4, 2 );
  412. cabac_mvd( h, cb, 0, 8, 4, 2 );
  413. }
  414. else //if( h->mb.i_partition == D_8x16 )
  415. {
  416. x264_cabac_encode_decision_noup( cb, 15, 1 );
  417. x264_cabac_encode_decision_noup( cb, 17, 0 );
  418. if( h->mb.pic.i_fref[0] > 1 )
  419. {
  420. cabac_ref_p( h, cb, 0 );
  421. cabac_ref_p( h, cb, 4 );
  422. }
  423. cabac_mvd( h, cb, 0, 0, 2, 4 );
  424. cabac_mvd( h, cb, 0, 4, 2, 4 );
  425. }
  426. }
  427. else if( i_mb_type == P_8x8 )
  428. {
  429. x264_cabac_encode_decision_noup( cb, 14, 0 );
  430. x264_cabac_encode_decision_noup( cb, 15, 0 );
  431. x264_cabac_encode_decision_noup( cb, 16, 1 );
  432. /* sub mb type */
  433. for( int i = 0; i < 4; i++ )
  434. cabac_subpartition_p( cb, h->mb.i_sub_partition[i] );
  435. /* ref 0 */
  436. if( h->mb.pic.i_fref[0] > 1 )
  437. {
  438. cabac_ref_p( h, cb, 0 );
  439. cabac_ref_p( h, cb, 4 );
  440. cabac_ref_p( h, cb, 8 );
  441. cabac_ref_p( h, cb, 12 );
  442. }
  443. for( int i = 0; i < 4; i++ )
  444. cabac_8x8_mvd( h, cb, i );
  445. }
  446. else /* intra */
  447. cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_P, chroma );
  448. }
  449. static ALWAYS_INLINE void cabac_mb_header_b( x264_t *h, x264_cabac_t *cb, int i_mb_type, int chroma )
  450. {
  451. int ctx = 0;
  452. if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left[0] != B_SKIP && h->mb.i_mb_type_left[0] != B_DIRECT )
  453. ctx++;
  454. if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
  455. ctx++;
  456. if( i_mb_type == B_DIRECT )
  457. {
  458. x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
  459. return;
  460. }
  461. x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );
  462. if( i_mb_type == B_8x8 )
  463. {
  464. x264_cabac_encode_decision_noup( cb, 27+3, 1 );
  465. x264_cabac_encode_decision_noup( cb, 27+4, 1 );
  466. x264_cabac_encode_decision( cb, 27+5, 1 );
  467. x264_cabac_encode_decision( cb, 27+5, 1 );
  468. x264_cabac_encode_decision_noup( cb, 27+5, 1 );
  469. /* sub mb type */
  470. for( int i = 0; i < 4; i++ )
  471. cabac_subpartition_b( cb, h->mb.i_sub_partition[i] );
  472. /* ref */
  473. if( h->mb.pic.i_fref[0] > 1 )
  474. for( int i = 0; i < 4; i++ )
  475. if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  476. cabac_ref_b( h, cb, 0, 4*i );
  477. if( h->mb.pic.i_fref[1] > 1 )
  478. for( int i = 0; i < 4; i++ )
  479. if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  480. cabac_ref_b( h, cb, 1, 4*i );
  481. for( int i = 0; i < 4; i++ )
  482. if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
  483. cabac_mvd( h, cb, 0, 4*i, 2, 2 );
  484. for( int i = 0; i < 4; i++ )
  485. if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
  486. cabac_mvd( h, cb, 1, 4*i, 2, 2 );
  487. }
  488. else if( i_mb_type >= B_L0_L0 && i_mb_type <= B_BI_BI )
  489. {
  490. /* All B modes */
  491. static const uint8_t i_mb_bits[9*3] =
  492. {
  493. 0x31, 0x29, 0x4, /* L0 L0 */
  494. 0x35, 0x2d, 0, /* L0 L1 */
  495. 0x43, 0x63, 0, /* L0 BI */
  496. 0x3d, 0x2f, 0, /* L1 L0 */
  497. 0x39, 0x25, 0x6, /* L1 L1 */
  498. 0x53, 0x73, 0, /* L1 BI */
  499. 0x4b, 0x6b, 0, /* BI L0 */
  500. 0x5b, 0x7b, 0, /* BI L1 */
  501. 0x47, 0x67, 0x21 /* BI BI */
  502. };
  503. const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
  504. int bits = i_mb_bits[idx];
  505. x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
  506. x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
  507. if( bits != 1 )
  508. {
  509. x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
  510. x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
  511. x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
  512. if( bits != 1 )
  513. x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
  514. }
  515. const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
  516. if( h->mb.pic.i_fref[0] > 1 )
  517. {
  518. if( b_list[0][0] )
  519. cabac_ref_b( h, cb, 0, 0 );
  520. if( b_list[0][1] && h->mb.i_partition != D_16x16 )
  521. cabac_ref_b( h, cb, 0, 8 >> (h->mb.i_partition == D_8x16) );
  522. }
  523. if( h->mb.pic.i_fref[1] > 1 )
  524. {
  525. if( b_list[1][0] )
  526. cabac_ref_b( h, cb, 1, 0 );
  527. if( b_list[1][1] && h->mb.i_partition != D_16x16 )
  528. cabac_ref_b( h, cb, 1, 8 >> (h->mb.i_partition == D_8x16) );
  529. }
  530. for( int i_list = 0; i_list < 2; i_list++ )
  531. {
  532. if( h->mb.i_partition == D_16x16 )
  533. {
  534. if( b_list[i_list][0] ) cabac_mvd( h, cb, i_list, 0, 4, 4 );
  535. }
  536. else if( h->mb.i_partition == D_16x8 )
  537. {
  538. if( b_list[i_list][0] ) cabac_mvd( h, cb, i_list, 0, 4, 2 );
  539. if( b_list[i_list][1] ) cabac_mvd( h, cb, i_list, 8, 4, 2 );
  540. }
  541. else //if( h->mb.i_partition == D_8x16 )
  542. {
  543. if( b_list[i_list][0] ) cabac_mvd( h, cb, i_list, 0, 2, 4 );
  544. if( b_list[i_list][1] ) cabac_mvd( h, cb, i_list, 4, 2, 4 );
  545. }
  546. }
  547. }
  548. else /* intra */
  549. cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_B, chroma );
  550. }
  551. static ALWAYS_INLINE int cabac_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx, int b_intra, int b_dc )
  552. {
  553. static const uint16_t base_ctx[14] = {85,89,93,97,101,1012,460,464,468,1016,472,476,480,1020};
  554. if( b_dc )
  555. {
  556. i_idx -= LUMA_DC;
  557. if( i_cat == DCT_CHROMA_DC )
  558. {
  559. int i_nza = h->mb.cache.i_cbp_left != -1 ? (h->mb.cache.i_cbp_left >> (8 + i_idx)) & 1 : b_intra;
  560. int i_nzb = h->mb.cache.i_cbp_top != -1 ? (h->mb.cache.i_cbp_top >> (8 + i_idx)) & 1 : b_intra;
  561. return base_ctx[i_cat] + 2*i_nzb + i_nza;
  562. }
  563. else
  564. {
  565. int i_nza = (h->mb.cache.i_cbp_left >> (8 + i_idx)) & 1;
  566. int i_nzb = (h->mb.cache.i_cbp_top >> (8 + i_idx)) & 1;
  567. return base_ctx[i_cat] + 2*i_nzb + i_nza;
  568. }
  569. }
  570. else
  571. {
  572. int i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
  573. int i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
  574. if( x264_constant_p(b_intra) && !b_intra )
  575. return base_ctx[i_cat] + ((2*i_nzb + i_nza)&0x7f);
  576. else
  577. {
  578. i_nza &= 0x7f + (b_intra << 7);
  579. i_nzb &= 0x7f + (b_intra << 7);
  580. return base_ctx[i_cat] + 2*!!i_nzb + !!i_nza;
  581. }
  582. }
  583. }
  /* State machine used while coding coefficient levels.
   *
   * node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
   *           4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter). */

  /* map node ctx => cabac ctx for level=1 */
  static const uint8_t coeff_abs_level1_ctx[8] =
      { 1, 2, 3, 4, 0, 0, 0, 0 };

  /* map node ctx => cabac ctx for level>1 */
  static const uint8_t coeff_abs_levelgt1_ctx[8] =
      { 5, 5, 5, 5, 6, 7, 8, 9 };

  /* 4:2:2 chroma dc uses a slightly different state machine for some reason, also note that
   * 4:2:0 chroma dc doesn't use the last state so it has identical output with both arrays. */
  static const uint8_t coeff_abs_levelgt1_ctx_chroma_dc[8] =
      { 5, 5, 5, 5, 6, 7, 8, 8 };

  /* Next node ctx, indexed by [coded level > 1][current node ctx]. */
  static const uint8_t coeff_abs_level_transition[2][8] =
  {
      /* update node ctx after coding a level=1 */
      { 1, 2, 3, 3, 4, 5, 6, 7 },
      /* update node ctx after coding a level>1 */
      { 4, 4, 4, 4, 5, 6, 7, 7 }
  };
  599. #if !RDO_SKIP_BS
  600. static ALWAYS_INLINE void cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int chroma422dc )
  601. {
  602. int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
  603. int ctx_last = x264_last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
  604. int ctx_level = x264_coeff_abs_level_m1_offset[ctx_block_cat];
  605. int coeff_idx = -1, node_ctx = 0;
  606. int last = h->quantf.coeff_last[ctx_block_cat]( l );
  607. const uint8_t *levelgt1_ctx = chroma422dc ? coeff_abs_levelgt1_ctx_chroma_dc : coeff_abs_levelgt1_ctx;
  608. dctcoef coeffs[64];
  609. #define WRITE_SIGMAP( sig_off, last_off )\
  610. {\
  611. int i = 0;\
  612. while( 1 )\
  613. {\
  614. if( l[i] )\
  615. {\
  616. coeffs[++coeff_idx] = l[i];\
  617. x264_cabac_encode_decision( cb, ctx_sig + sig_off, 1 );\
  618. if( i == last )\
  619. {\
  620. x264_cabac_encode_decision( cb, ctx_last + last_off, 1 );\
  621. break;\
  622. }\
  623. else\
  624. x264_cabac_encode_decision( cb, ctx_last + last_off, 0 );\
  625. }\
  626. else\
  627. x264_cabac_encode_decision( cb, ctx_sig + sig_off, 0 );\
  628. if( ++i == count_m1 )\
  629. {\
  630. coeffs[++coeff_idx] = l[i];\
  631. break;\
  632. }\
  633. }\
  634. }
  635. if( chroma422dc )
  636. {
  637. int count_m1 = 7;
  638. WRITE_SIGMAP( x264_coeff_flag_offset_chroma_422_dc[i], x264_coeff_flag_offset_chroma_422_dc[i] )
  639. }
  640. else
  641. {
  642. int count_m1 = x264_count_cat_m1[ctx_block_cat];
  643. if( count_m1 == 63 )
  644. {
  645. const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
  646. WRITE_SIGMAP( sig_offset[i], x264_last_coeff_flag_offset_8x8[i] )
  647. }
  648. else
  649. WRITE_SIGMAP( i, i )
  650. }
  651. do
  652. {
  653. /* write coeff_abs - 1 */
  654. int coeff = coeffs[coeff_idx];
  655. int abs_coeff = abs(coeff);
  656. int coeff_sign = coeff >> 31;
  657. int ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;
  658. if( abs_coeff > 1 )
  659. {
  660. x264_cabac_encode_decision( cb, ctx, 1 );
  661. ctx = levelgt1_ctx[node_ctx] + ctx_level;
  662. for( int i = X264_MIN( abs_coeff, 15 ) - 2; i > 0; i-- )
  663. x264_cabac_encode_decision( cb, ctx, 1 );
  664. if( abs_coeff < 15 )
  665. x264_cabac_encode_decision( cb, ctx, 0 );
  666. else
  667. x264_cabac_encode_ue_bypass( cb, 0, abs_coeff - 15 );
  668. node_ctx = coeff_abs_level_transition[1][node_ctx];
  669. }
  670. else
  671. {
  672. x264_cabac_encode_decision( cb, ctx, 0 );
  673. node_ctx = coeff_abs_level_transition[0][node_ctx];
  674. }
  675. x264_cabac_encode_bypass( cb, coeff_sign );
  676. } while( --coeff_idx >= 0 );
  677. }
  678. void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
  679. {
  680. cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0 );
  681. }
  682. static ALWAYS_INLINE void cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
  683. {
  684. #if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
  685. h->bsf.cabac_block_residual_internal( l, MB_INTERLACED, ctx_block_cat, cb );
  686. #else
  687. x264_cabac_block_residual_c( h, cb, ctx_block_cat, l );
  688. #endif
  689. }
  690. static void cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
  691. {
  692. /* Template a version specifically for chroma 4:2:2 DC in order to avoid
  693. * slowing down everything else due to the added complexity. */
  694. cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 1 );
  695. }
  /* In this (!RDO_SKIP_BS) variant the 8x8 residual entry point is simply an
   * alias for cabac_block_residual(). */
  #define cabac_block_residual_8x8( h, cb, cat, l ) \
      cabac_block_residual( h, cb, cat, l )
  697. #else
  698. /* Faster RDO by merging sigmap and level coding. Note that for 8x8dct and chroma 4:2:2 dc this is
  699. * slightly incorrect because the sigmap is not reversible (contexts are repeated). However, there
  700. * is nearly no quality penalty for this (~0.001db) and the speed boost (~30%) is worth it. */
  701. static ALWAYS_INLINE void cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
  702. {
  703. const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
  704. int ctx_sig = x264_significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
  705. int ctx_last = x264_last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
  706. int ctx_level = x264_coeff_abs_level_m1_offset[ctx_block_cat];
  707. int last = h->quantf.coeff_last[ctx_block_cat]( l );
  708. int coeff_abs = abs(l[last]);
  709. int ctx = coeff_abs_level1_ctx[0] + ctx_level;
  710. int node_ctx;
  711. const uint8_t *levelgt1_ctx = chroma422dc ? coeff_abs_levelgt1_ctx_chroma_dc : coeff_abs_levelgt1_ctx;
  712. if( last != (b_8x8 ? 63 : chroma422dc ? 7 : x264_count_cat_m1[ctx_block_cat]) )
  713. {
  714. x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[last] :
  715. chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
  716. x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[last] :
  717. chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
  718. }
  719. if( coeff_abs > 1 )
  720. {
  721. x264_cabac_encode_decision( cb, ctx, 1 );
  722. ctx = levelgt1_ctx[0] + ctx_level;
  723. if( coeff_abs < 15 )
  724. {
  725. cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
  726. cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
  727. }
  728. else
  729. {
  730. cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
  731. cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
  732. x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
  733. }
  734. node_ctx = coeff_abs_level_transition[1][0];
  735. }
  736. else
  737. {
  738. x264_cabac_encode_decision( cb, ctx, 0 );
  739. node_ctx = coeff_abs_level_transition[0][0];
  740. x264_cabac_encode_bypass( cb, 0 ); // sign
  741. }
  742. for( int i = last-1; i >= 0; i-- )
  743. {
  744. if( l[i] )
  745. {
  746. coeff_abs = abs(l[i]);
  747. x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
  748. chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 1 );
  749. x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[i] :
  750. chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
  751. ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;
  752. if( coeff_abs > 1 )
  753. {
  754. x264_cabac_encode_decision( cb, ctx, 1 );
  755. ctx = levelgt1_ctx[node_ctx] + ctx_level;
  756. if( coeff_abs < 15 )
  757. {
  758. cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
  759. cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
  760. }
  761. else
  762. {
  763. cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
  764. cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
  765. x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
  766. }
  767. node_ctx = coeff_abs_level_transition[1][node_ctx];
  768. }
  769. else
  770. {
  771. x264_cabac_encode_decision( cb, ctx, 0 );
  772. node_ctx = coeff_abs_level_transition[0][node_ctx];
  773. x264_cabac_encode_bypass( cb, 0 );
  774. }
  775. }
  776. else
  777. x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
  778. chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
  779. }
  780. }
  781. void x264_cabac_block_residual_8x8_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
  782. {
  783. cabac_block_residual_internal( h, cb, ctx_block_cat, l, 1, 0 );
  784. }
  785. void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
  786. {
  787. cabac_block_residual_internal( h, cb, ctx_block_cat, l, 0, 0 );
  788. }
  789. static ALWAYS_INLINE void cabac_block_residual_8x8( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
  790. {
  791. #if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
  792. h->bsf.cabac_block_residual_8x8_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
  793. #else
  794. x264_cabac_block_residual_8x8_rd_c( h, cb, ctx_block_cat, l );
  795. #endif
  796. }
  797. static ALWAYS_INLINE void cabac_block_residual( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
  798. {
  799. #if ARCH_X86_64 && HAVE_MMX && !defined( __MACH__ )
  800. h->bsf.cabac_block_residual_rd_internal( l, MB_INTERLACED, ctx_block_cat, cb );
  801. #else
  802. x264_cabac_block_residual_rd_c( h, cb, ctx_block_cat, l );
  803. #endif
  804. }
  805. static void cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l )
  806. {
  807. cabac_block_residual_internal( h, cb, DCT_CHROMA_DC, l, 0, 1 );
  808. }
  809. #endif
  /* Code the cbf decision of block `i_idx` and, when the block has nonzero
   * coefficients, its residual.
   *
   * The context index of the decision is obtained from cabac_cbf_ctxidxinc().
   * The decision value is 1 when h->mb.cache.non_zero_count[x264_scan8[i_idx]]
   * is nonzero and 0 otherwise; only in the former case is `l` coded.
   * `name` is pasted onto "cabac_block_residual" to pick the residual coder;
   * the wrappers in this file pass an empty suffix, _8x8 or _422_dc.
   *
   * Arguments can be expanded more than once, so they must not have side
   * effects. */
  #define cabac_block_residual_cbf_internal( h, cb, ctx_block_cat, i_idx, l, b_intra, b_dc, name )\
  do\
  {\
      const int cbf_ctxidx_ = cabac_cbf_ctxidxinc( h, ctx_block_cat, i_idx, b_intra, b_dc );\
      if( !h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
          x264_cabac_encode_decision( cb, cbf_ctxidx_, 0 );\
      else\
      {\
          x264_cabac_encode_decision( cb, cbf_ctxidx_, 1 );\
          cabac_block_residual##name( h, cb, ctx_block_cat, l );\
      }\
  } while( 0 )
  /* cbf decision plus residual for a DC block: b_dc is 1 and the default
   * residual coder (empty name suffix) is used. */
  #define cabac_block_residual_dc_cbf( enc, cabac, block_cat, blk_idx, coefs, is_intra )\
      cabac_block_residual_cbf_internal( enc, cabac, block_cat, blk_idx, coefs, is_intra, 1, )
  /* cbf decision plus residual for a non-DC block: b_dc is 0 and the default
   * residual coder (empty name suffix) is used. */
  #define cabac_block_residual_cbf( enc, cabac, block_cat, blk_idx, coefs, is_intra )\
      cabac_block_residual_cbf_internal( enc, cabac, block_cat, blk_idx, coefs, is_intra, 0, )
  /* cbf decision plus residual for an 8x8 block: b_dc is 0 and the residual
   * is coded with cabac_block_residual_8x8. */
  #define cabac_block_residual_8x8_cbf( enc, cabac, block_cat, blk_idx, coefs, is_intra )\
      cabac_block_residual_cbf_internal( enc, cabac, block_cat, blk_idx, coefs, is_intra, 0, _8x8 )
  /* cbf decision plus residual for the chroma 4:2:2 DC block of channel `ch`.
   * Category is DCT_CHROMA_DC, block index is CHROMA_DC+ch, coefficients come
   * from the encoder's dct.chroma_dc[ch], b_dc is 1 and the residual is coded
   * with cabac_block_residual_422_dc. */
  #define cabac_block_residual_422_dc_cbf( enc, cabac, ch, is_intra )\
      cabac_block_residual_cbf_internal( enc, cabac, DCT_CHROMA_DC, CHROMA_DC+(ch), enc->dct.chroma_dc[ch], is_intra, 1, _422_dc )
  830. static ALWAYS_INLINE void macroblock_write_cabac_internal( x264_t *h, x264_cabac_t *cb, int plane_count, int chroma )
  831. {
  832. const int i_mb_type = h->mb.i_type;
  833. #if !RDO_SKIP_BS
  834. const int i_mb_pos_start = x264_cabac_pos( cb );
  835. int i_mb_pos_tex;
  836. if( SLICE_MBAFF &&
  837. (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
  838. {
  839. cabac_field_decoding_flag( h, cb );
  840. }
  841. #endif
  842. if( h->sh.i_type == SLICE_TYPE_P )
  843. cabac_mb_header_p( h, cb, i_mb_type, chroma );
  844. else if( h->sh.i_type == SLICE_TYPE_B )
  845. cabac_mb_header_b( h, cb, i_mb_type, chroma );
  846. else //if( h->sh.i_type == SLICE_TYPE_I )
  847. cabac_mb_header_i( h, cb, i_mb_type, SLICE_TYPE_I, chroma );
  848. #if !RDO_SKIP_BS
  849. i_mb_pos_tex = x264_cabac_pos( cb );
  850. h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
  851. if( i_mb_type == I_PCM )
  852. {
  853. bs_t s;
  854. bs_init( &s, cb->p, cb->p_end - cb->p );
  855. for( int p = 0; p < plane_count; p++ )
  856. for( int i = 0; i < 256; i++ )
  857. bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[p][i] );
  858. if( chroma )
  859. for( int ch = 1; ch < 3; ch++ )
  860. for( int i = 0; i < 16>>CHROMA_V_SHIFT; i++ )
  861. for( int j = 0; j < 8; j++ )
  862. bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
  863. bs_flush( &s );
  864. cb->p = s.p;
  865. x264_cabac_encode_init_core( cb );
  866. h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
  867. return;
  868. }
  869. #endif
  870. if( i_mb_type != I_16x16 )
  871. {
  872. cabac_cbp_luma( h, cb );
  873. if( chroma )
  874. cabac_cbp_chroma( h, cb );
  875. }
  876. if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
  877. {
  878. cabac_transform_size( h, cb );
  879. }
  880. if( h->mb.i_cbp_luma || (chroma && h->mb.i_cbp_chroma) || i_mb_type == I_16x16 )
  881. {
  882. const int b_intra = IS_INTRA( i_mb_type );
  883. cabac_qp_delta( h, cb );
  884. /* write residual */
  885. if( i_mb_type == I_16x16 )
  886. {
  887. /* DC Luma */
  888. for( int p = 0; p < plane_count; p++ )
  889. {
  890. cabac_block_residual_dc_cbf( h, cb, ctx_cat_plane[DCT_LUMA_DC][p], LUMA_DC+p, h->dct.luma16x16_dc[p], 1 );
  891. /* AC Luma */
  892. if( h->mb.i_cbp_luma )
  893. for( int i = p*16; i < p*16+16; i++ )
  894. cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_AC][p], i, h->dct.luma4x4[i]+1, 1 );
  895. }
  896. }
  897. else if( h->mb.b_transform_8x8 )
  898. {
  899. if( plane_count == 3 )
  900. {
  901. ALIGNED_4( uint8_t nnzbak[3][8] );
  902. /* Stupid nnz munging in the case that neighbors don't have
  903. * 8x8 transform enabled. */
  904. #define BACKUP( dst, src, res )\
  905. dst = src;\
  906. src = res;
  907. #define RESTORE( dst, src, res )\
  908. src = dst;
  909. #define MUNGE_8x8_NNZ( MUNGE )\
  910. if( (h->mb.i_neighbour & MB_LEFT) && !h->mb.mb_transform_size[h->mb.i_mb_left_xy[0]] && !(h->mb.cbp[h->mb.i_mb_left_xy[0]] & 0x1000) )\
  911. {\
  912. MUNGE( nnzbak[0][0], h->mb.cache.non_zero_count[x264_scan8[16*0+ 0] - 1], 0x00 )\
  913. MUNGE( nnzbak[0][1], h->mb.cache.non_zero_count[x264_scan8[16*0+ 2] - 1], 0x00 )\
  914. MUNGE( nnzbak[1][0], h->mb.cache.non_zero_count[x264_scan8[16*1+ 0] - 1], 0x00 )\
  915. MUNGE( nnzbak[1][1], h->mb.cache.non_zero_count[x264_scan8[16*1+ 2] - 1], 0x00 )\
  916. MUNGE( nnzbak[2][0], h->mb.cache.non_zero_count[x264_scan8[16*2+ 0] - 1], 0x00 )\
  917. MUNGE( nnzbak[2][1], h->mb.cache.non_zero_count[x264_scan8[16*2+ 2] - 1], 0x00 )\
  918. }\
  919. if( (h->mb.i_neighbour & MB_LEFT) && !h->mb.mb_transform_size[h->mb.i_mb_left_xy[1]] && !(h->mb.cbp[h->mb.i_mb_left_xy[1]] & 0x1000) )\
  920. {\
  921. MUNGE( nnzbak[0][2], h->mb.cache.non_zero_count[x264_scan8[16*0+ 8] - 1], 0x00 )\
  922. MUNGE( nnzbak[0][3], h->mb.cache.non_zero_count[x264_scan8[16*0+10] - 1], 0x00 )\
  923. MUNGE( nnzbak[1][2], h->mb.cache.non_zero_count[x264_scan8[16*1+ 8] - 1], 0x00 )\
  924. MUNGE( nnzbak[1][3], h->mb.cache.non_zero_count[x264_scan8[16*1+10] - 1], 0x00 )\
  925. MUNGE( nnzbak[2][2], h->mb.cache.non_zero_count[x264_scan8[16*2+ 8] - 1], 0x00 )\
  926. MUNGE( nnzbak[2][3], h->mb.cache.non_zero_count[x264_scan8[16*2+10] - 1], 0x00 )\
  927. }\
  928. if( (h->mb.i_neighbour & MB_TOP) && !h->mb.mb_transform_size[h->mb.i_mb_top_xy] && !(h->mb.cbp[h->mb.i_mb_top_xy] & 0x1000) )\
  929. {\
  930. MUNGE( M32( &nnzbak[0][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*0] - 8] ), 0x00000000U )\
  931. MUNGE( M32( &nnzbak[1][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*1] - 8] ), 0x00000000U )\
  932. MUNGE( M32( &nnzbak[2][4] ), M32( &h->mb.cache.non_zero_count[x264_scan8[16*2] - 8] ), 0x00000000U )\
  933. }
  934. MUNGE_8x8_NNZ( BACKUP )
  935. for( int p = 0; p < 3; p++ )
  936. FOREACH_BIT( i, 0, h->mb.i_cbp_luma )
  937. cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i*4+p*16, h->dct.luma8x8[i+p*4], b_intra );
  938. MUNGE_8x8_NNZ( RESTORE )
  939. }
  940. else
  941. {
  942. FOREACH_BIT( i, 0, h->mb.i_cbp_luma )
  943. cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i] );
  944. }
  945. }
  946. else
  947. {
  948. for( int p = 0; p < plane_count; p++ )
  949. FOREACH_BIT( i8x8, 0, h->mb.i_cbp_luma )
  950. for( int i = 0; i < 4; i++ )
  951. cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i+i8x8*4+p*16, h->dct.luma4x4[i+i8x8*4+p*16], b_intra );
  952. }
  953. if( chroma && h->mb.i_cbp_chroma ) /* Chroma DC residual present */
  954. {
  955. if( CHROMA_FORMAT == CHROMA_422 )
  956. {
  957. cabac_block_residual_422_dc_cbf( h, cb, 0, b_intra );
  958. cabac_block_residual_422_dc_cbf( h, cb, 1, b_intra );
  959. }
  960. else
  961. {
  962. cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], b_intra );
  963. cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], b_intra );
  964. }
  965. if( h->mb.i_cbp_chroma == 2 ) /* Chroma AC residual present */
  966. {
  967. int step = 8 << CHROMA_V_SHIFT;
  968. for( int i = 16; i < 3*16; i += step )
  969. for( int j = i; j < i+4; j++ )
  970. cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, b_intra );
  971. }
  972. }
  973. }
  974. #if !RDO_SKIP_BS
  975. h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
  976. #endif
  977. }
  978. void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
  979. {
  980. if( CHROMA444 )
  981. macroblock_write_cabac_internal( h, cb, 3, 0 );
  982. else if( CHROMA_FORMAT )
  983. macroblock_write_cabac_internal( h, cb, 1, 1 );
  984. else
  985. macroblock_write_cabac_internal( h, cb, 1, 0 );
  986. }
  987. #if RDO_SKIP_BS
  988. /*****************************************************************************
  989. * RD only; doesn't generate a valid bitstream
  990. * doesn't write cbp or chroma dc (I don't know how much this matters)
  991. * doesn't write ref (never varies between calls, so no point in doing so)
  992. * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO
  993. * works on all partition sizes except 16x16
  994. *****************************************************************************/
  995. static void partition_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_pixel )
  996. {
  997. const int i_mb_type = h->mb.i_type;
  998. int b_8x16 = h->mb.i_partition == D_8x16;
  999. int plane_count = CHROMA444 ? 3 : 1;
  1000. if( i_mb_type == P_8x8 )
  1001. {
  1002. cabac_8x8_mvd( h, cb, i8 );
  1003. cabac_subpartition_p( cb, h->mb.i_sub_partition[i8] );
  1004. }
  1005. else if( i_mb_type == P_L0 )
  1006. cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
  1007. else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
  1008. {
  1009. if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cabac_mvd( h, cb, 0, 4*i8, 4>>b_8x16, 2<<b_8x16 );
  1010. if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cabac_mvd( h, cb, 1, 4*i8, 4>>b_8x16, 2<<b_8x16 );
  1011. }
  1012. else //if( i_mb_type == B_8x8 )
  1013. {
  1014. if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
  1015. cabac_mvd( h, cb, 0, 4*i8, 2, 2 );
  1016. if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
  1017. cabac_mvd( h, cb, 1, 4*i8, 2, 2 );
  1018. }
  1019. for( int j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
  1020. {
  1021. if( h->mb.i_cbp_luma & (1 << i8) )
  1022. {
  1023. if( h->mb.b_transform_8x8 )
  1024. {
  1025. if( CHROMA444 )
  1026. for( int p = 0; p < 3; p++ )
  1027. cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 0 );
  1028. else
  1029. cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
  1030. }
  1031. else
  1032. for( int p = 0; p < plane_count; p++ )
  1033. for( int i4 = 0; i4 < 4; i4++ )
  1034. cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16], 0 );
  1035. }
  1036. if( h->mb.i_cbp_chroma )
  1037. {
  1038. if( CHROMA_FORMAT == CHROMA_422 )
  1039. {
  1040. int offset = (5*i8) & 0x09;
  1041. cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+offset, h->dct.luma4x4[16+offset]+1, 0 );
  1042. cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 18+offset, h->dct.luma4x4[18+offset]+1, 0 );
  1043. cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+offset, h->dct.luma4x4[32+offset]+1, 0 );
  1044. cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 34+offset, h->dct.luma4x4[34+offset]+1, 0 );
  1045. }
  1046. else
  1047. {
  1048. cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 0 );
  1049. cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1, 0 );
  1050. }
  1051. }
  1052. i8 += x264_pixel_size[i_pixel].h >> 3;
  1053. }
  1054. }
  1055. static void subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel )
  1056. {
  1057. int b_8x4 = i_pixel == PIXEL_8x4;
  1058. int plane_count = CHROMA444 ? 3 : 1;
  1059. if( i_pixel == PIXEL_4x4 )
  1060. cabac_mvd( h, cb, 0, i4, 1, 1 );
  1061. else
  1062. cabac_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
  1063. for( int p = 0; p < plane_count; p++ )
  1064. {
  1065. cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4, h->dct.luma4x4[p*16+i4], 0 );
  1066. if( i_pixel != PIXEL_4x4 )
  1067. cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], p*16+i4+2-b_8x4, h->dct.luma4x4[p*16+i4+2-b_8x4], 0 );
  1068. }
  1069. }
  1070. static void partition_i8x8_size_cabac( x264_t *h, x264_cabac_t *cb, int i8, int i_mode )
  1071. {
  1072. const int i_pred = x264_mb_predict_intra4x4_mode( h, 4*i8 );
  1073. i_mode = x264_mb_pred_mode4x4_fix( i_mode );
  1074. cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
  1075. cabac_cbp_luma( h, cb );
  1076. if( h->mb.i_cbp_luma & (1 << i8) )
  1077. {
  1078. if( CHROMA444 )
  1079. for( int p = 0; p < 3; p++ )
  1080. cabac_block_residual_8x8_cbf( h, cb, ctx_cat_plane[DCT_LUMA_8x8][p], i8*4+p*16, h->dct.luma8x8[i8+p*4], 1 );
  1081. else
  1082. cabac_block_residual_8x8( h, cb, DCT_LUMA_8x8, h->dct.luma8x8[i8] );
  1083. }
  1084. }
  1085. static void partition_i4x4_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_mode )
  1086. {
  1087. const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
  1088. int plane_count = CHROMA444 ? 3 : 1;
  1089. i_mode = x264_mb_pred_mode4x4_fix( i_mode );
  1090. cabac_intra4x4_pred_mode( cb, i_pred, i_mode );
  1091. for( int p = 0; p < plane_count; p++ )
  1092. cabac_block_residual_cbf( h, cb, ctx_cat_plane[DCT_LUMA_4x4][p], i4+p*16, h->dct.luma4x4[i4+p*16], 1 );
  1093. }
  1094. static void chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
  1095. {
  1096. cabac_intra_chroma_pred_mode( h, cb );
  1097. cabac_cbp_chroma( h, cb );
  1098. if( h->mb.i_cbp_chroma )
  1099. {
  1100. if( CHROMA_FORMAT == CHROMA_422 )
  1101. {
  1102. cabac_block_residual_422_dc_cbf( h, cb, 0, 1 );
  1103. cabac_block_residual_422_dc_cbf( h, cb, 1, 1 );
  1104. }
  1105. else
  1106. {
  1107. cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0], 1 );
  1108. cabac_block_residual_dc_cbf( h, cb, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1], 1 );
  1109. }
  1110. if( h->mb.i_cbp_chroma == 2 )
  1111. {
  1112. int step = 8 << CHROMA_V_SHIFT;
  1113. for( int i = 16; i < 3*16; i += step )
  1114. for( int j = i; j < i+4; j++ )
  1115. cabac_block_residual_cbf( h, cb, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1, 1 );
  1116. }
  1117. }
  1118. }
  1119. #endif