/*****************************************************************************
 * predict-c.c: intra prediction
 *****************************************************************************
 * Copyright (C) 2003-2018 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/
#include "common/common.h"
#include "predict.h"
#include "pixel.h"
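
/* Accumulate one tap of the H (horizontal) and V (vertical) gradient sums used
 * by H.264 plane prediction: j is the offset of the border's center sample and
 * i the tap weight, so each expansion adds i times the difference between the
 * samples i positions to either side of the center, along the row above the
 * block (for H) and the column to its left (for V). */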
#define PREDICT_P_SUM(j,i)\
    H += i * ( src[j+i - FDEC_STRIDE ]   - src[j-i - FDEC_STRIDE ] );\
    V += i * ( src[(j+i)*FDEC_STRIDE -1] - src[(j-i)*FDEC_STRIDE -1] );
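
/* Tap-weight tables for the inline-asm gradient sums below: pmaddwd (16-bit
 * pixels) or pmaddubsw (8-bit pixels) multiplies the border samples by these
 * constants and sums adjacent products, replacing the chain of scalar
 * PREDICT_P_SUM steps with a single multiply-accumulate. */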
#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
ALIGNED_16( static const int16_t pw_12345678[8] )  = {1,2,3,4,5,6,7,8};
ALIGNED_16( static const int16_t pw_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_16( static const int16_t pw_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
#else // !HIGH_BIT_DEPTH
ALIGNED_8( static const int8_t pb_12345678[8] )  = {1,2,3,4,5,6,7,8};
ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
#endif // HIGH_BIT_DEPTH
#endif // HAVE_X86_INLINE_ASM
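
/* Gradient sums for 16x16 plane prediction: the border's center sample sits at
 * offset 7, with eight weighted taps on each side. */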
#define PREDICT_16x16_P_CORE\
    int H = 0;\
    int V = 0;\
    PREDICT_P_SUM(7,1)\
    PREDICT_P_SUM(7,2)\
    PREDICT_P_SUM(7,3)\
    PREDICT_P_SUM(7,4)\
    PREDICT_P_SUM(7,5)\
    PREDICT_P_SUM(7,6)\
    PREDICT_P_SUM(7,7)\
    PREDICT_P_SUM(7,8)
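
/* Derive the plane parameters: a is 16x the corner-based DC, b and c are the
 * scaled gradients, and i00 = a - 7*b - 7*c + 16 is the unshifted value of
 * pixel (0,0), so the core routine can fill pixel (x,y) with
 * clip( (i00 + b*x + c*y) >> 5 ), equivalent to the scalar reference
 * x264_predict_16x16_p_c. */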
#define PREDICT_16x16_P_END(name)\
    int a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );\
    int b = ( 5 * H + 32 ) >> 6;\
    int c = ( 5 * V + 32 ) >> 6;\
    int i00 = a - b * 7 - c * 7 + 16;\
    /* b*15 + c*15 can overflow: it's easier to just branch away in this rare case
     * than to try to consider it in the asm. */\
    if( BIT_DEPTH > 8 && (i00 > 0x7fff || abs(b) > 1092 || abs(c) > 1092) )\
        x264_predict_16x16_p_c( src );\
    else\
        x264_predict_16x16_p_core_##name( src, i00, b, c );

#define PREDICT_16x16_P(name, name2)\
    static void predict_16x16_p_##name( pixel *src )\
    {\
        PREDICT_16x16_P_CORE\
        PREDICT_16x16_P_END(name2)\
    }
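
/* Inline-asm variants compute H with one pmaddwd/pmaddubsw pass over the row
 * above the block; V stays scalar, since the left column is strided and cannot
 * be loaded as a single vector. */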
#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
#define PREDICT_16x16_P_ASM\
    asm (\
        "movdqu           %1, %%xmm1 \n"\
        "movdqa           %2, %%xmm0 \n"\
        "pmaddwd          %3, %%xmm0 \n"\
        "pmaddwd          %4, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "movhlps      %%xmm0, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "pshuflw $14, %%xmm0, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "movd         %%xmm0, %0     \n"\
        :"=r"(H)\
        :"m"(src[-FDEC_STRIDE-1]), "m"(src[-FDEC_STRIDE+8]),\
         "m"(*pw_12345678), "m"(*pw_m87654321)\
    );
#else // !HIGH_BIT_DEPTH
#define PREDICT_16x16_P_ASM\
    asm (\
        "movq        %1, %%mm1 \n"\
        "movq        %2, %%mm0 \n"\
        "palignr $7, %3, %%mm1 \n"\
        "pmaddubsw   %4, %%mm0 \n"\
        "pmaddubsw   %5, %%mm1 \n"\
        "paddw    %%mm1, %%mm0 \n"\
        "pshufw $14, %%mm0, %%mm1 \n"\
        "paddw    %%mm1, %%mm0 \n"\
        "pshufw  $1, %%mm0, %%mm1 \n"\
        "paddw    %%mm1, %%mm0 \n"\
        "movd     %%mm0, %0 \n"\
        "movswl     %w0, %0 \n"\
        :"=r"(H)\
        :"m"(src[-FDEC_STRIDE]), "m"(src[-FDEC_STRIDE+8]),\
         "m"(src[-FDEC_STRIDE-8]), "m"(*pb_12345678), "m"(*pb_m87654321)\
    );
#endif // HIGH_BIT_DEPTH

#define PREDICT_16x16_P_CORE_INLINE\
    int H, V;\
    PREDICT_16x16_P_ASM\
    V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] )\
      + 7 * ( src[14*FDEC_STRIDE-1] - src[ 0*FDEC_STRIDE-1] )\
      + 6 * ( src[13*FDEC_STRIDE-1] - src[ 1*FDEC_STRIDE-1] )\
      + 5 * ( src[12*FDEC_STRIDE-1] - src[ 2*FDEC_STRIDE-1] )\
      + 4 * ( src[11*FDEC_STRIDE-1] - src[ 3*FDEC_STRIDE-1] )\
      + 3 * ( src[10*FDEC_STRIDE-1] - src[ 4*FDEC_STRIDE-1] )\
      + 2 * ( src[ 9*FDEC_STRIDE-1] - src[ 5*FDEC_STRIDE-1] )\
      + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] );

#define PREDICT_16x16_P_INLINE(name, name2)\
    static void predict_16x16_p_##name( pixel *src )\
    {\
        PREDICT_16x16_P_CORE_INLINE\
        PREDICT_16x16_P_END(name2)\
    }
#else // !HAVE_X86_INLINE_ASM
#define PREDICT_16x16_P_INLINE(name, name2) PREDICT_16x16_P(name, name2)
#endif // HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
PREDICT_16x16_P_INLINE( sse2, sse2 )
#else // !HIGH_BIT_DEPTH
#if !ARCH_X86_64
PREDICT_16x16_P( mmx2, mmx2 )
#endif // !ARCH_X86_64
PREDICT_16x16_P( sse2, sse2 )
#if HAVE_X86_INLINE_ASM
PREDICT_16x16_P_INLINE( ssse3, sse2 )
#endif // HAVE_X86_INLINE_ASM
PREDICT_16x16_P_INLINE( avx, avx )
#endif // HIGH_BIT_DEPTH
PREDICT_16x16_P_INLINE( avx2, avx2 )
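
/* 8x16 chroma (4:2:2) plane prediction: the horizontal gradient uses four taps
 * centered at column 3, as for 8x8 chroma, while the vertical uses eight taps
 * centered at row 7, as for 16x16 luma; hence the mixed normalizations
 * b = (17*H+16)>>5 and c = (5*V+32)>>6. */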
#define PREDICT_8x16C_P_CORE\
    int H = 0, V = 0;\
    for( int i = 0; i < 4; i++ )\
        H += ( i + 1 ) * ( src[4 + i - FDEC_STRIDE] - src[2 - i - FDEC_STRIDE] );\
    for( int i = 0; i < 8; i++ )\
        V += ( i + 1 ) * ( src[-1 + (i+8)*FDEC_STRIDE] - src[-1 + (6-i)*FDEC_STRIDE] );

#if HIGH_BIT_DEPTH
#define PREDICT_8x16C_P_END(name)\
    int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );\
    int b = ( 17 * H + 16 ) >> 5;\
    int c = ( 5 * V + 32 ) >> 6;\
    x264_predict_8x16c_p_core_##name( src, a, b, c );
#else // !HIGH_BIT_DEPTH
#define PREDICT_8x16C_P_END(name)\
    int a = 16 * ( src[-1 + 15*FDEC_STRIDE] + src[7 - FDEC_STRIDE] );\
    int b = ( 17 * H + 16 ) >> 5;\
    int c = ( 5 * V + 32 ) >> 6;\
    int i00 = a -3*b -7*c + 16;\
    x264_predict_8x16c_p_core_##name( src, i00, b, c );
#endif // HIGH_BIT_DEPTH

#define PREDICT_8x16C_P(name)\
    static void predict_8x16c_p_##name( pixel *src )\
    {\
        PREDICT_8x16C_P_CORE\
        PREDICT_8x16C_P_END(name)\
    }

#if !ARCH_X86_64 && !HIGH_BIT_DEPTH
PREDICT_8x16C_P( mmx2 )
#endif // !ARCH_X86_64 && !HIGH_BIT_DEPTH
PREDICT_8x16C_P( sse2 )
PREDICT_8x16C_P( avx )
PREDICT_8x16C_P( avx2 )
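
/* 8x8 chroma plane prediction: four taps in each direction with the border's
 * center sample at offset 3; both gradients scale as (17*X+16)>>5. */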
#define PREDICT_8x8C_P_CORE\
    int H = 0;\
    int V = 0;\
    PREDICT_P_SUM(3,1)\
    PREDICT_P_SUM(3,2)\
    PREDICT_P_SUM(3,3)\
    PREDICT_P_SUM(3,4)

#if HIGH_BIT_DEPTH
#define PREDICT_8x8C_P_END(name)\
    int a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
    int b = ( 17 * H + 16 ) >> 5;\
    int c = ( 17 * V + 16 ) >> 5;\
    x264_predict_8x8c_p_core_##name( src, a, b, c );
#else // !HIGH_BIT_DEPTH
#define PREDICT_8x8C_P_END(name)\
    int a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
    int b = ( 17 * H + 16 ) >> 5;\
    int c = ( 17 * V + 16 ) >> 5;\
    int i00 = a -3*b -3*c + 16;\
    x264_predict_8x8c_p_core_##name( src, i00, b, c );
#endif // HIGH_BIT_DEPTH

#define PREDICT_8x8C_P(name, name2)\
    static void predict_8x8c_p_##name( pixel *src )\
    {\
        PREDICT_8x8C_P_CORE\
        PREDICT_8x8C_P_END(name2)\
    }

#if HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
#define PREDICT_8x8C_P_ASM\
    asm (\
        "movdqa           %1, %%xmm0 \n"\
        "pmaddwd          %2, %%xmm0 \n"\
        "movhlps      %%xmm0, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "pshuflw $14, %%xmm0, %%xmm1 \n"\
        "paddd        %%xmm1, %%xmm0 \n"\
        "movd         %%xmm0, %0     \n"\
        :"=r"(H)\
        :"m"(src[-FDEC_STRIDE]), "m"(*pw_m32101234)\
    );
#else // !HIGH_BIT_DEPTH
#define PREDICT_8x8C_P_ASM\
    asm (\
        "movq        %1, %%mm0 \n"\
        "pmaddubsw   %2, %%mm0 \n"\
        "pshufw $14, %%mm0, %%mm1 \n"\
        "paddw    %%mm1, %%mm0 \n"\
        "pshufw  $1, %%mm0, %%mm1 \n"\
        "paddw    %%mm1, %%mm0 \n"\
        "movd     %%mm0, %0 \n"\
        "movswl     %w0, %0 \n"\
        :"=r"(H)\
        :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)\
    );
#endif // HIGH_BIT_DEPTH
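
/* The pmadd weights (-3..4) cover only the eight samples directly above the
 * block, so the top-left neighbour's missing -4 tap is added to H separately
 * below; V is computed with scalar arithmetic, as for 16x16. */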
#define PREDICT_8x8C_P_CORE_INLINE\
    int H, V;\
    PREDICT_8x8C_P_ASM\
    V = 1 * ( src[4*FDEC_STRIDE -1] - src[ 2*FDEC_STRIDE -1] )\
      + 2 * ( src[5*FDEC_STRIDE -1] - src[ 1*FDEC_STRIDE -1] )\
      + 3 * ( src[6*FDEC_STRIDE -1] - src[ 0*FDEC_STRIDE -1] )\
      + 4 * ( src[7*FDEC_STRIDE -1] - src[-1*FDEC_STRIDE -1] );\
    H += -4 * src[-1*FDEC_STRIDE -1];

#define PREDICT_8x8C_P_INLINE(name, name2)\
    static void predict_8x8c_p_##name( pixel *src )\
    {\
        PREDICT_8x8C_P_CORE_INLINE\
        PREDICT_8x8C_P_END(name2)\
    }
#else // !HAVE_X86_INLINE_ASM
#define PREDICT_8x8C_P_INLINE(name, name2) PREDICT_8x8C_P(name, name2)
#endif // HAVE_X86_INLINE_ASM
#if HIGH_BIT_DEPTH
PREDICT_8x8C_P_INLINE( sse2, sse2 )
#else // !HIGH_BIT_DEPTH
#if !ARCH_X86_64
PREDICT_8x8C_P( mmx2, mmx2 )
#endif // !ARCH_X86_64
PREDICT_8x8C_P( sse2, sse2 )
#if HAVE_X86_INLINE_ASM
PREDICT_8x8C_P_INLINE( ssse3, sse2 )
#endif // HAVE_X86_INLINE_ASM
#endif // HIGH_BIT_DEPTH
PREDICT_8x8C_P_INLINE( avx, avx )
PREDICT_8x8C_P_INLINE( avx2, avx2 )
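
/* Scalar DC_LEFT for 8x8 chroma: average the top and bottom halves of the left
 * column separately with rounding ((s+2)>>2), splat each DC byte across a
 * 64-bit word by multiplying with 0x0101010101010101, then store it to four
 * rows each. */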
#if ARCH_X86_64 && !HIGH_BIT_DEPTH
static void predict_8x8c_dc_left( uint8_t *src )
{
    int y;
    uint32_t s0 = 0, s1 = 0;
    uint64_t dc0, dc1;

    for( y = 0; y < 4; y++ )
    {
        s0 += src[y * FDEC_STRIDE     - 1];
        s1 += src[(y+4) * FDEC_STRIDE - 1];
    }
    dc0 = (( s0 + 2 ) >> 2) * 0x0101010101010101ULL;
    dc1 = (( s1 + 2 ) >> 2) * 0x0101010101010101ULL;

    for( y = 0; y < 4; y++ )
    {
        M64( src ) = dc0;
        src += FDEC_STRIDE;
    }
    for( y = 0; y < 4; y++ )
    {
        M64( src ) = dc1;
        src += FDEC_STRIDE;
    }
}
#endif // ARCH_X86_64 && !HIGH_BIT_DEPTH
/****************************************************************************
 * Exported functions:
 ****************************************************************************/
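
/* Each init routine fills the function-pointer table progressively: entries
 * are overwritten as stronger CPU features are confirmed, and the routine
 * returns early at the first missing feature, so the last assignment that
 * executes for a slot is the fastest implementation the host supports. */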
void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
{
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_16x16_V]       = x264_predict_16x16_v_mmx2;
    pf[I_PRED_16x16_H]       = x264_predict_16x16_h_mmx2;
#if HIGH_BIT_DEPTH
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_16x16_V]       = x264_predict_16x16_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_sse2;
    pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
    pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
    pf[I_PRED_16x16_H]       = x264_predict_16x16_h_sse2;
    pf[I_PRED_16x16_P]       = predict_16x16_p_sse2;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_16x16_V]       = x264_predict_16x16_v_avx;
    if( !(cpu&X264_CPU_AVX2) )
        return;
    pf[I_PRED_16x16_H]       = x264_predict_16x16_h_avx2;
#else
#if !ARCH_X86_64
    pf[I_PRED_16x16_P]       = predict_16x16_p_mmx2;
#endif
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_16x16_V]       = x264_predict_16x16_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_sse2;
    if( cpu&X264_CPU_SSE2_IS_SLOW )
        return;
    pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_sse2;
    pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_sse2;
    pf[I_PRED_16x16_P]       = predict_16x16_p_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    if( !(cpu&X264_CPU_SLOW_PSHUFB) )
        pf[I_PRED_16x16_H]   = x264_predict_16x16_h_ssse3;
#if HAVE_X86_INLINE_ASM
    pf[I_PRED_16x16_P]       = predict_16x16_p_ssse3;
#endif
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_16x16_P]       = predict_16x16_p_avx;
#endif // HIGH_BIT_DEPTH

    if( cpu&X264_CPU_AVX2 )
    {
        pf[I_PRED_16x16_P]       = predict_16x16_p_avx2;
        pf[I_PRED_16x16_DC]      = x264_predict_16x16_dc_avx2;
        pf[I_PRED_16x16_DC_TOP]  = x264_predict_16x16_dc_top_avx2;
        pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_avx2;
    }
}
void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
{
    if( !(cpu&X264_CPU_MMX) )
        return;
#if HIGH_BIT_DEPTH
    pf[I_PRED_CHROMA_V]      = x264_predict_8x8c_v_mmx;
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x8c_dc_mmx2;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x8c_h_mmx2;
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_CHROMA_V]      = x264_predict_8x8c_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x8c_dc_sse2;
    pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_sse2;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x8c_h_sse2;
    pf[I_PRED_CHROMA_P]      = predict_8x8c_p_sse2;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_CHROMA_P]      = predict_8x8c_p_avx;
    if( !(cpu&X264_CPU_AVX2) )
        return;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x8c_h_avx2;
#else
#if ARCH_X86_64
    pf[I_PRED_CHROMA_DC_LEFT] = predict_8x8c_dc_left;
#endif
    pf[I_PRED_CHROMA_V]      = x264_predict_8x8c_v_mmx;
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_mmx2;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x8c_h_mmx2;
#if !ARCH_X86_64
    pf[I_PRED_CHROMA_P]      = predict_8x8c_p_mmx2;
#endif
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x8c_dc_mmx2;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_CHROMA_P]      = predict_8x8c_p_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x8c_h_ssse3;
#if HAVE_X86_INLINE_ASM
    pf[I_PRED_CHROMA_P]      = predict_8x8c_p_ssse3;
#endif
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_CHROMA_P]      = predict_8x8c_p_avx;
#endif // HIGH_BIT_DEPTH

    if( cpu&X264_CPU_AVX2 )
    {
        pf[I_PRED_CHROMA_P]  = predict_8x8c_p_avx2;
    }
}
void x264_predict_8x16c_init_mmx( int cpu, x264_predict_t pf[7] )
{
    if( !(cpu&X264_CPU_MMX) )
        return;
#if HIGH_BIT_DEPTH
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x16c_dc_mmx2;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x16c_h_mmx2;
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_CHROMA_V]      = x264_predict_8x16c_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x16c_dc_top_sse2;
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x16c_dc_sse2;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x16c_h_sse2;
    pf[I_PRED_CHROMA_P]      = predict_8x16c_p_sse2;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_CHROMA_P]      = predict_8x16c_p_avx;
    if( !(cpu&X264_CPU_AVX2) )
        return;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x16c_h_avx2;
#else
    pf[I_PRED_CHROMA_V]      = x264_predict_8x16c_v_mmx;
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x16c_dc_top_mmx2;
    pf[I_PRED_CHROMA_DC]     = x264_predict_8x16c_dc_mmx2;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x16c_h_mmx2;
#if !ARCH_X86_64
    pf[I_PRED_CHROMA_P]      = predict_8x16c_p_mmx2;
#endif
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_CHROMA_P]      = predict_8x16c_p_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_CHROMA_H]      = x264_predict_8x16c_h_ssse3;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_CHROMA_P]      = predict_8x16c_p_avx;
#endif // HIGH_BIT_DEPTH

    if( cpu&X264_CPU_AVX2 )
    {
        pf[I_PRED_CHROMA_P]  = predict_8x16c_p_avx2;
    }
}
void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
{
    if( !(cpu&X264_CPU_MMX2) )
        return;
#if HIGH_BIT_DEPTH
    if( !(cpu&X264_CPU_SSE) )
        return;
    pf[I_PRED_8x8_V]      = x264_predict_8x8_v_sse;
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_8x8_H]      = x264_predict_8x8_h_sse2;
    pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_sse2;
    pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_sse2;
    pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_sse2;
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_sse2;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_sse2;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_sse2;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_sse2;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_sse2;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_sse2;
    *predict_8x8_filter   = x264_predict_8x8_filter_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_ssse3;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_ssse3;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_ssse3;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_ssse3;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_ssse3;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_ssse3;
    *predict_8x8_filter   = x264_predict_8x8_filter_ssse3;
    if( cpu&X264_CPU_CACHELINE_64 )
    {
        pf[I_PRED_8x8_DDL]= x264_predict_8x8_ddl_cache64_ssse3;
        pf[I_PRED_8x8_DDR]= x264_predict_8x8_ddr_cache64_ssse3;
    }
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_avx;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_avx;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_avx;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_avx;
    *predict_8x8_filter   = x264_predict_8x8_filter_avx;
#else
    pf[I_PRED_8x8_V]      = x264_predict_8x8_v_mmx2;
    pf[I_PRED_8x8_H]      = x264_predict_8x8_h_mmx2;
    pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_mmx2;
    pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_mmx2;
    pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_mmx2;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_mmx2;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_mmx2;
    *predict_8x8_filter   = x264_predict_8x8_filter_mmx2;
#if ARCH_X86
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_mmx2;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_mmx2;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_mmx2;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_mmx2;
#endif
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_sse2;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_sse2;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_sse2;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_sse2;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_sse2;
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    if( !(cpu&X264_CPU_SLOW_PALIGNR) )
    {
        pf[I_PRED_8x8_DDL]= x264_predict_8x8_ddl_ssse3;
        pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_ssse3;
    }
    pf[I_PRED_8x8_HU]     = x264_predict_8x8_hu_ssse3;
    *predict_8x8_filter   = x264_predict_8x8_filter_ssse3;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_8x8_DDL]    = x264_predict_8x8_ddl_avx;
    pf[I_PRED_8x8_DDR]    = x264_predict_8x8_ddr_avx;
    pf[I_PRED_8x8_VL]     = x264_predict_8x8_vl_avx;
    pf[I_PRED_8x8_VR]     = x264_predict_8x8_vr_avx;
    pf[I_PRED_8x8_HD]     = x264_predict_8x8_hd_avx;
#endif // HIGH_BIT_DEPTH
}
void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
{
    if( !(cpu&X264_CPU_MMX2) )
        return;
    pf[I_PRED_4x4_DC]  = x264_predict_4x4_dc_mmx2;
    pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmx2;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_mmx2;
    pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_mmx2;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_mmx2;
    pf[I_PRED_4x4_HU]  = x264_predict_4x4_hu_mmx2;
#if HIGH_BIT_DEPTH
    if( !(cpu&X264_CPU_SSE2) )
        return;
    pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_sse2;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_sse2;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_sse2;
    pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_sse2;
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_sse2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_ssse3;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_ssse3;
    if( !(cpu&X264_CPU_AVX) )
        return;
    pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_avx;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_avx;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_avx;
    pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_avx;
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_avx;
    if( !(cpu&X264_CPU_AVX2) )
        return;
    pf[I_PRED_4x4_H]   = x264_predict_4x4_h_avx2;
#else
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_mmx2;
    if( !(cpu&X264_CPU_SSSE3) )
        return;
    pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
    pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_ssse3;
    pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_ssse3;
    if( cpu&X264_CPU_CACHELINE_64 )
        pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_cache64_ssse3;
#endif // HIGH_BIT_DEPTH
}