rectangle.h 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. /*****************************************************************************
  2. * rectangle.h: rectangle filling
  3. *****************************************************************************
  4. * Copyright (C) 2003-2018 x264 project
  5. *
  6. * Authors: Fiona Glaser <fiona@x264.com>
  7. * Loren Merritt <lorenm@u.washington.edu>
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License as published by
  11. * the Free Software Foundation; either version 2 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License
  20. * along with this program; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  22. *
  23. * This program is also available under a commercial proprietary license.
  24. * For more information, contact us at licensing@x264.com.
  25. *****************************************************************************/
  26. /* This function should only be called with constant w / h / s arguments! */
  27. static ALWAYS_INLINE void x264_macroblock_cache_rect( void *dst, int w, int h, int s, uint32_t v )
  28. {
  29. uint8_t *d = dst;
  30. uint16_t v2 = s == 2 ? v : v * 0x101;
  31. uint32_t v4 = s == 4 ? v : s == 2 ? v * 0x10001 : v * 0x1010101;
  32. uint64_t v8 = v4 + ((uint64_t)v4 << 32);
  33. s *= 8;
  34. if( w == 2 )
  35. {
  36. M16( d+s*0 ) = v2;
  37. if( h == 1 ) return;
  38. M16( d+s*1 ) = v2;
  39. if( h == 2 ) return;
  40. M16( d+s*2 ) = v2;
  41. M16( d+s*3 ) = v2;
  42. }
  43. else if( w == 4 )
  44. {
  45. M32( d+s*0 ) = v4;
  46. if( h == 1 ) return;
  47. M32( d+s*1 ) = v4;
  48. if( h == 2 ) return;
  49. M32( d+s*2 ) = v4;
  50. M32( d+s*3 ) = v4;
  51. }
  52. else if( w == 8 )
  53. {
  54. if( WORD_SIZE == 8 )
  55. {
  56. M64( d+s*0 ) = v8;
  57. if( h == 1 ) return;
  58. M64( d+s*1 ) = v8;
  59. if( h == 2 ) return;
  60. M64( d+s*2 ) = v8;
  61. M64( d+s*3 ) = v8;
  62. }
  63. else
  64. {
  65. M32( d+s*0+0 ) = v4;
  66. M32( d+s*0+4 ) = v4;
  67. if( h == 1 ) return;
  68. M32( d+s*1+0 ) = v4;
  69. M32( d+s*1+4 ) = v4;
  70. if( h == 2 ) return;
  71. M32( d+s*2+0 ) = v4;
  72. M32( d+s*2+4 ) = v4;
  73. M32( d+s*3+0 ) = v4;
  74. M32( d+s*3+4 ) = v4;
  75. }
  76. }
  77. else if( w == 16 )
  78. {
  79. /* height 1, width 16 doesn't occur */
  80. assert( h != 1 );
  81. #if HAVE_VECTOREXT && defined(__SSE__)
  82. v4si v16 = {v,v,v,v};
  83. M128( d+s*0+0 ) = (__m128)v16;
  84. M128( d+s*1+0 ) = (__m128)v16;
  85. if( h == 2 ) return;
  86. M128( d+s*2+0 ) = (__m128)v16;
  87. M128( d+s*3+0 ) = (__m128)v16;
  88. #else
  89. if( WORD_SIZE == 8 )
  90. {
  91. do
  92. {
  93. M64( d+s*0+0 ) = v8;
  94. M64( d+s*0+8 ) = v8;
  95. M64( d+s*1+0 ) = v8;
  96. M64( d+s*1+8 ) = v8;
  97. h -= 2;
  98. d += s*2;
  99. } while( h );
  100. }
  101. else
  102. {
  103. do
  104. {
  105. M32( d+ 0 ) = v4;
  106. M32( d+ 4 ) = v4;
  107. M32( d+ 8 ) = v4;
  108. M32( d+12 ) = v4;
  109. d += s;
  110. } while( --h );
  111. }
  112. #endif
  113. }
  114. else
  115. assert(0);
  116. }
  117. #define x264_cache_mv_func_table x264_template(cache_mv_func_table)
  118. extern void (*x264_cache_mv_func_table[10])(void *, uint32_t);
  119. #define x264_cache_mvd_func_table x264_template(cache_mvd_func_table)
  120. extern void (*x264_cache_mvd_func_table[10])(void *, uint32_t);
  121. #define x264_cache_ref_func_table x264_template(cache_ref_func_table)
  122. extern void (*x264_cache_ref_func_table[10])(void *, uint32_t);
  123. #define x264_macroblock_cache_mv_ptr( a, x, y, w, h, l, mv ) x264_macroblock_cache_mv( a, x, y, w, h, l, M32( mv ) )
  124. static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
  125. {
  126. void *mv_cache = &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y];
  127. if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
  128. x264_cache_mv_func_table[width + (height<<1)-3]( mv_cache, mv );
  129. else
  130. x264_macroblock_cache_rect( mv_cache, width*4, height, 4, mv );
  131. }
  132. static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint16_t mvd )
  133. {
  134. void *mvd_cache = &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y];
  135. if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
  136. x264_cache_mvd_func_table[width + (height<<1)-3]( mvd_cache, mvd );
  137. else
  138. x264_macroblock_cache_rect( mvd_cache, width*2, height, 2, mvd );
  139. }
  140. static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref )
  141. {
  142. void *ref_cache = &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y];
  143. if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
  144. x264_cache_ref_func_table[width + (height<<1)-3]( ref_cache, ref );
  145. else
  146. x264_macroblock_cache_rect( ref_cache, width, height, 1, ref );
  147. }
  148. static ALWAYS_INLINE void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip )
  149. {
  150. x264_macroblock_cache_rect( &h->mb.cache.skip[X264_SCAN8_0+x+8*y], width, height, 1, b_skip );
  151. }
  152. static ALWAYS_INLINE void x264_macroblock_cache_intra8x8_pred( x264_t *h, int x, int y, int i_mode )
  153. {
  154. x264_macroblock_cache_rect( &h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+x+8*y], 2, 2, 1, i_mode );
  155. }