/*****************************************************************************
 * asm.S: arm utility macros
 *****************************************************************************
 * Copyright (C) 2008-2018 x264 project
 *
 * Authors: Mans Rullgard <mans@mansr.com>
 *          David Conrad <lessen42@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

#include "config.h"

.syntax unified

#ifdef __ELF__
.arch armv7-a
.fpu neon
#endif

#define GLUE(a, b) a ## b
#define JOIN(a, b) GLUE(a, b)

#ifdef PREFIX
#   define BASE _x264_
#   define SYM_PREFIX _
#else
#   define BASE x264_
#   define SYM_PREFIX
#endif

#ifdef BIT_DEPTH
#   define EXTERN_ASM JOIN(JOIN(BASE, BIT_DEPTH), _)
#else
#   define EXTERN_ASM BASE
#endif

#define X(s) JOIN(EXTERN_ASM, s)
#define X264(s) JOIN(BASE, s)
#define EXT(s) JOIN(SYM_PREFIX, s)
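@ Added note: X(s) yields the bit-depth-mangled internal name (e.g. x264_8_foo
@ when BIT_DEPTH=8, plain x264_foo otherwise), X264(s) always yields the
@ unmangled x264_foo, and EXT(s) applies only the platform symbol prefix
@ (a leading underscore when PREFIX is defined, as on Mach-O) for referencing
@ external symbols.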

#ifdef __ELF__
#   define ELF
#else
#   define ELF @
#endif

#ifdef __MACH__
#   define MACH
#   define NONMACH @
#else
#   define MACH @
#   define NONMACH
#endif

#if HAVE_AS_FUNC
#   define FUNC
#else
#   define FUNC @
#endif
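@ Added note: ELF, MACH, NONMACH and FUNC act as per-line switches. Each
@ expands either to nothing (the directive that follows is assembled) or to
@ "@" (the rest of the line becomes a comment), so e.g. "ELF .size ..." is
@ only emitted when building for ELF.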

#if SYS_LINUX
#define HAVE_SECTION_DATA_REL_RO 1
#else
#define HAVE_SECTION_DATA_REL_RO 0
#endif

.macro require8, val=1
ELF     .eabi_attribute 24, \val
.endm

.macro preserve8, val=1
ELF     .eabi_attribute 25, \val
.endm
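@ Added note: require8/preserve8 record the AAPCS stack-alignment attributes
@ in ELF objects: tag 24 (Tag_ABI_align_needed) says the code requires 8-byte
@ stack alignment on entry, tag 25 (Tag_ABI_align_preserved) says it preserves
@ that alignment.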

.macro function name, export=1
    .macro endfunc
.if \export
ELF     .size   EXTERN_ASM\name, . - EXTERN_ASM\name
.else
ELF     .size   \name, . - \name
.endif
FUNC    .endfunc
        .purgem endfunc
    .endm
    .text
    .align  2
.if \export == 1
    .global EXTERN_ASM\name
ELF     .hidden EXTERN_ASM\name
ELF     .type   EXTERN_ASM\name, %function
FUNC    .func   EXTERN_ASM\name
EXTERN_ASM\name:
.else
ELF     .hidden \name
ELF     .type   \name, %function
FUNC    .func   \name
\name:
.endif
.endm
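@ Usage sketch (illustrative, not taken from this file): define an exported
@ NEON function and close it with the matching endfunc, e.g.
@     function pixel_sad_4x4_neon
@         ...                      @ function body
@         bx      lr
@     endfunc
@ With the default export=1 the label gets the EXTERN_ASM prefix, so the
@ symbol above would be x264_pixel_sad_4x4_neon (or x264_8_/x264_10_...
@ when BIT_DEPTH is defined).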

.macro const name, align=2, relocate=0
    .macro endconst
ELF     .size   \name, . - \name
        .purgem endconst
    .endm
.if HAVE_SECTION_DATA_REL_RO && \relocate
    .section .data.rel.ro
.else
NONMACH .section .rodata
MACH    .const_data
.endif
    .align  \align
\name:
.endm
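@ Usage sketch (illustrative, hypothetical table name): a read-only table is
@ placed in .rodata (ELF) or .const_data (Mach-O), or in .data.rel.ro when it
@ holds relocations and that section is available:
@     const example_table, align=4
@         .byte 0, 1, 2, 3
@     endconst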

.macro movrel rd, val
#if defined(PIC)
    ldr     \rd,  1f
    b       2f
1:
@ FIXME: thumb
    .word   \val - (2f + 8)
2:
    add     \rd,  \rd,  pc
#elif HAVE_ARMV6T2
    movw    \rd, #:lower16:\val
    movt    \rd, #:upper16:\val
#else
    ldr     \rd, =\val
#endif
.endm
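@ Usage sketch (illustrative register and label): load the address of a local
@ constant, position-independently when PIC is defined (the PIC path assumes
@ ARM mode, where pc reads as ". + 8"):
@     movrel  r4, example_table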

.macro movrelx rd, val, got
#if defined(PIC) && defined(__ELF__)
    ldr     \got, 2f
    ldr     \rd,  1f
    b       3f
1:
@ FIXME: thumb
    .word   \val(GOT)
2:
    .word   _GLOBAL_OFFSET_TABLE_ - (3f + 8)
3:
    add     \got, \got, pc
    ldr     \rd,  [\got, \rd]
#elif defined(PIC) && defined(__APPLE__)
    ldr     \rd,  1f
    b       2f
1:
@ FIXME: thumb
    .word   3f - (2f + 8)
2:
    ldr     \rd,  [pc, \rd]
    .non_lazy_symbol_pointer
3:
    .indirect_symbol \val
    .word   0
    .text
#else
    movrel  \rd, \val
#endif
.endm
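@ Usage sketch (illustrative registers, hypothetical symbol): movrelx loads
@ the address of an external (possibly interposable) symbol, going through
@ the GOT on ELF PIC builds and through a non-lazy symbol pointer on Mach-O
@ PIC builds; \got is a scratch register used only on the ELF path:
@     movrelx r0, some_external_table, r2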

.macro movconst rd, val
#if HAVE_ARMV6T2
    movw    \rd, #:lower16:\val
.if \val >> 16
    movt    \rd, #:upper16:\val
.endif
#else
    ldr     \rd, =\val
#endif
.endm
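@ Usage sketch (illustrative): materialize a 32-bit constant; on ARMv6T2+ this
@ is a movw, plus a movt only when the upper halfword is nonzero, otherwise a
@ literal-pool load:
@     movconst r3, 0x01010101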

#define FENC_STRIDE 16
#define FDEC_STRIDE 32

.macro HORIZ_ADD dest, a, b
.ifnb \b
    vadd.u16    \a, \a, \b
.endif
    vpaddl.u16  \a, \a
    vpaddl.u32  \dest, \a
.endm
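@ Usage sketch (illustrative registers): reduce the eight u16 lanes held in
@ d0 and d1 to a single widened sum in d0 (b may be omitted to reduce only a):
@     HORIZ_ADD d0, d0, d1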

.macro SUMSUB_AB sum, diff, a, b
    vadd.s16    \sum,  \a, \b
    vsub.s16    \diff, \a, \b
.endm

.macro SUMSUB_ABCD s1, d1, s2, d2, a, b, c, d
    SUMSUB_AB   \s1, \d1, \a, \b
    SUMSUB_AB   \s2, \d2, \c, \d
.endm
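@ Usage sketch (illustrative registers): one butterfly stage over two register
@ pairs, leaving the sums in d16/d18 and the differences in d17/d19:
@     SUMSUB_ABCD d16, d17, d18, d19, d0, d1, d2, d3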

.macro ABS2 a b
    vabs.s16    \a, \a
    vabs.s16    \b, \b
.endm

// dist = distance in elements (0 for vertical pass, 1/2 for horizontal passes)
// op = sumsub/amax (sum and diff / maximum of absolutes)
// d1/2 = destination registers
// s1/2 = source registers
.macro HADAMARD dist, op, d1, d2, s1, s2
.if \dist == 1
    vtrn.16     \s1, \s2
.else
    vtrn.32     \s1, \s2
.endif
.ifc \op, sumsub
    SUMSUB_AB   \d1, \d2, \s1, \s2
.else
    vabs.s16    \s1, \s1
    vabs.s16    \s2, \s2
    vmax.s16    \d1, \s1, \s2
.endif
.endm
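@ Usage sketch (illustrative registers): one horizontal Hadamard step on
@ neighbouring elements (dist=1), keeping both sum and difference:
@     HADAMARD 1, sumsub, d4, d5, d0, d1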

.macro TRANSPOSE8x8 r0 r1 r2 r3 r4 r5 r6 r7
    vtrn.32     \r0, \r4
    vtrn.32     \r1, \r5
    vtrn.32     \r2, \r6
    vtrn.32     \r3, \r7
    vtrn.16     \r0, \r2
    vtrn.16     \r1, \r3
    vtrn.16     \r4, \r6
    vtrn.16     \r5, \r7
    vtrn.8      \r0, \r1
    vtrn.8      \r2, \r3
    vtrn.8      \r4, \r5
    vtrn.8      \r6, \r7
.endm
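@ Usage sketch (illustrative registers): transpose an 8x8 block of bytes held
@ one row per d register:
@     TRANSPOSE8x8 d0, d1, d2, d3, d4, d5, d6, d7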

.macro TRANSPOSE4x4 r0 r1 r2 r3
    vtrn.16     \r0, \r2
    vtrn.16     \r1, \r3
    vtrn.8      \r0, \r1
    vtrn.8      \r2, \r3
.endm

.macro TRANSPOSE4x4_16 d0 d1 d2 d3
    vtrn.32     \d0, \d2
    vtrn.32     \d1, \d3
    vtrn.16     \d0, \d1
    vtrn.16     \d2, \d3
.endm
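@ Usage sketch (illustrative registers): TRANSPOSE4x4_16 transposes a 4x4
@ matrix of 16-bit elements held one row per d register:
@     TRANSPOSE4x4_16 d0, d1, d2, d3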