checkasm-arm.S 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. /****************************************************************************
  2. * checkasm-arm.S: assembly check tool
  3. *****************************************************************************
  4. * Copyright (C) 2015-2018 x264 project
  5. *
  6. * Authors: Martin Storsjo <martin@martin.st>
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  21. *
  22. * This program is also available under a commercial proprietary license.
  23. * For more information, contact us at licensing@x264.com.
  24. *****************************************************************************/
  25. #include "../common/arm/asm.S"
  26. const register_init, align=4
  27. .quad 0x21f86d66c8ca00ce
  28. .quad 0x75b6ba21077c48ad
  29. .quad 0xed56bb2dcb3c7736
  30. .quad 0x8bda43d3fd1a7e06
  31. .quad 0xb64a9c9e5d318408
  32. .quad 0xdf9a54b303f1d3a3
  33. .quad 0x4a75479abd64e097
  34. .quad 0x249214109d5d1c88
  35. endconst
  36. const error_message
  37. .asciz "failed to preserve register"
  38. endconst
  39. .text
  40. @ max number of args used by any x264 asm function.
  41. #define MAX_ARGS 15
  42. #define ARG_STACK 4*(MAX_ARGS - 4)
  43. @ align the used stack space to 8 to preserve the stack alignment
  44. #define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed)
  45. .macro clobbercheck variant
  46. .equ pushed, 4*10
  47. function checkasm_call_\variant
  48. push {r4-r11, lr}
  49. .ifc \variant, neon
  50. vpush {q4-q7}
  51. .equ pushed, pushed + 16*4
  52. .endif
  53. movrel r12, register_init
  54. .ifc \variant, neon
  55. vldm r12, {q4-q7}
  56. .endif
  57. ldm r12, {r4-r11}
  58. push {r1}
  59. sub sp, sp, #ARG_STACK_A
  60. .equ pos, 0
  61. .rept MAX_ARGS-4
  62. ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
  63. str r12, [sp, #pos]
  64. .equ pos, pos + 4
  65. .endr
  66. mov r12, r0
  67. mov r0, r2
  68. mov r1, r3
  69. ldrd r2, r3, [sp, #ARG_STACK_A + pushed]
  70. blx r12
  71. add sp, sp, #ARG_STACK_A
  72. pop {r2}
  73. push {r0, r1}
  74. movrel r12, register_init
  75. .ifc \variant, neon
  76. vldm r12, {q0-q3}
  77. veor q0, q0, q4
  78. veor q1, q1, q5
  79. veor q2, q2, q6
  80. veor q3, q3, q7
  81. vorr q0, q0, q1
  82. vorr q0, q0, q2
  83. vorr q0, q0, q3
  84. vorr d0, d0, d1
  85. vrev64.32 d1, d0
  86. vorr d0, d0, d1
  87. vmov.32 r3, d0[0]
  88. .else
  89. mov r3, #0
  90. .endif
  91. .macro check_reg reg1, reg2=
  92. ldrd r0, r1, [r12], #8
  93. eor r0, r0, \reg1
  94. orr r3, r3, r0
  95. .ifnb \reg2
  96. eor r1, r1, \reg2
  97. orr r3, r3, r1
  98. .endif
  99. .endm
  100. check_reg r4, r5
  101. check_reg r6, r7
  102. @ r9 is a volatile register in the ios ABI
  103. #if SYS_MACOSX
  104. check_reg r8
  105. #else
  106. check_reg r8, r9
  107. #endif
  108. check_reg r10, r11
  109. .purgem check_reg
  110. cmp r3, #0
  111. beq 0f
  112. mov r12, #0
  113. str r12, [r2]
  114. movrel r0, error_message
  115. blx EXT(puts)
  116. 0:
  117. pop {r0, r1}
  118. .ifc \variant, neon
  119. vpop {q4-q7}
  120. .endif
  121. pop {r4-r11, pc}
  122. endfunc
  123. .endm
  124. clobbercheck neon
  125. clobbercheck noneon