/*****************************************************************************
 * cpu-a.S: arm cpu detection
 *****************************************************************************
 * Copyright (C) 2009-2018 x264 project
 *
 * Authors: David Conrad <lessen42@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/
  25. #include "asm.S"
  26. .align 2
  27. // done in gas because .fpu neon overrides the refusal to assemble
  28. // instructions the selected -march/-mcpu doesn't support
  29. function cpu_neon_test
  30. vadd.i16 q0, q0, q0
  31. bx lr
  32. endfunc
  33. // return: 0 on success
  34. // 1 if counters were already enabled
  35. // 9 if lo-res counters were already enabled
  36. function cpu_enable_armv7_counter, export=0
  37. mrc p15, 0, r2, c9, c12, 0 // read PMNC
  38. ands r0, r2, #1
  39. andne r0, r2, #9
  40. orr r2, r2, #1 // enable counters
  41. bic r2, r2, #8 // full resolution
  42. mcreq p15, 0, r2, c9, c12, 0 // write PMNC
  43. mov r2, #1 << 31 // enable cycle counter
  44. mcr p15, 0, r2, c9, c12, 1 // write CNTENS
  45. bx lr
  46. endfunc
  47. function cpu_disable_armv7_counter, export=0
  48. mrc p15, 0, r0, c9, c12, 0 // read PMNC
  49. bic r0, r0, #1 // disable counters
  50. mcr p15, 0, r0, c9, c12, 0 // write PMNC
  51. bx lr
  52. endfunc
  53. .macro READ_TIME r
  54. mrc p15, 0, \r, c9, c13, 0
  55. .endm
  56. // return: 0 if transfers neon -> arm transfers take more than 10 cycles
  57. // nonzero otherwise
  58. function cpu_fast_neon_mrc_test
  59. // check for user access to performance counters
  60. mrc p15, 0, r0, c9, c14, 0
  61. cmp r0, #0
  62. bxeq lr
  63. push {r4-r6,lr}
  64. bl cpu_enable_armv7_counter
  65. ands r1, r0, #8
  66. mov r3, #0
  67. mov ip, #4
  68. mov r6, #4
  69. moveq r5, #1
  70. movne r5, #64
  71. average_loop:
  72. mov r4, r5
  73. READ_TIME r1
  74. 1: subs r4, r4, #1
  75. .rept 8
  76. vmov.u32 lr, d0[0]
  77. add lr, lr, lr
  78. .endr
  79. bgt 1b
  80. READ_TIME r2
  81. subs r6, r6, #1
  82. sub r2, r2, r1
  83. cmpgt r2, #30 << 3 // assume context switch if it took over 30 cycles
  84. addle r3, r3, r2
  85. subsle ip, ip, #1
  86. bgt average_loop
  87. // disable counters if we enabled them
  88. ands r0, r0, #1
  89. bleq cpu_disable_armv7_counter
  90. lsr r0, r3, #5
  91. cmp r0, #10
  92. movgt r0, #0
  93. pop {r4-r6,pc}
  94. endfunc