cabac-a.S 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. /*****************************************************************************
  2. * cabac-a.S: aarch64 cabac
  3. *****************************************************************************
  4. * Copyright (C) 2014-2018 x264 project
  5. *
  6. * Authors: Janne Grunau <janne-x264@jannau.net>
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  21. *
  22. * This program is also available under a commercial proprietary license.
  23. * For more information, contact us at licensing@x264.com.
  24. *****************************************************************************/
  25. #include "asm.S"
  26. #include "asm-offsets.h"
  27. // w11 holds x264_cabac_t.i_low
  28. // w12 holds x264_cabac_t.i_range
  // cabac_encode_decision_asm: encode one context-coded bin and renormalize.
  // In:    x0 = base pointer for the CABAC_* field offsets (x264_cabac_t)
  //        w1 = context index (added to CABAC_STATE to locate the state byte)
  //        w2 = b, the bin value (only bit 0 is tested; also OR-ed into the transition index)
  // Out:   none in registers; updates the state byte, i_low, i_range, i_queue and,
  //        when a byte is flushed, i_bytes_outstanding, p and the bytes around p.
  // Uses:  w2-w6, x7, x8, x9, w10-w13 and the flags; no stack.
  // The inner labels cabac_encode_renorm and cabac_putbyte are also branch targets of
  // cabac_encode_terminal_asm and cabac_encode_bypass_asm in this file, so the register
  // roles documented at those labels must be preserved.
  29. function cabac_encode_decision_asm, export=1
  30. movrel x8, X264(cabac_range_lps) // x8 = cabac_range_lps table (movrel comes from asm.S; presumably loads the symbol address)
  31. movrel x9, X264(cabac_transition) // x9 = cabac_transition table
  32. add w10, w1, #CABAC_STATE // w10 = offset of this context's state byte; the w-write zeroes the top half of x10
  33. ldrb w3, [x0, x10] // w3 = i_state
  34. ldr w12, [x0, #CABAC_I_RANGE] // w12 = i_range
  35. and x4, x3, #~1 // x4 = i_state with bit 0 cleared
  36. asr w5, w12, #6 // w5 = i_range >> 6
  37. add x8, x8, x4, lsl #1 // x8 += (i_state & ~1) * 2: start of this state's row in cabac_range_lps
  38. sub w5, w5, #4 // w5 = (i_range >> 6) - 4: column within the row
  39. eor w6, w2, w3 // b ^ i_state
  40. ldrb w4, [x8, x5] // w4 = i_range_lps
  41. ldr w11, [x0, #CABAC_I_LOW] // w11 = i_low
  42. sub w12, w12, w4 // i_range -= i_range_lps
  43. tbz w6, #0, 1f // skip the next two instructions when ((b ^ i_state) & 1) == 0
  44. add w11, w11, w12 // bit 0 of b and i_state differ: i_low += reduced i_range
  45. mov w12, w4 // ... and i_range = i_range_lps
  46. 1:
  47. orr w4, w2, w3, lsl #1 // w4 = (i_state << 1) | b: index into cabac_transition
  48. ldrb w9, [x9, x4] // w9 = next state
  49. strb w9, [x0, x10] // store the new i_state
  // Renormalization. Entry contract (also used by cabac_encode_terminal_asm):
  // x0 = cabac base pointer, w11 = i_low, w12 = i_range (neither stored yet).
  50. cabac_encode_renorm:
  51. clz w5, w12 // leading zero bits of i_range
  52. ldr w2, [x0, #CABAC_I_QUEUE] // w2 = i_queue
  53. sub w5, w5, #23 // shift = clz - 23; for i_range < 0x200 this moves its top set bit to bit 8
  54. lsl w12, w12, w5 // i_range <<= shift
  55. lsl w11, w11, w5 // i_low <<= shift
  // NOTE(review): no branch in the visible code targets this "2:" (the 2b below binds to the later "2:").
  56. 2:
  57. adds w2, w2, w5 // i_queue += shift, setting the flags for the b.lt below
  58. str w12, [x0, #CABAC_I_RANGE] // store i_range (str leaves the flags untouched)
  59. b.lt 0f // i_queue still negative: just store i_low/i_queue and return
  // Byte output. Entry contract (also used by cabac_encode_bypass_asm):
  // x0 = cabac base pointer, w2 = updated i_queue, w11 = i_low.
  // w12 is reused as scratch from here on.
  60. cabac_putbyte:
  61. mov w13, #0x400
  62. add w12, w2, #10 // w12 = i_queue + 10
  63. lsl w13, w13, w2 // w13 = 0x400 << i_queue
  64. asr w4, w11, w12 // out = i_low >> (i_queue + 10)
  65. sub w2, w2, #8 // i_queue -= 8
  66. sub w13, w13, #1 // w13 = (0x400 << old i_queue) - 1: mask of the bits below out
  67. subs w5, w4, #0xff // Z flag set when out == 0xff (w5 is overwritten before it is read)
  68. and w11, w11, w13 // i_low &= mask: drop the bits moved into out
  69. ldr w6, [x0, #CABAC_I_BYTES_OUTSTANDING] // w6 = i_bytes_outstanding
  70. str w2, [x0, #CABAC_I_QUEUE] // store the reduced i_queue
  71. b.ne 1f // out != 0xff: write bytes out
  // out == 0xff: write nothing, only count one more outstanding byte.
  72. add w6, w6, #1
  73. str w11, [x0, #CABAC_I_LOW]
  74. str w6, [x0, #CABAC_I_BYTES_OUTSTANDING]
  75. ret
  76. 1:
  77. ldr x7, [x0, #CABAC_P] // x7 = p, the output pointer
  78. asr w5, w4, #8 // carry = out >> 8
  79. ldurb w8, [x7, #-1] // w8 = p[-1]
  80. add w8, w8, w5 // add the carry to the byte before p
  81. sub w5, w5, #1 // w5 = carry - 1: low byte is 0xff for carry 0, 0x00 for carry 1
  82. sturb w8, [x7, #-1] // p[-1] += carry
  83. cbz w6, 3f // no outstanding bytes: skip the fill loop
  // Write the low byte of w5 once per outstanding byte (w6 is non-zero here).
  84. 2:
  85. subs w6, w6, #1
  86. strb w5, [x7], #1 // *p++ = carry - 1
  87. b.gt 2b // repeat while the remaining count is > 0
  88. 3:
  89. strb w4, [x7], #1 // *p++ = low byte of out
  90. str wzr, [x0, #CABAC_I_BYTES_OUTSTANDING] // i_bytes_outstanding = 0
  91. str x7, [x0, #CABAC_P] // store the advanced p
  // Common exit: store i_low and i_queue (i_queue is stored again on the putbyte path).
  92. 0:
  93. str w11, [x0, #CABAC_I_LOW]
  94. str w2, [x0, #CABAC_I_QUEUE]
  95. ret
  96. endfunc
  // cabac_encode_bypass_asm: encode one bypass bin (no context state is read or written).
  // In:    x0 = base pointer for the CABAC_* field offsets (x264_cabac_t)
  //        w1 = b; it is AND-ed with i_range before being added to i_low
  //        NOTE(review): presumably callers pass 0 or an all-ones mask - confirm against the C callers.
  // Out:   none in registers; updates i_low and i_queue, plus whatever cabac_putbyte
  //        writes when a byte is flushed.
  // Uses:  w1, w2, w11, w12 and the flags here; cabac_putbyte uses further registers.
  97. function cabac_encode_bypass_asm, export=1
  98. ldr w12, [x0, #CABAC_I_RANGE] // w12 = i_range (read only; not stored back on this path)
  99. ldr w11, [x0, #CABAC_I_LOW] // w11 = i_low
  100. ldr w2, [x0, #CABAC_I_QUEUE] // w2 = i_queue
  101. and w1, w1, w12 // w1 = b & i_range
  102. add w11, w1, w11, lsl #1 // i_low = (i_low << 1) + (b & i_range)
  103. adds w2, w2, #1 // i_queue += 1, setting the flags for the b.ge below
  104. b.ge cabac_putbyte // i_queue >= 0: branch (no link) into cabac_putbyte with w2 = i_queue, w11 = i_low; its ret returns to our caller
  105. str w11, [x0, #CABAC_I_LOW] // otherwise just store the updated fields
  106. str w2, [x0, #CABAC_I_QUEUE]
  107. ret
  108. endfunc
  // cabac_encode_terminal_asm: reduce i_range by 2 and renormalize.
  // In:    x0 = base pointer for the CABAC_* field offsets (x264_cabac_t); no other argument is read.
  // Out:   none in registers; i_range, i_low and i_queue are stored by the code at
  //        cabac_encode_renorm, which may also flush a byte via cabac_putbyte.
  // Uses:  w11, w12 here; the renorm/putbyte code uses further registers and the flags.
  109. function cabac_encode_terminal_asm, export=1
  110. ldr w12, [x0, #CABAC_I_RANGE] // w12 = i_range
  111. ldr w11, [x0, #CABAC_I_LOW] // w11 = i_low, passed through unchanged
  112. sub w12, w12, #2 // i_range -= 2
  113. b cabac_encode_renorm // branch (no link) with w11 = i_low, w12 = i_range; its ret returns to our caller
  114. endfunc