bitstream-a.S 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. /*****************************************************************************
  2. * bitstream-a.S: aarch64 bitstream functions
  3. *****************************************************************************
  4. * Copyright (C) 2014-2018 x264 project
  5. *
  6. * Authors: Janne Grunau <janne-x264@jannau.net>
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  21. *
  22. * This program is also available under a commercial proprietary license.
  23. * For more information, contact us at licensing@x264.com.
  24. *****************************************************************************/
  25. #include "asm.S"
  /*
   * nal_escape_neon: copy the bytes in [x1, x2) to x0, storing an extra byte
   * with value 3 in front of every byte that is <= 3 and whose two preceding
   * output bytes are both zero.
   * NOTE(review): presumably the NEON version of the C NAL escape routine
   * (emulation-prevention byte insertion); the C prototype is not visible
   * here - confirm that the arguments are (dst, src, end) and that the caller
   * uses x0 as the returned end-of-output pointer.
   *
   * In:    x0 = write pointer, x1 = read pointer, x2 = end of input
   * Out:   x0 = write pointer advanced past the last byte stored
   *        (unchanged when x1 == x2 on entry)
   * Clobb: x1, w3, w4, w5, x6, x7, w9, v0-v7, condition flags
   *
   * Register roles:
   *   v0  previous 16-byte block; lanes 14/15 hold the two bytes preceding
   *       the current position. Starts as all 0xff so the first input bytes
   *       are never seen as preceded by zeros.
   *   v4  constant 4 in every lane, w3 = constant 3 (the byte inserted)
   *   x6  x1 - x2, i.e. minus the number of bytes left; also the negative
   *       index that counts up to 0 in the scalar loop
   *   w5  scalar history: the most recent output bytes, newest in bits 0-7
   *
   * Numeric labels are local: "16:" is defined twice and each "16f" refers
   * to the next definition below the branch.
   * NOTE(review): function/endfunc come from asm.S (not visible here);
   * export=1 presumably makes the symbol global - confirm.
   */
  26. function nal_escape_neon, export=1
  27. movi v0.16b, #0xff              // history = 0xff: no zeros before the first byte
  28. movi v4.16b, #4                 // per-lane threshold for the "byte < 4" test
  29. mov w3, #3                      // byte value stored in front of escaped bytes
  30. subs x6, x1, x2                 // x6 = read - end (<= 0 expected)
  31. cbz x6, 99f                     // empty input: return immediately
  32. 0:                              // main loop; x6 = x1 - x2 here
  33. cmn x6, #15                     // flags from x6 + 15
  34. b.lt 16f                        // x6 < -15: at least 16 bytes left, take the vector path
  // Fewer than 16 bytes left: set x1 to the end pointer and let the scalar
  // loop index the remaining bytes as [x1 + x6] with the negative x6.
  35. mov x1, x2
  36. b 100f
  37. 16:                             // vector scan of one 16-byte block
  38. ld1 {v1.16b}, [x1], #16         // v1 = next 16 input bytes, x1 += 16
  39. ext v2.16b, v0.16b, v1.16b, #14 // v2[i] = byte two positions before v1[i]
  40. ext v3.16b, v0.16b, v1.16b, #15 // v3[i] = byte one position before v1[i]
  41. cmhi v7.16b, v4.16b, v1.16b     // lane mask: 4 > v1[i] (unsigned), i.e. v1[i] <= 3
  42. cmeq v5.16b, v2.16b, #0         // lane mask: byte at i-2 is zero
  43. cmeq v6.16b, v3.16b, #0         // lane mask: byte at i-1 is zero
  44. and v5.16b, v5.16b, v7.16b
  45. and v5.16b, v5.16b, v6.16b      // v5[i] set where 0,0 is followed by a byte <= 3
  46. shrn v7.8b, v5.8h, #4           // narrow the 128-bit mask to 64 bits (4 bits per byte lane)
  47. mov x7, v7.d[0]
  48. cbz x7, 16f                     // no lane matched: plain 16-byte copy (second "16:" below)
  49. mov x6, #-16                    // match found: redo this block byte by byte from x1 - 16
  50. 100:                            // scalar path: seed w5 from the last two bytes in v0
  51. umov w5, v0.b[14]
  52. umov w4, v0.b[15]
  53. orr w5, w4, w5, lsl #8          // w5 = (byte[-2] << 8) | byte[-1]
  54. 101:                            // scalar loop over [x1 + x6], x6 counts up to 0
  55. ldrb w4, [x1, x6]               // w4 = current input byte
  56. orr w9, w4, w5, lsl #16         // w9 = last two output bytes in bits 31-16, current byte in bits 7-0
  57. cmp w9, #3
  58. b.hi 102f                       // w9 > 3: previous two are not both zero, or current byte > 3
  59. strb w3, [x0], #1               // store the extra byte 3
  60. orr w5, w3, w5, lsl #8          // the stored 3 becomes part of the history
  61. 102:
  62. adds x6, x6, #1                 // advance the index; these flags feed the b.lt below
  63. strb w4, [x0], #1               // copy the current byte (does not alter flags)
  64. orr w5, w4, w5, lsl #8          // push the current byte into the history (does not alter flags)
  65. b.lt 101b                       // x6 still negative: more bytes in this run
  66. subs x6, x1, x2                 // recompute read - end; flags feed the b.lt at the end of this run
  67. lsr w9, w5, #8                  // w9 low byte = second-to-last output byte
  68. mov v0.b[14], w9                // write the two most recent bytes back into
  69. mov v0.b[15], w5                // v0 lanes 14/15 for the next iteration
  70. b.lt 0b                         // x1 < x2: more input remains
  71. ret
  72. 16:                             // block needs no extra byte: copy it unchanged
  73. subs x6, x1, x2                 // recompute read - end; flags feed the b.lt below
  74. st1 {v1.16b}, [x0], #16         // store the 16 bytes, x0 += 16
  75. mov v0.16b, v1.16b              // this block becomes the history for the next one
  76. b.lt 0b                         // x1 < x2: more input remains
  77. 99:                             // reached by the empty-input branch or by falling through
  78. ret
  79. endfunc