12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- /*****************************************************************************
- * bitstream-a.S: aarch64 bitstream functions
- *****************************************************************************
- * Copyright (C) 2014-2018 x264 project
- *
- * Authors: Janne Grunau <janne-x264@jannau.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
- *
- * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing@x264.com.
- *****************************************************************************/
- #include "asm.S"
// nal_escape_neon: copy the bytes in [x1, x2) to x0, inserting an extra 0x03
// byte in front of every byte <= 3 that follows two zero bytes of output.
//
// In:   x0 = destination write pointer
//       x1 = source read pointer
//       x2 = source end pointer (one past the last byte to process)
//       NOTE(review): presumably the C-side prototype is
//       (uint8_t *dst, uint8_t *src, uint8_t *end) returning uint8_t * --
//       the prototype is not visible here, confirm against the caller.
// Out:  x0 = destination pointer advanced past the last byte written
// Uses: x1, x3-x7, x9 and v0-v7 as scratch; the stack is not touched.
//
// Strategy: while at least 16 bytes remain, a block is loaded and tested with
// NEON. A block with no escape position is stored unchanged; otherwise the
// same 16 bytes are reprocessed one at a time. A tail shorter than 16 bytes
// always goes through the byte loop. v0 carries the preceding bytes between
// iterations; only its lanes 14 and 15 (the last two bytes) are ever consumed.
// Numeric label 16 is defined twice: each "16f" binds to the next definition.
- function nal_escape_neon, export=1
- movi v0.16b, #0xff  // history starts as 0xff, so no zero run precedes the input
- movi v4.16b, #4  // per-lane threshold for the "byte <= 3" test
- mov w3, #3  // w3 = 0x03, the byte inserted as escape
- subs x6, x1, x2  // x6 = src - end: negative count of bytes still to process
- cbz x6, 99f  // empty range: return immediately
- 0:  // main loop head, x6 = x1 - x2
- cmn x6, #15  // flags from x6 + 15
- b.lt 16f  // x6 <= -16: a full 16-byte block is available
- mov x1, x2  // fewer than 16 bytes left: address the tail as [end + x6]
- b 100f  // and process it in the byte loop
- 16:  // vector test of one 16-byte block
- ld1 {v1.16b}, [x1], #16  // v1 = next 16 source bytes, x1 += 16
- ext v2.16b, v0.16b, v1.16b, #14  // v2[i] = byte two positions before v1[i]
- ext v3.16b, v0.16b, v1.16b, #15  // v3[i] = byte one position before v1[i]
- cmhi v7.16b, v4.16b, v1.16b  // v7[i] = 0xff where v1[i] < 4 (unsigned)
- cmeq v5.16b, v2.16b, #0  // v5[i] = 0xff where byte i-2 is zero
- cmeq v6.16b, v3.16b, #0  // v6[i] = 0xff where byte i-1 is zero
- and v5.16b, v5.16b, v7.16b  // combine: byte i-2 zero and v1[i] <= 3
- and v5.16b, v5.16b, v6.16b  // v5 = lanes that need an escape (0, 0, then byte <= 3)
- shrn v7.8b, v5.8h, #4  // narrow the 128-bit mask to 64 bits (4 bits per byte)
- mov x7, v7.d[0]  // bring the narrowed mask into a general register
- cbz x7, 16f  // nothing to escape: take the block-copy path (second label 16)
- mov x6, #-16  // escape needed: redo this block bytewise from [x1 - 16]
- 100:  // byte loop setup: rebuild the two-byte history from v0
- umov w5, v0.b[14]  // second-to-last history byte
- umov w4, v0.b[15]  // last history byte
- orr w5, w4, w5, lsl #8  // w5 = (b14 << 8) | b15, newest byte in bits 7:0
- 101:  // per-byte loop, x6 = negative offset from x1
- ldrb w4, [x1, x6]  // w4 = current source byte
- orr w9, w4, w5, lsl #16  // w9 = prev2:prev1:00:cur (the shift drops older history)
- cmp w9, #3
- b.hi 102f  // w9 > 3: prev2 or prev1 is nonzero, or cur > 3, so no escape
- strb w3, [x0], #1  // emit the 0x03 escape byte
- orr w5, w3, w5, lsl #8  // shift the escape byte into the history
- 102:
- adds x6, x6, #1  // advance; these flags are consumed by the b.lt below
- strb w4, [x0], #1  // emit the source byte
- orr w5, w4, w5, lsl #8  // shift it into the history (flags are left untouched)
- b.lt 101b  // loop while x6 < 0
- subs x6, x1, x2  // x6 = x1 - x2; flags consumed by the b.lt below
- lsr w9, w5, #8  // bits 7:0 of w9 = second-to-last output byte
- mov v0.b[14], w9  // store the last two output bytes back into v0 ...
- mov v0.b[15], w5  // ... as history for the next vector test
- b.lt 0b  // input remains: back to the main loop
- ret
- 16:  // block-copy path: the block contained nothing to escape
- subs x6, x1, x2  // x6 = x1 - x2; flags consumed by the b.lt below
- st1 {v1.16b}, [x0], #16  // store the 16 bytes unchanged, x0 += 16
- mov v0.16b, v1.16b  // this block becomes the history
- b.lt 0b  // input remains: back to the main loop
- 99:
- ret
- endfunc
|