/****************************************************************************
 * dct-a.S: aarch64 transform and zigzag
 *****************************************************************************
 * Copyright (C) 2009-2018 x264 project
 *
 * Authors: David Conrad <lessen42@gmail.com>
 *          Janne Grunau <janne-x264@jannau.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/
#include "asm.S"

const scan4x4_frame, align=4
    .byte 0,1, 8,9, 2,3, 4,5
    .byte 10,11, 16,17, 24,25, 18,19
    .byte 12,13, 6,7, 14,15, 20,21
    .byte 26,27, 28,29, 22,23, 30,31
endconst

const scan4x4_field, align=4
    .byte 0,1, 2,3, 8,9, 4,5
    .byte 6,7, 10,11, 12,13, 14,15
endconst

const sub4x4_frame, align=4
    .byte 0, 1, 4, 8
    .byte 5, 2, 3, 6
    .byte 9, 12, 13, 10
    .byte 7, 11, 14, 15
endconst

const sub4x4_field, align=4
    .byte 0, 4, 1, 8
    .byte 12, 5, 9, 13
    .byte 2, 6, 10, 14
    .byte 3, 7, 11, 15
endconst

// sum = a + (b>>shift)   sub = (a>>shift) - b
.macro SUMSUB_SHR shift sum sub a b t0 t1
    sshr \t0, \b, #\shift
    sshr \t1, \a, #\shift
    add \sum, \a, \t0
    sub \sub, \t1, \b
.endm

// sum = (a>>shift) + b   sub = a - (b>>shift)
.macro SUMSUB_SHR2 shift sum sub a b t0 t1
    sshr \t0, \a, #\shift
    sshr \t1, \b, #\shift
    add \sum, \t0, \b
    sub \sub, \a, \t1
.endm

// a += 1.5*ma   b -= 1.5*mb
.macro SUMSUB_15 a b ma mb t0 t1
    sshr \t0, \ma, #1
    sshr \t1, \mb, #1
    add \t0, \t0, \ma
    add \t1, \t1, \mb
    add \a, \a, \t0
    sub \b, \b, \t1
.endm
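
// For reference, a scalar C sketch of the three helpers above (SUMSUB_AB,
// defined in asm.S, is the plain butterfly: sum = a+b, sub = a-b).
// Illustrative names only; this is not part of the build:
/*
static inline void sumsub_shr( int shift, int16_t *sum, int16_t *sub,
                               int16_t a, int16_t b )
{
    *sum = a + (b >> shift);
    *sub = (a >> shift) - b;
}
static inline void sumsub_shr2( int shift, int16_t *sum, int16_t *sub,
                                int16_t a, int16_t b )
{
    *sum = (a >> shift) + b;
    *sub = a - (b >> shift);
}
static inline void sumsub_15( int16_t *a, int16_t *b, int16_t ma, int16_t mb )
{
    *a += ma + (ma >> 1);   // a += 1.5*ma, truncating like sshr
    *b -= mb + (mb >> 1);   // b -= 1.5*mb
}
*/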

function dct4x4dc_neon, export=1
    ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x0]
    movi v31.4h, #1
    SUMSUB_AB v4.4h, v5.4h, v0.4h, v1.4h
    SUMSUB_AB v6.4h, v7.4h, v2.4h, v3.4h
    SUMSUB_AB v0.4h, v2.4h, v4.4h, v6.4h
    SUMSUB_AB v3.4h, v1.4h, v5.4h, v7.4h
    transpose v4.4h, v6.4h, v0.4h, v2.4h
    transpose v5.4h, v7.4h, v1.4h, v3.4h
    SUMSUB_AB v0.4h, v2.4h, v4.4h, v6.4h
    SUMSUB_AB v1.4h, v3.4h, v5.4h, v7.4h
    transpose v4.2s, v5.2s, v0.2s, v1.2s
    transpose v6.2s, v7.2s, v2.2s, v3.2s
    add v16.4h, v4.4h, v31.4h
    add v17.4h, v6.4h, v31.4h
    srhadd v0.4h, v4.4h, v5.4h
    shsub v1.4h, v16.4h, v5.4h
    shsub v2.4h, v17.4h, v7.4h
    srhadd v3.4h, v6.4h, v7.4h
    st1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x0]
    ret
endfunc
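
// The tail of dct4x4dc_neon folds the last butterfly into a rounded halving:
// srhadd computes (a + b + 1) >> 1 and, thanks to the +1 bias added into
// v16/v17, shsub computes (a + 1 - b) >> 1.  Hedged scalar sketch, per lane:
/*
    out0 = (v4 + v5 + 1) >> 1;
    out1 = (v4 - v5 + 1) >> 1;
    out2 = (v6 - v7 + 1) >> 1;
    out3 = (v6 + v7 + 1) >> 1;
*/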

function idct4x4dc_neon, export=1
    ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x0]
    SUMSUB_AB v4.4h, v5.4h, v0.4h, v1.4h
    SUMSUB_AB v6.4h, v7.4h, v2.4h, v3.4h
    SUMSUB_AB v0.4h, v2.4h, v4.4h, v6.4h
    SUMSUB_AB v3.4h, v1.4h, v5.4h, v7.4h
    transpose v4.4h, v6.4h, v0.4h, v2.4h
    transpose v5.4h, v7.4h, v1.4h, v3.4h
    SUMSUB_AB v0.4h, v2.4h, v4.4h, v6.4h
    SUMSUB_AB v1.4h, v3.4h, v5.4h, v7.4h
    transpose v4.2s, v5.2s, v0.2s, v1.2s
    transpose v6.2s, v7.2s, v2.2s, v3.2s
    SUMSUB_AB v0.4h, v1.4h, v4.4h, v5.4h
    SUMSUB_AB v3.4h, v2.4h, v6.4h, v7.4h
    st1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x0]
    ret
endfunc

.macro DCT_1D v0 v1 v2 v3 v4 v5 v6 v7
    SUMSUB_AB \v1, \v6, \v5, \v6
    SUMSUB_AB \v3, \v7, \v4, \v7
    add \v0, \v3, \v1
    add \v4, \v7, \v7
    add \v5, \v6, \v6
    sub \v2, \v3, \v1
    add \v1, \v4, \v6
    sub \v3, \v7, \v5
.endm
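
// Scalar sketch of DCT_1D with inputs a,b,c,d = \v4..\v7 (the standard
// H.264 4-point forward transform; names here are illustrative):
/*
    v0 = (a + d) + (b + c);
    v1 = 2*(a - d) + (b - c);
    v2 = (a + d) - (b + c);
    v3 = (a - d) - 2*(b - c);
*/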

function sub4x4_dct_neon, export=1
    mov x3, #FENC_STRIDE
    mov x4, #FDEC_STRIDE
    ld1 {v0.s}[0], [x1], x3
    ld1 {v1.s}[0], [x2], x4
    ld1 {v2.s}[0], [x1], x3
    usubl v16.8h, v0.8b, v1.8b
    ld1 {v3.s}[0], [x2], x4
    ld1 {v4.s}[0], [x1], x3
    usubl v17.8h, v2.8b, v3.8b
    ld1 {v5.s}[0], [x2], x4
    ld1 {v6.s}[0], [x1], x3
    usubl v18.8h, v4.8b, v5.8b
    ld1 {v7.s}[0], [x2], x4
    usubl v19.8h, v6.8b, v7.8b
    DCT_1D v0.4h, v1.4h, v2.4h, v3.4h, v16.4h, v17.4h, v18.4h, v19.4h
    transpose4x4.h v0, v1, v2, v3, v4, v5, v6, v7
    DCT_1D v4.4h, v5.4h, v6.4h, v7.4h, v0.4h, v1.4h, v2.4h, v3.4h
    st1 {v4.4h,v5.4h,v6.4h,v7.4h}, [x0]
    ret
endfunc

function sub8x4_dct_neon
    ld1 {v0.8b}, [x1], x3
    ld1 {v1.8b}, [x2], x4
    usubl v16.8h, v0.8b, v1.8b
    ld1 {v2.8b}, [x1], x3
    ld1 {v3.8b}, [x2], x4
    usubl v17.8h, v2.8b, v3.8b
    ld1 {v4.8b}, [x1], x3
    ld1 {v5.8b}, [x2], x4
    usubl v18.8h, v4.8b, v5.8b
    ld1 {v6.8b}, [x1], x3
    ld1 {v7.8b}, [x2], x4
    usubl v19.8h, v6.8b, v7.8b
    DCT_1D v0.8h, v1.8h, v2.8h, v3.8h, v16.8h, v17.8h, v18.8h, v19.8h
    transpose4x8.h v0, v1, v2, v3, v4, v5, v6, v7
    SUMSUB_AB v16.8h, v19.8h, v0.8h, v3.8h
    SUMSUB_AB v17.8h, v18.8h, v1.8h, v2.8h
    add v22.8h, v19.8h, v19.8h
    add v21.8h, v18.8h, v18.8h
    add v0.8h, v16.8h, v17.8h
    sub v1.8h, v16.8h, v17.8h
    add v2.8h, v22.8h, v18.8h
    sub v3.8h, v19.8h, v21.8h
    zip1 v4.2d, v0.2d, v2.2d
    zip2 v6.2d, v0.2d, v2.2d
    zip1 v5.2d, v1.2d, v3.2d
    zip2 v7.2d, v1.2d, v3.2d
    st1 {v4.8h}, [x0], #16
    st1 {v5.8h}, [x0], #16
    st1 {v6.8h}, [x0], #16
    st1 {v7.8h}, [x0], #16
    ret
endfunc

function sub8x8_dct_neon, export=1
    mov x5, x30
    mov x3, #FENC_STRIDE
    mov x4, #FDEC_STRIDE
    bl sub8x4_dct_neon
    mov x30, x5
    b sub8x4_dct_neon
endfunc

function sub16x16_dct_neon, export=1
    mov x5, x30
    mov x3, #FENC_STRIDE
    mov x4, #FDEC_STRIDE
    bl sub8x4_dct_neon
    bl sub8x4_dct_neon
    sub x1, x1, #8*FENC_STRIDE-8
    sub x2, x2, #8*FDEC_STRIDE-8
    bl sub8x4_dct_neon
    bl sub8x4_dct_neon
    sub x1, x1, #8
    sub x2, x2, #8
    bl sub8x4_dct_neon
    bl sub8x4_dct_neon
    sub x1, x1, #8*FENC_STRIDE-8
    sub x2, x2, #8*FDEC_STRIDE-8
    bl sub8x4_dct_neon
    mov x30, x5
    b sub8x4_dct_neon
endfunc

.macro DCT8_1D type
    SUMSUB_AB v18.8h, v17.8h, v3.8h, v4.8h   // s34/d34
    SUMSUB_AB v19.8h, v16.8h, v2.8h, v5.8h   // s25/d25
    SUMSUB_AB v22.8h, v21.8h, v1.8h, v6.8h   // s16/d16
    SUMSUB_AB v23.8h, v20.8h, v0.8h, v7.8h   // s07/d07
    SUMSUB_AB v24.8h, v26.8h, v23.8h, v18.8h // a0/a2
    SUMSUB_AB v25.8h, v27.8h, v22.8h, v19.8h // a1/a3
    SUMSUB_AB v30.8h, v29.8h, v20.8h, v17.8h // a6/a5
    sshr v23.8h, v21.8h, #1
    sshr v18.8h, v16.8h, #1
    add v23.8h, v23.8h, v21.8h
    add v18.8h, v18.8h, v16.8h
    sub v30.8h, v30.8h, v23.8h
    sub v29.8h, v29.8h, v18.8h
    SUMSUB_AB v28.8h, v31.8h, v21.8h, v16.8h // a4/a7
    sshr v22.8h, v20.8h, #1
    sshr v19.8h, v17.8h, #1
    add v22.8h, v22.8h, v20.8h
    add v19.8h, v19.8h, v17.8h
    add v22.8h, v28.8h, v22.8h
    add v31.8h, v31.8h, v19.8h
    SUMSUB_AB v0.8h, v4.8h, v24.8h, v25.8h
    SUMSUB_SHR 2, v1.8h, v7.8h, v22.8h, v31.8h, v16.8h, v17.8h
    SUMSUB_SHR 1, v2.8h, v6.8h, v26.8h, v27.8h, v18.8h, v19.8h
    SUMSUB_SHR2 2, v3.8h, v5.8h, v30.8h, v29.8h, v20.8h, v21.8h
.endm
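
// Scalar equivalent of DCT8_1D (a sketch of the 8-point butterfly computed
// above; x0..x7 are one row of residuals, y0..y7 the transformed outputs,
// and s/d are the symmetric sums/differences formed first):
/*
    int s07 = x0 + x7, d07 = x0 - x7, s16 = x1 + x6, d16 = x1 - x6;
    int s25 = x2 + x5, d25 = x2 - x5, s34 = x3 + x4, d34 = x3 - x4;
    int a0 = s07 + s34, a2 = s07 - s34;
    int a1 = s16 + s25, a3 = s16 - s25;
    int a4 = d16 + d25 + d07 + (d07 >> 1);
    int a5 = d07 - d34 - d25 - (d25 >> 1);
    int a6 = d07 + d34 - d16 - (d16 >> 1);
    int a7 = d16 - d25 + d34 + (d34 >> 1);
    y0 = a0 + a1;          y4 = a0 - a1;
    y1 = a4 + (a7 >> 2);   y7 = (a4 >> 2) - a7;
    y2 = a2 + (a3 >> 1);   y6 = (a2 >> 1) - a3;
    y3 = a5 + (a6 >> 2);   y5 = a6 - (a5 >> 2);
*/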

function sub8x8_dct8_neon, export=1
    mov x3, #FENC_STRIDE
    mov x4, #FDEC_STRIDE
    ld1 {v16.8b}, [x1], x3
    ld1 {v17.8b}, [x2], x4
    ld1 {v18.8b}, [x1], x3
    ld1 {v19.8b}, [x2], x4
    usubl v0.8h, v16.8b, v17.8b
    ld1 {v20.8b}, [x1], x3
    ld1 {v21.8b}, [x2], x4
    usubl v1.8h, v18.8b, v19.8b
    ld1 {v22.8b}, [x1], x3
    ld1 {v23.8b}, [x2], x4
    usubl v2.8h, v20.8b, v21.8b
    ld1 {v24.8b}, [x1], x3
    ld1 {v25.8b}, [x2], x4
    usubl v3.8h, v22.8b, v23.8b
    ld1 {v26.8b}, [x1], x3
    ld1 {v27.8b}, [x2], x4
    usubl v4.8h, v24.8b, v25.8b
    ld1 {v28.8b}, [x1], x3
    ld1 {v29.8b}, [x2], x4
    usubl v5.8h, v26.8b, v27.8b
    ld1 {v30.8b}, [x1], x3
    ld1 {v31.8b}, [x2], x4
    usubl v6.8h, v28.8b, v29.8b
    usubl v7.8h, v30.8b, v31.8b
    DCT8_1D row
    transpose8x8.h v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
    DCT8_1D col
    st1 {v0.8h,v1.8h,v2.8h,v3.8h}, [x0], #64
    st1 {v4.8h,v5.8h,v6.8h,v7.8h}, [x0], #64
    ret
endfunc

function sub16x16_dct8_neon, export=1
    mov x7, x30
    bl X(sub8x8_dct8_neon)
    sub x1, x1, #FENC_STRIDE*8 - 8
    sub x2, x2, #FDEC_STRIDE*8 - 8
    bl X(sub8x8_dct8_neon)
    sub x1, x1, #8
    sub x2, x2, #8
    bl X(sub8x8_dct8_neon)
    mov x30, x7
    sub x1, x1, #FENC_STRIDE*8 - 8
    sub x2, x2, #FDEC_STRIDE*8 - 8
    b X(sub8x8_dct8_neon)
endfunc

// First part of IDCT (minus final SUMSUB_AB)
.macro IDCT_1D d4 d5 d6 d7 d0 d1 d2 d3
    SUMSUB_AB \d4, \d5, \d0, \d2
    sshr \d7, \d1, #1
    sshr \d6, \d3, #1
    sub \d7, \d7, \d3
    add \d6, \d6, \d1
.endm
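
// Scalar sketch: IDCT_1D leaves the four half-finished terms
//   \d4 = d0 + d2            \d5 = d0 - d2
//   \d7 = (d1 >> 1) - d3     \d6 = d1 + (d3 >> 1)
// and the caller's SUMSUB_AB pairs then form the output rows
// \d4+\d6, \d5+\d7, \d5-\d7, \d4-\d6.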

function add4x4_idct_neon, export=1
    mov x2, #FDEC_STRIDE
    ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x1]
    IDCT_1D v4.4h, v5.4h, v6.4h, v7.4h, v0.4h, v1.4h, v2.4h, v3.4h
    ld1 {v28.s}[0], [x0], x2
    SUMSUB_AB v0.4h, v2.4h, v4.4h, v6.4h
    SUMSUB_AB v1.4h, v3.4h, v5.4h, v7.4h
    transpose4x4.h v0, v1, v3, v2, v16, v17, v18, v19
    IDCT_1D v4.4h, v5.4h, v6.4h, v7.4h, v0.4h, v1.4h, v3.4h, v2.4h
    ld1 {v29.s}[0], [x0], x2
    SUMSUB_AB v0.4h, v2.4h, v4.4h, v6.4h
    SUMSUB_AB v1.4h, v3.4h, v5.4h, v7.4h
    srshr v0.4h, v0.4h, #6
    srshr v1.4h, v1.4h, #6
    ld1 {v31.s}[0], [x0], x2
    srshr v2.4h, v2.4h, #6
    srshr v3.4h, v3.4h, #6
    ld1 {v30.s}[0], [x0], x2
    sub x0, x0, x2, lsl #2
    uaddw v0.8h, v0.8h, v28.8b
    uaddw v1.8h, v1.8h, v29.8b
    uaddw v2.8h, v2.8h, v30.8b
    uaddw v3.8h, v3.8h, v31.8b
    sqxtun v0.8b, v0.8h
    sqxtun v1.8b, v1.8h
    sqxtun v2.8b, v2.8h
    sqxtun v3.8b, v3.8h
    st1 {v0.s}[0], [x0], x2
    st1 {v1.s}[0], [x0], x2
    st1 {v3.s}[0], [x0], x2
    st1 {v2.s}[0], [x0], x2
    ret
endfunc

function add8x4_idct_neon, export=1
    ld1 {v0.8h,v1.8h}, [x1], #32
    ld1 {v2.8h,v3.8h}, [x1], #32
    transpose v20.2d, v21.2d, v0.2d, v2.2d
    transpose v22.2d, v23.2d, v1.2d, v3.2d
    IDCT_1D v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
    SUMSUB_AB v0.8h, v3.8h, v16.8h, v18.8h
    SUMSUB_AB v1.8h, v2.8h, v17.8h, v19.8h
    transpose4x8.h v0, v1, v2, v3, v4, v5, v6, v7
    IDCT_1D v16.8h, v17.8h, v18.8h, v19.8h, v0.8h, v1.8h, v2.8h, v3.8h
    SUMSUB_AB v0.8h, v3.8h, v16.8h, v18.8h
    SUMSUB_AB v1.8h, v2.8h, v17.8h, v19.8h
    srshr v0.8h, v0.8h, #6
    ld1 {v28.8b}, [x0], x2
    srshr v1.8h, v1.8h, #6
    ld1 {v29.8b}, [x0], x2
    srshr v2.8h, v2.8h, #6
    ld1 {v30.8b}, [x0], x2
    srshr v3.8h, v3.8h, #6
    ld1 {v31.8b}, [x0], x2
    sub x0, x0, x2, lsl #2
    uaddw v0.8h, v0.8h, v28.8b
    uaddw v1.8h, v1.8h, v29.8b
    uaddw v2.8h, v2.8h, v30.8b
    uaddw v3.8h, v3.8h, v31.8b
    sqxtun v0.8b, v0.8h
    sqxtun v1.8b, v1.8h
    st1 {v0.8b}, [x0], x2
    sqxtun v2.8b, v2.8h
    st1 {v1.8b}, [x0], x2
    sqxtun v3.8b, v3.8h
    st1 {v2.8b}, [x0], x2
    st1 {v3.8b}, [x0], x2
    ret
endfunc

function add8x8_idct_neon, export=1
    mov x2, #FDEC_STRIDE
    mov x5, x30
    bl X(add8x4_idct_neon)
    mov x30, x5
    b X(add8x4_idct_neon)
endfunc

function add16x16_idct_neon, export=1
    mov x2, #FDEC_STRIDE
    mov x5, x30
    bl X(add8x4_idct_neon)
    bl X(add8x4_idct_neon)
    sub x0, x0, #8*FDEC_STRIDE-8
    bl X(add8x4_idct_neon)
    bl X(add8x4_idct_neon)
    sub x0, x0, #8
    bl X(add8x4_idct_neon)
    bl X(add8x4_idct_neon)
    sub x0, x0, #8*FDEC_STRIDE-8
    bl X(add8x4_idct_neon)
    mov x30, x5
    b X(add8x4_idct_neon)
endfunc

.macro IDCT8_1D type
    SUMSUB_AB v0.8h, v1.8h, v16.8h, v20.8h // a0/a2
.ifc \type, row
    ld1 {v22.8h,v23.8h}, [x1], #32
.endif
    SUMSUB_SHR 1, v2.8h, v3.8h, v18.8h, v22.8h, v16.8h, v20.8h // a6/a4
    SUMSUB_AB v16.8h, v18.8h, v21.8h, v19.8h
    SUMSUB_15 v16.8h, v18.8h, v17.8h, v23.8h, v20.8h, v22.8h // a7/a1
    SUMSUB_AB v22.8h, v23.8h, v23.8h, v17.8h
    SUMSUB_15 v23.8h, v22.8h, v21.8h, v19.8h, v20.8h, v17.8h // a5/a3
    SUMSUB_SHR 2, v21.8h, v22.8h, v22.8h, v23.8h, v19.8h, v17.8h // b3/b5
    SUMSUB_SHR2 2, v20.8h, v23.8h, v16.8h, v18.8h, v19.8h, v17.8h // b1/b7
    SUMSUB_AB v18.8h, v2.8h, v0.8h, v2.8h // b0/b6
    SUMSUB_AB v19.8h, v3.8h, v1.8h, v3.8h // b2/b4
    SUMSUB_AB v16.8h, v23.8h, v18.8h, v23.8h
    SUMSUB_AB v17.8h, v22.8h, v19.8h, v22.8h
    SUMSUB_AB v18.8h, v21.8h, v3.8h, v21.8h
    SUMSUB_AB v19.8h, v20.8h, v2.8h, v20.8h
.endm
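
// Scalar equivalent of IDCT8_1D (sketch; x0..x7 are the coefficients held
// in v16..v23 on entry, y0..y7 the outputs written back to v16..v23):
/*
    int a0 = x0 + x4,          a2 = x0 - x4;
    int a6 = x2 + (x6 >> 1),   a4 = (x2 >> 1) - x6;
    int a7 = x3 + x5 + x1 + (x1 >> 1);
    int a1 = x5 - x3 - x7 - (x7 >> 1);
    int a5 = x7 - x1 + x5 + (x5 >> 1);
    int a3 = x7 + x1 - x3 - (x3 >> 1);
    int b0 = a0 + a6,  b6 = a0 - a6,  b2 = a2 + a4,  b4 = a2 - a4;
    int b3 = a3 + (a5 >> 2),  b5 = (a3 >> 2) - a5;
    int b1 = a1 + (a7 >> 2),  b7 = a7 - (a1 >> 2);
    y0 = b0 + b7;  y1 = b2 + b5;  y2 = b4 + b3;  y3 = b6 + b1;
    y4 = b6 - b1;  y5 = b4 - b3;  y6 = b2 - b5;  y7 = b0 - b7;
*/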

function add8x8_idct8_neon, export=1
    mov x2, #FDEC_STRIDE
    ld1 {v16.8h,v17.8h}, [x1], #32
    ld1 {v18.8h,v19.8h}, [x1], #32
    ld1 {v20.8h,v21.8h}, [x1], #32
    IDCT8_1D row
    transpose8x8.h v16, v17, v18, v19, v20, v21, v22, v23, v30, v31
    IDCT8_1D col
    ld1 {v0.8b}, [x0], x2
    srshr v16.8h, v16.8h, #6
    ld1 {v1.8b}, [x0], x2
    srshr v17.8h, v17.8h, #6
    ld1 {v2.8b}, [x0], x2
    srshr v18.8h, v18.8h, #6
    ld1 {v3.8b}, [x0], x2
    srshr v19.8h, v19.8h, #6
    ld1 {v4.8b}, [x0], x2
    srshr v20.8h, v20.8h, #6
    ld1 {v5.8b}, [x0], x2
    srshr v21.8h, v21.8h, #6
    ld1 {v6.8b}, [x0], x2
    srshr v22.8h, v22.8h, #6
    ld1 {v7.8b}, [x0], x2
    srshr v23.8h, v23.8h, #6
    sub x0, x0, x2, lsl #3
    uaddw v16.8h, v16.8h, v0.8b
    uaddw v17.8h, v17.8h, v1.8b
    uaddw v18.8h, v18.8h, v2.8b
    sqxtun v0.8b, v16.8h
    sqxtun v1.8b, v17.8h
    sqxtun v2.8b, v18.8h
    uaddw v19.8h, v19.8h, v3.8b
    st1 {v0.8b}, [x0], x2
    uaddw v20.8h, v20.8h, v4.8b
    st1 {v1.8b}, [x0], x2
    uaddw v21.8h, v21.8h, v5.8b
    st1 {v2.8b}, [x0], x2
    sqxtun v3.8b, v19.8h
    sqxtun v4.8b, v20.8h
    uaddw v22.8h, v22.8h, v6.8b
    uaddw v23.8h, v23.8h, v7.8b
    st1 {v3.8b}, [x0], x2
    sqxtun v5.8b, v21.8h
    st1 {v4.8b}, [x0], x2
    sqxtun v6.8b, v22.8h
    sqxtun v7.8b, v23.8h
    st1 {v5.8b}, [x0], x2
    st1 {v6.8b}, [x0], x2
    st1 {v7.8b}, [x0], x2
    ret
endfunc

function add16x16_idct8_neon, export=1
    mov x7, x30
    bl X(add8x8_idct8_neon)
    sub x0, x0, #8*FDEC_STRIDE-8
    bl X(add8x8_idct8_neon)
    sub x0, x0, #8
    bl X(add8x8_idct8_neon)
    sub x0, x0, #8*FDEC_STRIDE-8
    mov x30, x7
    b X(add8x8_idct8_neon)
endfunc

function add8x8_idct_dc_neon, export=1
    mov x2, #FDEC_STRIDE
    ld1 {v16.4h}, [x1]
    ld1 {v0.8b}, [x0], x2
    srshr v16.4h, v16.4h, #6
    ld1 {v1.8b}, [x0], x2
    dup v20.8h, v16.h[0]
    dup v21.8h, v16.h[1]
    ld1 {v2.8b}, [x0], x2
    dup v22.8h, v16.h[2]
    dup v23.8h, v16.h[3]
    ld1 {v3.8b}, [x0], x2
    trn1 v20.2d, v20.2d, v21.2d
    ld1 {v4.8b}, [x0], x2
    trn1 v21.2d, v22.2d, v23.2d
    ld1 {v5.8b}, [x0], x2
    neg v22.8h, v20.8h
    ld1 {v6.8b}, [x0], x2
    neg v23.8h, v21.8h
    ld1 {v7.8b}, [x0], x2
    sub x0, x0, #8*FDEC_STRIDE
    sqxtun v20.8b, v20.8h
    sqxtun v21.8b, v21.8h
    sqxtun v22.8b, v22.8h
    sqxtun v23.8b, v23.8h
    uqadd v0.8b, v0.8b, v20.8b
    uqadd v1.8b, v1.8b, v20.8b
    uqadd v2.8b, v2.8b, v20.8b
    uqadd v3.8b, v3.8b, v20.8b
    uqadd v4.8b, v4.8b, v21.8b
    uqadd v5.8b, v5.8b, v21.8b
    uqadd v6.8b, v6.8b, v21.8b
    uqadd v7.8b, v7.8b, v21.8b
    uqsub v0.8b, v0.8b, v22.8b
    uqsub v1.8b, v1.8b, v22.8b
    uqsub v2.8b, v2.8b, v22.8b
    uqsub v3.8b, v3.8b, v22.8b
    uqsub v4.8b, v4.8b, v23.8b
    uqsub v5.8b, v5.8b, v23.8b
    uqsub v6.8b, v6.8b, v23.8b
    uqsub v7.8b, v7.8b, v23.8b
    st1 {v0.8b}, [x0], x2
    st1 {v1.8b}, [x0], x2
    st1 {v2.8b}, [x0], x2
    st1 {v3.8b}, [x0], x2
    st1 {v4.8b}, [x0], x2
    st1 {v5.8b}, [x0], x2
    st1 {v6.8b}, [x0], x2
    st1 {v7.8b}, [x0], x2
    ret
endfunc
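
// Scalar sketch of the DC-only IDCT add: one rounded DC per 4x4 quadrant,
//   dc = (dc + 32) >> 6;
//   pix[x] = clip_uint8( pix[x] + dc );
// Instead of an explicit clip, the code above saturating-adds the positive
// part of dc (uqadd; sqxtun clamps a negative dc to 0) and then
// saturating-subtracts the positive part of -dc (uqsub).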

.macro ADD16x4_IDCT_DC dc
    ld1 {v4.16b}, [x0], x3
    dup v24.8h, \dc[0]
    dup v25.8h, \dc[1]
    ld1 {v5.16b}, [x0], x3
    dup v26.8h, \dc[2]
    dup v27.8h, \dc[3]
    ld1 {v6.16b}, [x0], x3
    trn1 v24.2d, v24.2d, v25.2d
    ld1 {v7.16b}, [x0], x3
    trn1 v25.2d, v26.2d, v27.2d
    neg v26.8h, v24.8h
    neg v27.8h, v25.8h
    sqxtun v20.8b, v24.8h
    sqxtun v21.8b, v26.8h
    sqxtun2 v20.16b, v25.8h
    sqxtun2 v21.16b, v27.8h
    uqadd v4.16b, v4.16b, v20.16b
    uqadd v5.16b, v5.16b, v20.16b
    uqadd v6.16b, v6.16b, v20.16b
    uqadd v7.16b, v7.16b, v20.16b
    uqsub v4.16b, v4.16b, v21.16b
    uqsub v5.16b, v5.16b, v21.16b
    uqsub v6.16b, v6.16b, v21.16b
    st1 {v4.16b}, [x2], x3
    uqsub v7.16b, v7.16b, v21.16b
    st1 {v5.16b}, [x2], x3
    st1 {v6.16b}, [x2], x3
    st1 {v7.16b}, [x2], x3
.endm

function add16x16_idct_dc_neon, export=1
    mov x2, x0
    mov x3, #FDEC_STRIDE
    ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x1]
    srshr v0.4h, v0.4h, #6
    srshr v1.4h, v1.4h, #6
    ADD16x4_IDCT_DC v0.h
    srshr v2.4h, v2.4h, #6
    ADD16x4_IDCT_DC v1.h
    srshr v3.4h, v3.4h, #6
    ADD16x4_IDCT_DC v2.h
    ADD16x4_IDCT_DC v3.h
    ret
endfunc

.macro sub4x4x2_dct_dc, dst, t0, t1, t2, t3, t4, t5, t6, t7
    ld1 {\t0\().8b}, [x1], x3
    ld1 {\t1\().8b}, [x2], x4
    ld1 {\t2\().8b}, [x1], x3
    ld1 {\t3\().8b}, [x2], x4
    usubl \t0\().8h, \t0\().8b, \t1\().8b
    ld1 {\t4\().8b}, [x1], x3
    ld1 {\t5\().8b}, [x2], x4
    usubl \t1\().8h, \t2\().8b, \t3\().8b
    ld1 {\t6\().8b}, [x1], x3
    ld1 {\t7\().8b}, [x2], x4
    add \dst\().8h, \t0\().8h, \t1\().8h
    usubl \t2\().8h, \t4\().8b, \t5\().8b
    usubl \t3\().8h, \t6\().8b, \t7\().8b
    add \dst\().8h, \dst\().8h, \t2\().8h
    add \dst\().8h, \dst\().8h, \t3\().8h
.endm
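
// Each 16-bit lane of \dst accumulates pix1[x] - pix2[x] over four rows of
// an 8-pixel-wide strip: lanes 0-3 belong to the left 4x4 block and lanes
// 4-7 to the right one, so a later pairwise add yields the two block DCs.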

function sub8x8_dct_dc_neon, export=1
    mov x3, #FENC_STRIDE
    mov x4, #FDEC_STRIDE
    sub4x4x2_dct_dc v0, v16, v17, v18, v19, v20, v21, v22, v23
    sub4x4x2_dct_dc v1, v24, v25, v26, v27, v28, v29, v30, v31
    transpose v2.2d, v3.2d, v0.2d, v1.2d
    SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h
    transpose v2.2d, v3.2d, v0.2d, v1.2d
    SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h
    transpose v2.2d, v3.2d, v0.2d, v1.2d
    addp v0.8h, v2.8h, v3.8h
    addp v0.8h, v0.8h, v0.8h
    st1 {v0.4h}, [x0]
    ret
endfunc

function sub8x16_dct_dc_neon, export=1
    mov x3, #FENC_STRIDE
    mov x4, #FDEC_STRIDE
    sub4x4x2_dct_dc v0, v16, v17, v18, v19, v20, v21, v22, v23
    sub4x4x2_dct_dc v1, v24, v25, v26, v27, v28, v29, v30, v31
    sub4x4x2_dct_dc v2, v16, v17, v18, v19, v20, v21, v22, v23
    sub4x4x2_dct_dc v3, v24, v25, v26, v27, v28, v29, v30, v31
    addp v4.8h, v0.8h, v2.8h
    addp v5.8h, v1.8h, v3.8h
    transpose v2.4s, v3.4s, v4.4s, v5.4s
    SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h
    transpose v2.4s, v3.4s, v0.4s, v1.4s
    SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h
    transpose v2.2d, v3.2d, v0.2d, v1.2d
    SUMSUB_AB v0.8h, v1.8h, v2.8h, v3.8h
    trn1 v2.2d, v0.2d, v1.2d
    trn2 v3.2d, v1.2d, v0.2d
    addp v0.8h, v2.8h, v3.8h
    st1 {v0.8h}, [x0]
    ret
endfunc

function zigzag_interleave_8x8_cavlc_neon, export=1
    mov x3, #7
    movi v31.4s, #1
    ld4 {v0.8h,v1.8h,v2.8h,v3.8h}, [x1], #64
    ld4 {v4.8h,v5.8h,v6.8h,v7.8h}, [x1], #64
    umax v16.8h, v0.8h, v4.8h
    umax v17.8h, v1.8h, v5.8h
    umax v18.8h, v2.8h, v6.8h
    umax v19.8h, v3.8h, v7.8h
    st1 {v0.8h}, [x0], #16
    st1 {v4.8h}, [x0], #16
    umaxp v16.8h, v16.8h, v17.8h
    umaxp v18.8h, v18.8h, v19.8h
    st1 {v1.8h}, [x0], #16
    st1 {v5.8h}, [x0], #16
    umaxp v16.8h, v16.8h, v18.8h
    st1 {v2.8h}, [x0], #16
    st1 {v6.8h}, [x0], #16
    cmhs v16.4s, v16.4s, v31.4s // unsigned >= 1, i.e. sub-block has a nonzero coeff
    st1 {v3.8h}, [x0], #16
    and v16.16b, v16.16b, v31.16b
    st1 {v7.8h}, [x0], #16
    st1 {v16.b}[0], [x2], #1
    st1 {v16.b}[4], [x2], x3
    st1 {v16.b}[8], [x2], #1
    st1 {v16.b}[12], [x2]
    ret
endfunc
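
// Roughly, in scalar C (a sketch of the interleave; the nnz indexing below
// mirrors the four byte stores above, at offsets 0, 1, 8 and 9):
/*
    for( int i = 0; i < 4; i++ )
    {
        int nz = 0;
        for( int j = 0; j < 16; j++ )
        {
            nz |= src[i + j*4];
            dst[i*16 + j] = src[i + j*4];
        }
        nnz[(i&1) + (i>>1)*8] = !!nz;
    }
*/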

function zigzag_scan_4x4_frame_neon, export=1
    movrel x2, scan4x4_frame
    ld1 {v0.16b,v1.16b}, [x1]
    ld1 {v16.16b,v17.16b}, [x2]
    tbl v2.16b, {v0.16b,v1.16b}, v16.16b
    tbl v3.16b, {v0.16b,v1.16b}, v17.16b
    st1 {v2.16b,v3.16b}, [x0]
    ret
endfunc
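
// The tbl above is a byte-wise gather: with scan4x4_frame holding the byte
// offsets of each 16-bit coefficient, it implements (scalar sketch)
//   for( int i = 0; i < 16; i++ )
//       dst[i] = src[scan[i]];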

.macro zigzag_sub_4x4 f ac
function zigzag_sub_4x4\ac\()_\f\()_neon, export=1
    mov x9, #FENC_STRIDE
    mov x4, #FDEC_STRIDE
    movrel x5, sub4x4_\f
    mov x6, x2
    ld1 {v0.s}[0], [x1], x9
    ld1 {v0.s}[1], [x1], x9
    ld1 {v0.s}[2], [x1], x9
    ld1 {v0.s}[3], [x1], x9
    ld1 {v16.16b}, [x5]
    ld1 {v1.s}[0], [x2], x4
    ld1 {v1.s}[1], [x2], x4
    ld1 {v1.s}[2], [x2], x4
    ld1 {v1.s}[3], [x2], x4
    tbl v2.16b, {v0.16b}, v16.16b
    tbl v3.16b, {v1.16b}, v16.16b
    st1 {v0.s}[0], [x6], x4
    usubl v4.8h, v2.8b, v3.8b
.ifc \ac, ac
    dup h7, v4.h[0]
    ins v4.h[0], wzr
    fmov w5, s7
    strh w5, [x3]
.endif
    usubl2 v5.8h, v2.16b, v3.16b
    st1 {v0.s}[1], [x6], x4
    umax v6.8h, v4.8h, v5.8h
    umaxv h6, v6.8h
    st1 {v0.s}[2], [x6], x4
    fmov w7, s6
    st1 {v0.s}[3], [x6], x4
    cmp w7, #0
    st1 {v4.8h,v5.8h}, [x0]
    cset w0, ne
    ret
endfunc
.endm

zigzag_sub_4x4 field
zigzag_sub_4x4 field, ac
zigzag_sub_4x4 frame
zigzag_sub_4x4 frame, ac
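
// Scalar sketch of zigzag_sub_4x4 (and its _ac variant): subtract the
// reconstructed block from the encoded one in scan order, copy the encoded
// pixels into the reconstruction, and report whether anything was nonzero:
/*
    for( int i = 0; i < 16; i++ )
        level[i] = p_enc[scan[i]] - p_dec[scan[i]];
    // _ac variant only: *dc = level[0]; level[0] = 0;  (done before the
    // nonzero test, so the returned flag ignores the DC)
    return any( level[i] != 0 );
*/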

function zigzag_scan_4x4_field_neon, export=1
    movrel x2, scan4x4_field
    ld1 {v0.8h,v1.8h}, [x1]
    ld1 {v16.16b}, [x2]
    tbl v0.16b, {v0.16b}, v16.16b
    st1 {v0.8h,v1.8h}, [x0]
    ret
endfunc

function zigzag_scan_8x8_frame_neon, export=1
    movrel x2, scan8x8_frame
    ld1 {v0.8h,v1.8h}, [x1], #32
    ld1 {v2.8h,v3.8h}, [x1], #32
    ld1 {v4.8h,v5.8h}, [x1], #32
    ld1 {v6.8h,v7.8h}, [x1]
    ld1 {v16.16b,v17.16b}, [x2], #32
    ld1 {v18.16b,v19.16b}, [x2], #32
    ld1 {v20.16b,v21.16b}, [x2], #32
    ld1 {v22.16b,v23.16b}, [x2], #32
    tbl v24.16b, {v0.16b,v1.16b,v2.16b,v3.16b}, v16.16b
    tbl v25.16b, {v0.16b,v1.16b,v2.16b,v3.16b}, v17.16b
    tbl v26.16b, {v0.16b,v1.16b,v2.16b,v3.16b}, v18.16b
    tbl v27.16b, {v3.16b,v4.16b,v5.16b,v6.16b}, v19.16b
    tbl v28.16b, {v0.16b,v1.16b,v2.16b,v3.16b}, v20.16b
    tbl v29.16b, {v4.16b,v5.16b,v6.16b,v7.16b}, v21.16b
    tbl v30.16b, {v4.16b,v5.16b,v6.16b,v7.16b}, v22.16b
    tbl v31.16b, {v4.16b,v5.16b,v6.16b,v7.16b}, v23.16b
    mov v25.h[6], v4.h[0]
    mov v25.h[7], v5.h[0]
    mov v26.h[0], v4.h[1]
    mov v27.h[4], v7.h[0]
    mov v28.h[7], v4.h[4]
    mov v29.h[7], v3.h[6]
    mov v30.h[0], v2.h[7]
    mov v30.h[1], v3.h[7]
    st1 {v24.8h,v25.8h}, [x0], #32
    st1 {v26.8h,v27.8h}, [x0], #32
    st1 {v28.8h,v29.8h}, [x0], #32
    st1 {v30.8h,v31.8h}, [x0]
    ret
endfunc

#define Z(z) 2*(z), 2*(z)+1
#define T(x,y) Z(x*8+y)
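
// Z/T expand a coefficient's position into the two byte offsets of its
// 16-bit value, e.g. T(0,1) = Z(1) = 2,3.  The (x-N) rebinds of T below
// compensate for tbl reading from a sliding four-register window of the
// source coefficients rather than from the start of the block.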

const scan8x8_frame, align=5
    .byte T(0,0), T(1,0), T(0,1), T(0,2)
    .byte T(1,1), T(2,0), T(3,0), T(2,1)
    .byte T(1,2), T(0,3), T(0,4), T(1,3)
    .byte T(2,2), T(3,1), T(4,0), T(5,0)
    .byte T(4,1), T(3,2), T(2,3), T(1,4)
    .byte T(0,5), T(0,6), T(1,5), T(2,4)
#undef T
#define T(x,y) Z((x-3)*8+y)
    .byte T(3,3), T(4,2), T(5,1), T(6,0)
    .byte T(7,0), T(6,1), T(5,2), T(4,3)
#undef T
#define T(x,y) Z((x-0)*8+y)
    .byte T(3,4), T(2,5), T(1,6), T(0,7)
    .byte T(1,7), T(2,6), T(3,5), T(4,4)
#undef T
#define T(x,y) Z((x-4)*8+y)
    .byte T(5,3), T(6,2), T(7,1), T(7,2)
    .byte T(6,3), T(5,4), T(4,5), T(3,6)
    .byte T(2,7), T(3,7), T(4,6), T(5,5)
    .byte T(6,4), T(7,3), T(7,4), T(6,5)
    .byte T(5,6), T(4,7), T(5,7), T(6,6)
    .byte T(7,5), T(7,6), T(6,7), T(7,7)
endconst

function zigzag_scan_8x8_field_neon, export=1
    movrel x2, scan8x8_field
    ld1 {v0.8h,v1.8h}, [x1], #32
    ld1 {v2.8h,v3.8h}, [x1], #32
    ld1 {v4.8h,v5.8h}, [x1], #32
    ld1 {v6.8h,v7.8h}, [x1]
    ld1 {v16.16b,v17.16b}, [x2], #32
    ld1 {v18.16b,v19.16b}, [x2], #32
    ld1 {v20.16b,v21.16b}, [x2], #32
    ld1 {v22.16b}, [x2]
    ext v31.16b, v7.16b, v7.16b, #4
    tbl v24.16b, {v0.16b,v1.16b}, v16.16b
    tbl v25.16b, {v0.16b,v1.16b,v2.16b,v3.16b}, v17.16b
    tbl v26.16b, {v1.16b,v2.16b,v3.16b,v4.16b}, v18.16b
    tbl v27.16b, {v2.16b,v3.16b,v4.16b,v5.16b}, v19.16b
    tbl v28.16b, {v3.16b,v4.16b,v5.16b,v6.16b}, v20.16b
    tbl v29.16b, {v4.16b,v5.16b,v6.16b}, v21.16b
    tbl v30.16b, {v5.16b,v6.16b,v7.16b}, v22.16b
    ext v31.16b, v6.16b, v31.16b, #12
    st1 {v24.8h,v25.8h}, [x0], #32
    st1 {v26.8h,v27.8h}, [x0], #32
    st1 {v28.8h,v29.8h}, [x0], #32
    st1 {v30.8h,v31.8h}, [x0]
    ret
endfunc

.macro zigzag_sub8x8 f
function zigzag_sub_8x8_\f\()_neon, export=1
    movrel x4, sub8x8_\f
    mov x5, #FENC_STRIDE
    mov x6, #FDEC_STRIDE
    mov x7, x2
    ld1 {v0.d}[0], [x1], x5
    ld1 {v0.d}[1], [x1], x5
    ld1 {v1.d}[0], [x1], x5
    ld1 {v1.d}[1], [x1], x5
    ld1 {v2.d}[0], [x1], x5
    ld1 {v2.d}[1], [x1], x5
    ld1 {v3.d}[0], [x1], x5
    ld1 {v3.d}[1], [x1]
    ld1 {v4.d}[0], [x2], x6
    ld1 {v4.d}[1], [x2], x6
    ld1 {v5.d}[0], [x2], x6
    ld1 {v5.d}[1], [x2], x6
    ld1 {v6.d}[0], [x2], x6
    ld1 {v6.d}[1], [x2], x6
    ld1 {v7.d}[0], [x2], x6
    ld1 {v7.d}[1], [x2]
    ld1 {v16.16b,v17.16b}, [x4], #32
    ld1 {v18.16b,v19.16b}, [x4], #32
    tbl v24.16b, {v0.16b,v1.16b,v2.16b,v3.16b}, v16.16b
    tbl v25.16b, {v0.16b,v1.16b,v2.16b,v3.16b}, v17.16b
    tbl v26.16b, {v0.16b,v1.16b,v2.16b,v3.16b}, v18.16b
    tbl v27.16b, {v0.16b,v1.16b,v2.16b,v3.16b}, v19.16b
    tbl v28.16b, {v4.16b,v5.16b,v6.16b,v7.16b}, v16.16b
    tbl v29.16b, {v4.16b,v5.16b,v6.16b,v7.16b}, v17.16b
    tbl v30.16b, {v4.16b,v5.16b,v6.16b,v7.16b}, v18.16b
    tbl v31.16b, {v4.16b,v5.16b,v6.16b,v7.16b}, v19.16b
    usubl v4.8h, v24.8b, v28.8b
    usubl2 v5.8h, v24.16b, v28.16b
    usubl v6.8h, v25.8b, v29.8b
    usubl2 v7.8h, v25.16b, v29.16b
    usubl v16.8h, v26.8b, v30.8b
    usubl2 v17.8h, v26.16b, v30.16b
    usubl v18.8h, v27.8b, v31.8b
    usubl2 v19.8h, v27.16b, v31.16b
    umax v20.8h, v4.8h, v5.8h
    umax v21.8h, v6.8h, v7.8h
    umax v22.8h, v16.8h, v17.8h
    umax v23.8h, v18.8h, v19.8h
    umax v20.8h, v20.8h, v21.8h
    umax v21.8h, v22.8h, v23.8h
    umax v20.8h, v20.8h, v21.8h
    umaxv h22, v20.8h
    st1 {v0.d}[0], [x7], x6
    st1 {v0.d}[1], [x7], x6
    st1 {v1.d}[0], [x7], x6
    st1 {v1.d}[1], [x7], x6
    st1 {v2.d}[0], [x7], x6
    st1 {v2.d}[1], [x7], x6
    st1 {v3.d}[0], [x7], x6
    st1 {v3.d}[1], [x7]
    st1 {v4.8h,v5.8h}, [x0], #32
    st1 {v6.8h,v7.8h}, [x0], #32
    st1 {v16.8h,v17.8h}, [x0], #32
    st1 {v18.8h,v19.8h}, [x0]
    fmov w9, s22
    cmp w9, #0
    cset w0, ne
    ret
endfunc
.endm

zigzag_sub8x8 field
zigzag_sub8x8 frame

#undef T
#define T(x,y) Z(x*8+y)
const scan8x8_field, align=5
    .byte T(0,0), T(0,1), T(0,2), T(1,0)
    .byte T(1,1), T(0,3), T(0,4), T(1,2)
    .byte T(2,0), T(1,3), T(0,5), T(0,6)
    .byte T(0,7), T(1,4), T(2,1), T(3,0)
#undef T
#define T(x,y) Z((x-1)*8+y)
    .byte T(2,2), T(1,5), T(1,6), T(1,7)
    .byte T(2,3), T(3,1), T(4,0), T(3,2)
#undef T
#define T(x,y) Z((x-2)*8+y)
    .byte T(2,4), T(2,5), T(2,6), T(2,7)
    .byte T(3,3), T(4,1), T(5,0), T(4,2)
#undef T
#define T(x,y) Z((x-3)*8+y)
    .byte T(3,4), T(3,5), T(3,6), T(3,7)
    .byte T(4,3), T(5,1), T(6,0), T(5,2)
#undef T
#define T(x,y) Z((x-4)*8+y)
    .byte T(4,4), T(4,5), T(4,6), T(4,7)
    .byte T(5,3), T(6,1), T(6,2), T(5,4)
#undef T
#define T(x,y) Z((x-5)*8+y)
    .byte T(5,5), T(5,6), T(5,7), T(6,3)
    .byte T(7,0), T(7,1), T(6,4), T(6,5)
endconst

#undef T
#define T(y,x) x*8+y
const sub8x8_frame, align=5
    .byte T(0,0), T(1,0), T(0,1), T(0,2)
    .byte T(1,1), T(2,0), T(3,0), T(2,1)
    .byte T(1,2), T(0,3), T(0,4), T(1,3)
    .byte T(2,2), T(3,1), T(4,0), T(5,0)
    .byte T(4,1), T(3,2), T(2,3), T(1,4)
    .byte T(0,5), T(0,6), T(1,5), T(2,4)
    .byte T(3,3), T(4,2), T(5,1), T(6,0)
    .byte T(7,0), T(6,1), T(5,2), T(4,3)
    .byte T(3,4), T(2,5), T(1,6), T(0,7)
    .byte T(1,7), T(2,6), T(3,5), T(4,4)
    .byte T(5,3), T(6,2), T(7,1), T(7,2)
    .byte T(6,3), T(5,4), T(4,5), T(3,6)
    .byte T(2,7), T(3,7), T(4,6), T(5,5)
    .byte T(6,4), T(7,3), T(7,4), T(6,5)
    .byte T(5,6), T(4,7), T(5,7), T(6,6)
    .byte T(7,5), T(7,6), T(6,7), T(7,7)
endconst

const sub8x8_field, align=5
    .byte T(0,0), T(0,1), T(0,2), T(1,0)
    .byte T(1,1), T(0,3), T(0,4), T(1,2)
    .byte T(2,0), T(1,3), T(0,5), T(0,6)
    .byte T(0,7), T(1,4), T(2,1), T(3,0)
    .byte T(2,2), T(1,5), T(1,6), T(1,7)
    .byte T(2,3), T(3,1), T(4,0), T(3,2)
    .byte T(2,4), T(2,5), T(2,6), T(2,7)
    .byte T(3,3), T(4,1), T(5,0), T(4,2)
    .byte T(3,4), T(3,5), T(3,6), T(3,7)
    .byte T(4,3), T(5,1), T(6,0), T(5,2)
    .byte T(4,4), T(4,5), T(4,6), T(4,7)
    .byte T(5,3), T(6,1), T(6,2), T(5,4)
    .byte T(5,5), T(5,6), T(5,7), T(6,3)
    .byte T(7,0), T(7,1), T(6,4), T(6,5)
    .byte T(6,6), T(6,7), T(7,2), T(7,3)
    .byte T(7,4), T(7,5), T(7,6), T(7,7)
endconst