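/*
 * Source: GitHub gist ijsf/9ee4dc523f2fd71c0259, created March 3, 2015 16:30.
 *
 * The notices below are GitHub page text that was captured together with
 * the file; they are not part of the program:
 *   - "Save ijsf/9ee4dc523f2fd71c0259 to your computer and use it in GitHub Desktop."
 *   - "This file contains hidden or bidirectional Unicode text that may be
 *     interpreted or compiled differently than what appears below. To review,
 *     open the file in an editor that reveals hidden Unicode characters.
 *     Learn more about bidirectional Unicode characters"
 */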
/* autogenerated from gstaudiopack.orc */
/* Build configuration (only when the build system provides one) and GLib,
 * whose gint32/guint8/gfloat/gdouble types appear in the prototypes of this
 * file. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <glib.h>
#ifndef _ORC_INTEGER_TYPEDEFS_
#define _ORC_INTEGER_TYPEDEFS_
/*
 * Fixed-width integer aliases (orc_int8 .. orc_uint64) and the
 * ORC_UINT64_C() literal helper.  Three providers are tried in order:
 * C99 <stdint.h>, the MSVC __intN keywords, and finally the plain C types
 * with <limits.h> deciding whether "long" or "long long" is the 64-bit type.
 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#include <stdint.h>
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
#define ORC_UINT64_C(x) UINT64_C(x)
#elif defined(_MSC_VER)
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
#define ORC_UINT64_C(x) (x##Ui64)
/* This branch also maps the "inline" keyword onto MSVC's __inline. */
#define inline __inline
#else
#include <limits.h>
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
/* When long is no wider than int, long long is used as the 64-bit type. */
#if INT_MAX == LONG_MAX
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
#define ORC_UINT64_C(x) (x##ULL)
#else
typedef long orc_int64;
typedef unsigned long orc_uint64;
#define ORC_UINT64_C(x) (x##UL)
#endif
#endif
/* Unions that overlay one 16/32/64-bit value with its narrower lanes and,
 * for the 32/64-bit ones, with the float/double of the same size. */
typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
#endif
/* ORC_RESTRICT: the "restrict" pointer qualifier in whatever spelling the
 * compiler offers (C99 keyword, GNU __restrict__), or nothing at all. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* ORC_INTERNAL: marks a symbol as hidden from the dynamic symbol table,
 * using the spelling of the detected compiler (Sun Studio or GNU C);
 * expands to nothing on other compilers. */
#ifndef ORC_INTERNAL
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
#define ORC_INTERNAL __hidden
#elif defined (__GNUC__)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#else
#define ORC_INTERNAL
#endif
#endif
/* The Orc runtime header is only pulled in when Orc support is enabled. */
#ifndef DISABLE_ORC
#include <orc/orc.h>
#endif
/*
 * Prototypes of the audio pack/unpack/splat kernels.
 * d1 is the destination array, s1 the source array, p1 a scalar parameter
 * and n the element count.
 *
 * NOTE(review): the assembly definitions visible in this file load their
 * element count and array pointers through a structure pointer held in r0
 * instead of taking (d1, s1, n) in separate registers -- confirm which
 * calling convention callers are expected to use.
 */

/* Unpack: integer sample formats -> gint32. */
void audio_orc_unpack_u8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);

/* Unpack: floating-point sample formats -> gdouble. */
void audio_orc_unpack_f32 (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f32_swap (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);

/* Pack: gint32 -> integer sample formats. */
void audio_orc_pack_u8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);

/* Pack: gdouble -> floating-point sample formats. */
void audio_orc_pack_f32 (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_f32_swap (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);

/* Splat: fill d1 with the scalar p1. */
void audio_orc_splat_u16 (guint16 * ORC_RESTRICT d1, int p1, int n);
void audio_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
void audio_orc_splat_u64 (guint64 * ORC_RESTRICT d1, int p1, int n);
/* begin Orc C target preamble */
/* Generic helpers.  Arguments are evaluated more than once, so they must be
 * free of side effects. */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
/* Range limits: S/U = signed/unsigned, B/W/L = 8/16/32 bits. */
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX 255
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX 65535
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0
/* Saturate x to the range of the named type. */
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
/* Byte-order reversal of a 16-, 32- and 64-bit value. */
#define ORC_SWAP_W(x) ((((x)&0xffU)<<8) | (((x)&0xff00U)>>8))
#define ORC_SWAP_L(x) ((((x)&0xffU)<<24) | (((x)&0xff00U)<<8) | (((x)&0xff0000U)>>8) | (((x)&0xff000000U)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
/* Byte-granular pointer arithmetic. */
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
/* Helpers working on the raw bit pattern of a float (32-bit) or double
 * (64-bit): ORC_DENORMAL* clears the mantissa when the exponent field is
 * zero (keeping only the sign); ORC_ISNAN* is true when the exponent field
 * is all ones and the mantissa is non-zero. */
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
/* "restrict" qualifier, defined here only if nothing defined it earlier. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* end Orc C target preamble */
/* audio_orc_unpack_u8 (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_u8 \n\ | |
_audio_orc_unpack_u8: \n\ | |
# 4: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadb \n\ | |
vld1.8 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 d6, d6, #24 \n\ | |
# 5: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 6: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000001 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #24 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #24 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #24 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadb \n\ | |
vld1.8 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 d6, d6, #24 \n\ | |
# 5: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 6: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000001 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
"); | |
/* audio_orc_unpack_s8 (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_s8 \n\ | |
_audio_orc_unpack_s8: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadb \n\ | |
vld1.8 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 d4, d4, #24 \n\ | |
# 4: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000001 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #24 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #24 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #24 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadb \n\ | |
vld1.8 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 d4, d4, #24 \n\ | |
# 4: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000001 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_unpack_u16 (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_u16 \n\ | |
_audio_orc_unpack_u16: \n\ | |
# 3: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d6[0], [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 d6, d6, #16 \n\ | |
# 4: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d6[0], [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 d6, d6, #16 \n\ | |
# 4: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_unpack_s16 (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_s16 \n\ | |
_audio_orc_unpack_s16: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 3: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d4[0], [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 d4, d4, #16 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d4[0], [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 d4, d4, #16 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_unpack_u16_swap (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_u16_swap \n\ | |
_audio_orc_unpack_u16_swap: \n\ | |
# 4: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d6[0], [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 d6, d6, #16 \n\ | |
# 5: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 6: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d6[0], [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 d6, d6, #16 \n\ | |
# 5: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 6: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
"); | |
/* audio_orc_unpack_s16_swap (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_s16_swap \n\ | |
_audio_orc_unpack_s16_swap: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d4[0], [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 d4, d4, #16 \n\ | |
# 4: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d4[0], [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 d4, d4, #16 \n\ | |
# 4: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_unpack_u24_32 (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_u24_32 \n\ | |
_audio_orc_unpack_u24_32: \n\ | |
# 2: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 d6, d6, #8 \n\ | |
# 3: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 d6, d6, #8 \n\ | |
# 3: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_unpack_s24_32 (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_s24_32 \n\ | |
_audio_orc_unpack_s24_32: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 d4, d4, #8 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 d4, d4, #8 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_unpack_u24_32_swap (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_u24_32_swap \n\ | |
_audio_orc_unpack_u24_32_swap: \n\ | |
# 3: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 d6, d6, #8 \n\ | |
# 4: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 d6, d6, #8 \n\ | |
# 4: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_unpack_s24_32_swap (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_s24_32_swap \n\ | |
_audio_orc_unpack_s24_32_swap: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 3: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 d4, d4, #8 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 d4, d4, #8 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_unpack_u32 (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_u32 \n\ | |
_audio_orc_unpack_u32: \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_unpack_u32_swap (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_u32_swap \n\ | |
_audio_orc_unpack_u32_swap: \n\ | |
# 2: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 3: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 3: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_unpack_s32 (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_s32 \n\ | |
_audio_orc_unpack_s32: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_unpack_s32_swap (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_s32_swap \n\ | |
_audio_orc_unpack_s32_swap: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_unpack_f32 (static implementation) */ | |
void | |
audio_orc_unpack_f32 (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n){ | |
int i; | |
orc_union64 * ORC_RESTRICT ptr0; | |
const orc_union32 * ORC_RESTRICT ptr4; | |
orc_union32 var32; | |
orc_union64 var33; | |
ptr0 = (orc_union64 *)d1; | |
ptr4 = (orc_union32 *)s1; | |
for (i = 0; i < n; i++) { | |
/* 0: loadl */ | |
var32 = ptr4[i]; | |
/* 1: convfd */ | |
{ | |
orc_union32 _src1; | |
_src1.i = ORC_DENORMAL(var32.i); | |
var33.f = _src1.f; | |
} | |
/* 2: storeq */ | |
ptr0[i] = var33; | |
} | |
} | |
/* audio_orc_unpack_f32_swap (static implementation) */ | |
void | |
audio_orc_unpack_f32_swap (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n){ | |
int i; | |
orc_union64 * ORC_RESTRICT ptr0; | |
const orc_union32 * ORC_RESTRICT ptr4; | |
orc_union32 var33; | |
orc_union64 var34; | |
orc_union32 var35; | |
ptr0 = (orc_union64 *)d1; | |
ptr4 = (orc_union32 *)s1; | |
for (i = 0; i < n; i++) { | |
/* 0: loadl */ | |
var33 = ptr4[i]; | |
/* 1: swapl */ | |
var35.i = ORC_SWAP_L(var33.i); | |
/* 2: convfd */ | |
{ | |
orc_union32 _src1; | |
_src1.i = ORC_DENORMAL(var35.i); | |
var34.f = _src1.f; | |
} | |
/* 3: storeq */ | |
ptr0[i] = var34; | |
} | |
} | |
/* audio_orc_unpack_f64 (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_f64 \n\ | |
_audio_orc_unpack_f64: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #1 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #1 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #1 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_unpack_f64_swap (static implementation) */ | |
asm("\ | |
.global _audio_orc_unpack_f64_swap \n\ | |
_audio_orc_unpack_f64_swap: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #1 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #1 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #1 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_pack_u8 (static implementation) */ | |
asm("\ | |
.global _audio_orc_pack_u8 \n\ | |
_audio_orc_pack_u8: \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.8 d6[0], [r2] \n\ | |
add r2, r2, #0x00000001 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #11 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.8 d6[0], [r2] \n\ | |
add r2, r2, #0x00000001 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/*
 * audio_orc_pack_s8 (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_s8. For every 32-bit source
 * word it does an arithmetic shift right by 24 (vshr.s32), narrows
 * 32 -> 16 -> 8 bits (two vmovn) and stores one byte.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (written, then read)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (d4/d5) and the condition flags as scratch.
 *
 * Phases:
 *   - Setup: head = (16 - dest) & 15, the number of output bytes needed to
 *     bring the destination to a 16-byte boundary. If n <= head, all n
 *     elements go through the head loop and the other two counts are zero;
 *     otherwise main = (n - head) >> 2 and tail = (n - head) & 3.
 *   - Label 5: head loop, one element per iteration.
 *   - Labels 9/8/7: main loop, four elements per iteration (16 bytes read,
 *     4 bytes written). The variant is picked from the iteration count
 *     (ip >> 15, then ip >> 11); the three bodies are identical in this
 *     listing.
 *   - Label 13: tail loop, one element per iteration.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_s8 \n\ | |
_audio_orc_pack_s8: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.8 d4[0], [r2] \n\ | |
add r2, r2, #0x00000001 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #11 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.8 d4[0], [r2] \n\ | |
add r2, r2, #0x00000001 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
"); | |
/*
 * audio_orc_pack_u16 (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_u16. For every 32-bit source
 * word it flips the top bit (veor with 0x80000000), does a logical shift
 * right by 16 (vshr.u32), narrows to 16 bits (vmovn.i32) and stores one
 * halfword.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (written, then read)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (constant 0x80000000 in each 32-bit lane, built
 *       by the vmov/vorr pair at entry), q3 (data) and the condition flags.
 *
 * Phases:
 *   - Setup: head = ((16 - dest) & 15) >> 1, the distance from the
 *     destination to the next 16-byte boundary expressed in 2-byte
 *     elements. If n <= head, all n elements go through the head loop and
 *     the other two counts are zero; otherwise main = (n - head) >> 2 and
 *     tail = (n - head) & 3.
 *   - Label 5: head loop, one element per iteration.
 *   - Labels 9/8/7: main loop, four elements per iteration (16 bytes read,
 *     8 bytes written). The variant is picked from the iteration count
 *     (ip >> 16, then ip >> 12); the three bodies are identical in this
 *     listing.
 *   - Label 13: tail loop, one element per iteration.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_u16 \n\ | |
_audio_orc_pack_u16: \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #16 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #12 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/*
 * audio_orc_pack_s16 (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_s16. For every 32-bit source
 * word it does an arithmetic shift right by 16 (vshr.s32), narrows to
 * 16 bits (vmovn.i32) and stores one halfword.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (large-n path only)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (d4/d5) and the condition flags as scratch.
 *
 * Two entry paths, chosen by n:
 *   - n <= 0x40: no head loop and [r0, #12] is not written. main = n >> 2,
 *     tail = n & 3. Label 3 processes four elements per iteration and
 *     stores with vst1.8 d4, then control jumps to the tail at label 12.
 *   - n > 0x40 (label 4): head = ((16 - dest) & 15) >> 1, the distance from
 *     the destination to the next 16-byte boundary in 2-byte elements.
 *     If n <= head, all n elements go through the head loop and the other
 *     two counts are zero; otherwise main = (n - head) >> 2 and
 *     tail = (n - head) & 3. Label 5 is the head loop; labels 9/8/7 are the
 *     main loop (four elements per iteration, stored with vst1.64 d4),
 *     picked from the iteration count (ip >> 16, then ip >> 12) with
 *     identical bodies in this listing.
 *   - Label 13: tail loop shared by both paths, one element per iteration.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_s16 \n\ | |
_audio_orc_pack_s16: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.16 d4[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #16 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #12 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.16 d4[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/*
 * audio_orc_pack_u16_swap (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_u16_swap. For every 32-bit
 * source word it flips the top bit (veor with 0x80000000), does a logical
 * shift right by 16 (vshr.u32), narrows to 16 bits (vmovn.i32), swaps the
 * two bytes of the halfword (vrev16) and stores it.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (written, then read)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (constant 0x80000000 in each 32-bit lane, built
 *       by the vmov/vorr pair at entry), q3 (data) and the condition flags.
 *
 * Phases:
 *   - Setup: head = ((16 - dest) & 15) >> 1, the distance from the
 *     destination to the next 16-byte boundary expressed in 2-byte
 *     elements. If n <= head, all n elements go through the head loop and
 *     the other two counts are zero; otherwise main = (n - head) >> 2 and
 *     tail = (n - head) & 3.
 *   - Label 5: head loop, one element per iteration.
 *   - Labels 9/8/7: main loop, four elements per iteration (16 bytes read,
 *     8 bytes written). The variant is picked from the iteration count
 *     (ip >> 16, then ip >> 12); the three bodies are identical in this
 *     listing.
 *   - Label 13: tail loop, one element per iteration.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_u16_swap \n\ | |
_audio_orc_pack_u16_swap: \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #16 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #12 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
"); | |
/*
 * audio_orc_pack_s16_swap (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_s16_swap. For every 32-bit
 * source word it does an arithmetic shift right by 16 (vshr.s32), narrows
 * to 16 bits (vmovn.i32), swaps the two bytes of the halfword (vrev16) and
 * stores it.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (written, then read)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (d4/d5) and the condition flags as scratch.
 *
 * Phases:
 *   - Setup: head = ((16 - dest) & 15) >> 1, the distance from the
 *     destination to the next 16-byte boundary expressed in 2-byte
 *     elements. If n <= head, all n elements go through the head loop and
 *     the other two counts are zero; otherwise main = (n - head) >> 2 and
 *     tail = (n - head) & 3.
 *   - Label 5: head loop, one element per iteration.
 *   - Labels 9/8/7: main loop, four elements per iteration (16 bytes read,
 *     8 bytes written). The variant is picked from the iteration count
 *     (ip >> 16, then ip >> 12); the three bodies are identical in this
 *     listing.
 *   - Label 13: tail loop, one element per iteration.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_s16_swap \n\ | |
_audio_orc_pack_s16_swap: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.16 d4[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #16 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #12 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.16 d4[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/*
 * audio_orc_pack_u24_32 (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_u24_32. For every 32-bit
 * source word it flips the top bit (veor with 0x80000000), does a logical
 * shift right by 8 (vshr.u32) and stores the 32-bit result.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (written, then read)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (constant 0x80000000 in each 32-bit lane, built
 *       by the vmov/vorr pair at entry), q3 (data) and the condition flags.
 *
 * Phases:
 *   - Setup: head = ((16 - dest) & 15) >> 2, the distance from the
 *     destination to the next 16-byte boundary expressed in 4-byte
 *     elements. If n <= head, all n elements go through the head loop and
 *     the other two counts are zero; otherwise main = (n - head) >> 2 and
 *     tail = (n - head) & 3.
 *   - Label 5: head loop, one element per iteration.
 *   - Labels 9/8/7: main loop, four elements per iteration (16 bytes read,
 *     16 bytes written). The variant is picked from the iteration count
 *     (ip >> 17, then ip >> 13); the three bodies are identical in this
 *     listing.
 *   - Label 13: tail loop, one element per iteration.
 *
 * NOTE(review): the main-loop store carries a :128 alignment qualifier.
 * The head loop only lands on a 16-byte boundary when the destination
 * starts 4-byte aligned (the >> 2 drops any remainder) - confirm that
 * callers guarantee this.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_u24_32 \n\ | |
_audio_orc_pack_u24_32: \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #8 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #8 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/*
 * audio_orc_pack_s24_32 (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_s24_32. For every 32-bit
 * source word it does an arithmetic shift right by 8 (vshr.s32) and stores
 * the 32-bit result.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (large-n path only)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (d4/d5) and the condition flags as scratch.
 *
 * Two entry paths, chosen by n:
 *   - n <= 0x40: no head loop and [r0, #12] is not written. main = n >> 2,
 *     tail = n & 3. Label 3 processes four elements per iteration and
 *     stores with vst1.8 (no alignment qualifier), then control jumps to
 *     the tail at label 12.
 *   - n > 0x40 (label 4): head = ((16 - dest) & 15) >> 2, the distance from
 *     the destination to the next 16-byte boundary in 4-byte elements.
 *     If n <= head, all n elements go through the head loop and the other
 *     two counts are zero; otherwise main = (n - head) >> 2 and
 *     tail = (n - head) & 3. Label 5 is the head loop; labels 9/8/7 are the
 *     main loop (four elements per iteration), picked from the iteration
 *     count (ip >> 17, then ip >> 13) with identical bodies in this
 *     listing.
 *   - Label 13: tail loop shared by both paths, one element per iteration.
 *
 * NOTE(review): the large-n main-loop store carries a :128 alignment
 * qualifier. The head loop only lands on a 16-byte boundary when the
 * destination starts 4-byte aligned (the >> 2 drops any remainder) -
 * confirm that callers guarantee this.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_s24_32 \n\ | |
_audio_orc_pack_s24_32: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #8 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #8 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/*
 * audio_orc_pack_u24_32_swap (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_u24_32_swap. For every
 * 32-bit source word it flips the top bit (veor with 0x80000000), does a
 * logical shift right by 8 (vshr.u32), reverses the four bytes of the word
 * (vrev32) and stores the 32-bit result.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (written, then read)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (constant 0x80000000 in each 32-bit lane, built
 *       by the vmov/vorr pair at entry), q3 (data) and the condition flags.
 *
 * Phases:
 *   - Setup: head = ((16 - dest) & 15) >> 2, the distance from the
 *     destination to the next 16-byte boundary expressed in 4-byte
 *     elements. If n <= head, all n elements go through the head loop and
 *     the other two counts are zero; otherwise main = (n - head) >> 2 and
 *     tail = (n - head) & 3.
 *   - Label 5: head loop, one element per iteration.
 *   - Labels 9/8/7: main loop, four elements per iteration (16 bytes read,
 *     16 bytes written). The variant is picked from the iteration count
 *     (ip >> 17, then ip >> 13); the three bodies are identical in this
 *     listing.
 *   - Label 13: tail loop, one element per iteration.
 *
 * NOTE(review): the main-loop store carries a :128 alignment qualifier.
 * The head loop only lands on a 16-byte boundary when the destination
 * starts 4-byte aligned (the >> 2 drops any remainder) - confirm that
 * callers guarantee this.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_u24_32_swap \n\ | |
_audio_orc_pack_u24_32_swap: \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/*
 * audio_orc_pack_s24_32_swap (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_s24_32_swap. For every
 * 32-bit source word it does an arithmetic shift right by 8 (vshr.s32),
 * reverses the four bytes of the word (vrev32) and stores the 32-bit
 * result.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (large-n path only)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (d4/d5) and the condition flags as scratch.
 *
 * Two entry paths, chosen by n:
 *   - n <= 0x40: no head loop and [r0, #12] is not written. main = n >> 2,
 *     tail = n & 3. Label 3 processes four elements per iteration and
 *     stores with vst1.8 (no alignment qualifier), then control jumps to
 *     the tail at label 12.
 *   - n > 0x40 (label 4): head = ((16 - dest) & 15) >> 2, the distance from
 *     the destination to the next 16-byte boundary in 4-byte elements.
 *     If n <= head, all n elements go through the head loop and the other
 *     two counts are zero; otherwise main = (n - head) >> 2 and
 *     tail = (n - head) & 3. Label 5 is the head loop; labels 9/8/7 are the
 *     main loop (four elements per iteration), picked from the iteration
 *     count (ip >> 17, then ip >> 13) with identical bodies in this
 *     listing.
 *   - Label 13: tail loop shared by both paths, one element per iteration.
 *
 * NOTE(review): the large-n main-loop store carries a :128 alignment
 * qualifier. The head loop only lands on a 16-byte boundary when the
 * destination starts 4-byte aligned (the >> 2 drops any remainder) -
 * confirm that callers guarantee this.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_s24_32_swap \n\ | |
_audio_orc_pack_s24_32_swap: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 3: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/*
 * audio_orc_pack_u32 (static implementation)
 *
 * ARM NEON routine exported as _audio_orc_pack_u32. For every 32-bit source
 * word it flips the top bit (veor with 0x80000000) and stores the 32-bit
 * result; there is no shift or narrowing step.
 *
 * In:   r0 = pointer to a parameter block (presumably the OrcExecutor;
 *            confirm against the Orc headers). Fields touched here:
 *              [r0, #8]   element count n (read)
 *              [r0, #12]  head-loop iteration count (large-n path only)
 *              [r0, #16]  main-loop iteration count (written, then read)
 *              [r0, #20]  tail-loop iteration count (written, then read)
 *              [r0, #24]  destination pointer (loaded into r2)
 *              [r0, #56]  source pointer (loaded into r3)
 * Uses: r1, r2, r3, ip, q2 (constant 0x80000000 in each 32-bit lane, built
 *       by the vmov/vorr pair at entry), q3 (data) and the condition flags.
 *
 * Two entry paths, chosen by n:
 *   - n <= 0x40: no head loop and [r0, #12] is not written. main = n >> 2,
 *     tail = n & 3. Label 3 processes four elements per iteration and
 *     stores with vst1.8 (no alignment qualifier), then control jumps to
 *     the tail at label 12.
 *   - n > 0x40 (label 4): head = ((16 - dest) & 15) >> 2, the distance from
 *     the destination to the next 16-byte boundary in 4-byte elements.
 *     If n <= head, all n elements go through the head loop and the other
 *     two counts are zero; otherwise main = (n - head) >> 2 and
 *     tail = (n - head) & 3. Label 5 is the head loop; labels 9/8/7 are the
 *     main loop (four elements per iteration), picked from the iteration
 *     count (ip >> 17, then ip >> 13) with identical bodies in this
 *     listing.
 *   - Label 13: tail loop shared by both paths, one element per iteration.
 *
 * NOTE(review): the large-n main-loop store carries a :128 alignment
 * qualifier. The head loop only lands on a 16-byte boundary when the
 * destination starts 4-byte aligned (the >> 2 drops any remainder) -
 * confirm that callers guarantee this.
 *
 * Every loop sets the flags with subs at its top and tests them with bne
 * at its bottom; the NEON instructions and the plain add in between leave
 * the flags alone.
 */ | |
asm("\ | |
.global _audio_orc_pack_u32 \n\ | |
_audio_orc_pack_u32: \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_pack_s32 (static implementation)
 *
 * ARM/NEON routine emitted as a top-level asm string. It copies 32-bit
 * elements unchanged (copyl) from the source pointer loaded from
 * [r0, #56] to the destination pointer loaded from [r0, #24].
 *
 * In:    r0 = base of a control block (presumably the OrcExecutor --
 *        TODO confirm); the element count is read from [r0, #8].
 * Out:   no register result. The block count is written to [r0, #16] and
 *        the tail count to [r0, #20]; the long path also writes the head
 *        count to [r0, #12].
 * Clobb: r1, r2, r3, ip, q2 (d4/d5), flags.
 *
 * Flow:
 *  - count <= 64: loop 3 copies count/4 blocks of 16 bytes with no
 *    alignment hint, then loop 13 copies the count%4 leftover elements.
 *  - count > 64 (label 4): head = ((16 - dest) & 15) >> 2 elements are
 *    copied one at a time (loop 5), then (count - head)/4 blocks are
 *    stored with the :128 alignment hint, then the tail (loop 13).
 *    If count <= head, the head loop copies everything.
 *  - The block count is tested with >> 17 and >> 13 to select loop 9, 8
 *    or 7; the three loop bodies are identical in this listing.
 *
 * NOTE(review): head is computed with a truncating >> 2, so the :128
 * store hint is only satisfied when the destination is at least 4-byte
 * aligned -- confirm that callers guarantee this.
 */ | |
asm("\ | |
.global _audio_orc_pack_s32 \n\ | |
_audio_orc_pack_s32: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_pack_u32_swap (static implementation) */ | |
asm("\ | |
.global _audio_orc_pack_u32_swap \n\ | |
_audio_orc_pack_u32_swap: \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_pack_s32_swap (static implementation)
 *
 * ARM/NEON routine emitted as a top-level asm string. Each 32-bit element
 * read from the source pointer ([r0, #56]) has its four bytes reversed
 * (swapl, vrev32.i8) and is stored to the destination pointer
 * ([r0, #24]).
 *
 * In:    r0 = base of a control block (presumably the OrcExecutor --
 *        TODO confirm); the element count is read from [r0, #8].
 * Out:   no register result. The block count is written to [r0, #16] and
 *        the tail count to [r0, #20]; the long path also writes the head
 *        count to [r0, #12].
 * Clobb: r1, r2, r3, ip, q2 (d4/d5), flags.
 *
 * Flow:
 *  - count <= 64: loop 3 handles count/4 blocks of 16 bytes with no
 *    alignment hint, then loop 13 handles the count%4 leftover elements.
 *  - count > 64 (label 4): head = ((16 - dest) & 15) >> 2 elements are
 *    handled one at a time (loop 5), then (count - head)/4 blocks are
 *    stored with the :128 alignment hint, then the tail (loop 13).
 *    If count <= head, the head loop handles everything.
 *  - The block count is tested with >> 17 and >> 13 to select loop 9, 8
 *    or 7; the three loop bodies are identical in this listing.
 *  - The two nops after "bx lr" have no label and follow the return;
 *    presumably padding.
 *
 * NOTE(review): head is computed with a truncating >> 2, so the :128
 * store hint is only satisfied when the destination is at least 4-byte
 * aligned -- confirm that callers guarantee this.
 */ | |
asm("\ | |
.global _audio_orc_pack_s32_swap \n\ | |
_audio_orc_pack_s32_swap: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_pack_f32 (static implementation) */ | |
void | |
audio_orc_pack_f32 (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n){ | |
int i; | |
orc_union32 * ORC_RESTRICT ptr0; | |
const orc_union64 * ORC_RESTRICT ptr4; | |
orc_union64 var32; | |
orc_union32 var33; | |
ptr0 = (orc_union32 *)d1; | |
ptr4 = (orc_union64 *)s1; | |
for (i = 0; i < n; i++) { | |
/* 0: loadq */ | |
var32 = ptr4[i]; | |
/* 1: convdf */ | |
{ | |
orc_union64 _src1; | |
orc_union32 _dest; | |
_src1.i = ORC_DENORMAL_DOUBLE(var32.i); | |
_dest.f = _src1.f; | |
var33.i = ORC_DENORMAL(_dest.i); | |
} | |
/* 2: storel */ | |
ptr0[i] = var33; | |
} | |
} | |
/* audio_orc_pack_f32_swap (static implementation) */ | |
void | |
audio_orc_pack_f32_swap (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n){ | |
int i; | |
orc_union32 * ORC_RESTRICT ptr0; | |
const orc_union64 * ORC_RESTRICT ptr4; | |
orc_union64 var33; | |
orc_union32 var34; | |
orc_union32 var35; | |
ptr0 = (orc_union32 *)d1; | |
ptr4 = (orc_union64 *)s1; | |
for (i = 0; i < n; i++) { | |
/* 0: loadq */ | |
var33 = ptr4[i]; | |
/* 1: convdf */ | |
{ | |
orc_union64 _src1; | |
orc_union32 _dest; | |
_src1.i = ORC_DENORMAL_DOUBLE(var33.i); | |
_dest.f = _src1.f; | |
var35.i = ORC_DENORMAL(_dest.i); | |
} | |
/* 2: swapl */ | |
var34.i = ORC_SWAP_L(var35.i); | |
/* 3: storel */ | |
ptr0[i] = var34; | |
} | |
} | |
/* audio_orc_pack_f64 (static implementation)
 *
 * ARM/NEON routine emitted as a top-level asm string. It copies 64-bit
 * elements unchanged (copyq) from the source pointer loaded from
 * [r0, #56] to the destination pointer loaded from [r0, #24].
 *
 * In:    r0 = base of a control block (presumably the OrcExecutor --
 *        TODO confirm); the element count is read from [r0, #8].
 * Out:   no register result. The block count is written to [r0, #16] and
 *        the tail count to [r0, #20]; the long path also writes the head
 *        count to [r0, #12].
 * Clobb: r1, r2, r3, ip, q2 (d4/d5), flags.
 *
 * Flow:
 *  - count <= 64: loop 3 copies count/2 blocks of 16 bytes with no
 *    alignment hint, then loop 13 copies the count%2 leftover element.
 *  - count > 64 (label 4): head = ((16 - dest) & 15) >> 3 elements are
 *    copied one at a time (loop 5), then (count - head)/2 blocks are
 *    stored with the :128 alignment hint, then the tail (loop 13).
 *    If count <= head, the head loop copies everything.
 *  - The block count is tested with >> 19 and >> 15 to select loop 9, 8
 *    or 7; the three loop bodies are identical in this listing.
 *
 * NOTE(review): head is computed with a truncating >> 3, so the :128
 * store hint is only satisfied when the destination is at least 8-byte
 * aligned -- confirm that callers guarantee this.
 */ | |
asm("\ | |
.global _audio_orc_pack_f64 \n\ | |
_audio_orc_pack_f64: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #1 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #1 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #1 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
"); | |
/* audio_orc_pack_f64_swap (static implementation)
 *
 * ARM/NEON routine emitted as a top-level asm string. Each 64-bit element
 * read from the source pointer ([r0, #56]) has its eight bytes reversed
 * (swapq, vrev64.i8) and is stored to the destination pointer
 * ([r0, #24]).
 *
 * In:    r0 = base of a control block (presumably the OrcExecutor --
 *        TODO confirm); the element count is read from [r0, #8].
 * Out:   no register result. The block count is written to [r0, #16] and
 *        the tail count to [r0, #20]; the long path also writes the head
 *        count to [r0, #12].
 * Clobb: r1, r2, r3, ip, q2 (d4/d5), flags.
 *
 * Flow:
 *  - count <= 64: loop 3 handles count/2 blocks of 16 bytes with no
 *    alignment hint, then loop 13 handles the count%2 leftover element.
 *  - count > 64 (label 4): head = ((16 - dest) & 15) >> 3 elements are
 *    handled one at a time (loop 5), then (count - head)/2 blocks are
 *    stored with the :128 alignment hint, then the tail (loop 13).
 *    If count <= head, the head loop handles everything.
 *  - The block count is tested with >> 19 and >> 15 to select loop 9, 8
 *    or 7; the three loop bodies are identical in this listing.
 *  - The two nops after "bx lr" have no label and follow the return;
 *    presumably padding.
 *
 * NOTE(review): head is computed with a truncating >> 3, so the :128
 * store hint is only satisfied when the destination is at least 8-byte
 * aligned -- confirm that callers guarantee this.
 */ | |
asm("\ | |
.global _audio_orc_pack_f64_swap \n\ | |
_audio_orc_pack_f64_swap: \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #1 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #1 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #1 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr r3, [r0, #56] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_splat_u16 (static implementation)
 *
 * ARM/NEON routine emitted as a top-level asm string. It fills the
 * destination pointer loaded from [r0, #24] with copies of one 16-bit
 * value. The value is read from r0 + 0x278 and replicated into every
 * 16-bit lane of q2 (loadpw) before the loops start.
 *
 * In:    r0 = base of a control block (presumably the OrcExecutor, with
 *        0x278 being a parameter slot -- TODO confirm); the element
 *        count is read from [r0, #8].
 * Out:   no register result. The block count is written to [r0, #16] and
 *        the tail count to [r0, #20]; the long path also writes the head
 *        count to [r0, #12].
 * Clobb: r1, r2, ip, q2 (fill value), q3 (store staging), flags.
 *
 * Flow:
 *  - count <= 64: loop 3 stores count/8 blocks of 16 bytes with no
 *    alignment hint, then loop 13 stores the count%8 leftover elements.
 *  - count > 64 (label 4): head = ((16 - dest) & 15) >> 1 elements are
 *    stored one at a time (loop 5), then (count - head)/8 blocks are
 *    stored with the :128 alignment hint, then the tail (loop 13).
 *    If count <= head, the head loop stores everything.
 *  - The block count is tested with >> 15 and >> 11 to select loop 9, 8
 *    or 7; the three loop bodies are identical in this listing.
 *  - The two nops after "bx lr" have no label and follow the return;
 *    presumably padding.
 *
 * NOTE(review): head is computed with a truncating >> 1, so the :128
 * store hint is only satisfied when the destination is at least 2-byte
 * aligned -- confirm that callers guarantee this.
 */ | |
asm("\ | |
.global _audio_orc_splat_u16 \n\ | |
_audio_orc_splat_u16: \n\ | |
# 0: loadpw \n\ | |
add r1, r0, #0x00000278 \n\ | |
vld1.16 {d4[],d5[]}, [r1] \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #3 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #7 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 3 \n\ | |
# 1: copyw \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storew \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #3 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #7 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyw \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 3 \n\ | |
# 1: copyw \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storew \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #11 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 3 \n\ | |
# 1: copyw \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storew \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 3 \n\ | |
# 1: copyw \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storew \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyw \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_splat_u32 (static implementation)
 *
 * ARM/NEON routine emitted as a top-level asm string. It fills the
 * destination pointer loaded from [r0, #24] with copies of one 32-bit
 * value. The value is read from r0 + 0x278 and replicated into every
 * 32-bit lane of q2 (loadpl) before the loops start.
 *
 * In:    r0 = base of a control block (presumably the OrcExecutor, with
 *        0x278 being a parameter slot -- TODO confirm); the element
 *        count is read from [r0, #8].
 * Out:   no register result. The block count is written to [r0, #16] and
 *        the tail count to [r0, #20]; the long path also writes the head
 *        count to [r0, #12].
 * Clobb: r1, r2, ip, q2 (fill value), q3 (store staging), flags.
 *
 * Flow:
 *  - count <= 64: loop 3 stores count/4 blocks of 16 bytes with no
 *    alignment hint, then loop 13 stores the count%4 leftover elements.
 *  - count > 64 (label 4): head = ((16 - dest) & 15) >> 2 elements are
 *    stored one at a time (loop 5), then (count - head)/4 blocks are
 *    stored with the :128 alignment hint, then the tail (loop 13).
 *    If count <= head, the head loop stores everything.
 *  - The block count is tested with >> 17 and >> 13 to select loop 9, 8
 *    or 7; the three loop bodies are identical in this listing.
 *  - The two nops after "bx lr" have no label and follow the return;
 *    presumably padding.
 *
 * NOTE(review): head is computed with a truncating >> 2, so the :128
 * store hint is only satisfied when the destination is at least 4-byte
 * aligned -- confirm that callers guarantee this.
 */ | |
asm("\ | |
.global _audio_orc_splat_u32 \n\ | |
_audio_orc_splat_u32: \n\ | |
# 0: loadpl \n\ | |
add r1, r0, #0x00000278 \n\ | |
vld1.32 {d4[],d5[]}, [r1] \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #2 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #3 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 1: copyl \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #3 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyl \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 1: copyl \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 1: copyl \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 1: copyl \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyl \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
/* audio_orc_splat_u64 (static implementation)
 *
 * ARM/NEON routine emitted as a top-level asm string. It fills the
 * destination pointer loaded from [r0, #24] with copies of one 64-bit
 * value. The value is assembled (loadpq) from two 32-bit words: the word
 * at r0 + 0x278 goes into lane 0 of d4 and d5, and the word at
 * r0 + 0x298 goes into lane 1 of d4 and d5, so d4 and d5 hold the same
 * 64-bit pattern.
 *
 * In:    r0 = base of a control block (presumably the OrcExecutor, with
 *        0x278 / 0x298 being the two halves of a 64-bit parameter --
 *        TODO confirm); the element count is read from [r0, #8].
 * Out:   no register result. The block count is written to [r0, #16] and
 *        the tail count to [r0, #20]; the long path also writes the head
 *        count to [r0, #12].
 * Clobb: r1, r2, ip, q2 (fill value), q3 (store staging), flags.
 *
 * Flow:
 *  - count <= 64: loop 3 stores count/2 blocks of 16 bytes with no
 *    alignment hint, then loop 13 stores the count%2 leftover element.
 *  - count > 64 (label 4): head = ((16 - dest) & 15) >> 3 elements are
 *    stored one at a time (loop 5), then (count - head)/2 blocks are
 *    stored with the :128 alignment hint, then the tail (loop 13).
 *    If count <= head, the head loop stores everything.
 *  - The block count is tested with >> 19 and >> 15 to select loop 9, 8
 *    or 7; the three loop bodies are identical in this listing.
 *  - The two nops after "bx lr" have no label and follow the return;
 *    presumably padding.
 *
 * NOTE(review): head is computed with a truncating >> 3, so the :128
 * store hint is only satisfied when the destination is at least 8-byte
 * aligned -- confirm that callers guarantee this.
 */ | |
asm("\ | |
.global _audio_orc_splat_u64 \n\ | |
_audio_orc_splat_u64: \n\ | |
# 0: loadpq \n\ | |
add r1, r0, #0x00000278 \n\ | |
vld1.32 d4[0], [r1] \n\ | |
vld1.32 d5[0], [r1] \n\ | |
add r1, r0, #0x00000298 \n\ | |
vld1.32 d4[1], [r1] \n\ | |
vld1.32 d5[1], [r1] \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt 4f \n\ | |
asr r1, r2, #1 \n\ | |
str r1, [r0, #16] \n\ | |
and r2, r2, #1 \n\ | |
str r2, [r0, #20] \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 1: copyq \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 3b \n\ | |
b 12f \n\ | |
4: \n\ | |
mov ip, #0x00000010 \n\ | |
ldr r1, [r0, #24] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
ldr r2, [r0, #8] \n\ | |
cmp r2, ip \n\ | |
ble 1f \n\ | |
str ip, [r0, #12] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, [r0, #16] \n\ | |
and r2, r1, #1 \n\ | |
str r2, [r0, #20] \n\ | |
b 2f \n\ | |
1: \n\ | |
str r2, [r0, #12] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, [r0, #16] \n\ | |
str r2, [r0, #20] \n\ | |
2: \n\ | |
ldr r2, [r0, #24] \n\ | |
ldr ip, [r0, #12] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 6f \n\ | |
5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyq \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
bne 5b \n\ | |
6: \n\ | |
ldr ip, [r0, #16] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 12f \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 11f \n\ | |
9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 1: copyq \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 9b \n\ | |
b 12f \n\ | |
11: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq 10f \n\ | |
8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 1: copyq \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 8b \n\ | |
b 12f \n\ | |
10: \n\ | |
7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 1: copyq \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne 7b \n\ | |
12: \n\ | |
ldr ip, [r0, #20] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq 14f \n\ | |
13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyq \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
bne 13b \n\ | |
14: \n\ | |
bx lr \n\ | |
nop \n\ | |
nop \n\ | |
"); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment