Skip to content

Instantly share code, notes, and snippets.

@ijsf
Created March 4, 2015 01:28
Show Gist options
  • Save ijsf/0d61f27ec065ca601e8d to your computer and use it in GitHub Desktop.
Save ijsf/0d61f27ec065ca601e8d to your computer and use it in GitHub Desktop.
/* autogenerated from gstaudiopack.orc */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <glib.h>
/* Fixed-width integer typedefs (orc_int8 .. orc_uint64), the ORC_UINT64_C
 * literal helper and the lane unions used by Orc-generated code.  The guard
 * macro makes sure the definitions are emitted only once. */
#ifndef _ORC_INTEGER_TYPEDEFS_
#define _ORC_INTEGER_TYPEDEFS_
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 or later: map directly onto the <stdint.h> exact-width types. */
#include <stdint.h>
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
#define ORC_UINT64_C(x) UINT64_C(x)
#elif defined(_MSC_VER)
/* MSVC: use the compiler's sized __intN types and its Ui64 literal suffix. */
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
#define ORC_UINT64_C(x) (x##Ui64)
/* Spell the inline keyword the way this compiler accepts it. */
#define inline __inline
#else
/* Fallback: build the typedefs from the basic integer types. */
#include <limits.h>
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
#if INT_MAX == LONG_MAX
/* long has the same range as int here, so the 64-bit type is long long. */
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
#define ORC_UINT64_C(x) (x##ULL)
#else
/* long is wider than int here and is used as the 64-bit type. */
typedef long orc_int64;
typedef unsigned long orc_uint64;
#define ORC_UINT64_C(x) (x##UL)
#endif
#endif
/* Unions that view the same storage either as one wide value (i / f) or as
 * an array of narrower lanes (x2 / x4 / x2f). */
typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
#endif
/* ORC_RESTRICT: the restrict qualifier in whichever spelling the compiler
 * accepts (C99 keyword, GNU extension), or nothing when neither applies. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* ORC_INTERNAL: marks a symbol as having hidden visibility on Sun Studio and
 * GNU-compatible compilers; expands to nothing elsewhere. */
#ifndef ORC_INTERNAL
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
#define ORC_INTERNAL __hidden
#elif defined (__GNUC__)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#else
#define ORC_INTERNAL
#endif
#endif
#ifndef DISABLE_ORC
#include <orc/orc.h>
#endif
/* Prototypes of the audio kernels.  Every function takes a destination d1,
 * either a source s1 or (splat functions) a scalar p1, and a count n.
 * NOTE(review): n is presumably the number of samples, not bytes -- confirm
 * against the callers. */

/* unpack: raw input in s1; output is gint32 for the integer formats and
 * gdouble for the floating-point formats. */
void audio_orc_unpack_u8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f32 (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f32_swap (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
/* pack: the opposite direction -- gint32 / gdouble input in s1, raw output
 * in d1. */
void audio_orc_pack_u8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_f32 (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_f32_swap (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
/* splat: destination d1 and a scalar parameter p1 instead of a source array.
 * NOTE(review): presumably fills d1 with p1 -- the definitions are not in
 * this part of the file. */
void audio_orc_splat_u16 (guint16 * ORC_RESTRICT d1, int p1, int n);
void audio_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
void audio_orc_splat_u64 (guint64 * ORC_RESTRICT d1, int p1, int n);
/* begin Orc C target preamble */
/* Generic helpers.  These are function-like macros, so every argument may be
 * evaluated more than once -- do not pass expressions with side effects. */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
/* Range limits: S/U = signed/unsigned, B/W/L = 8/16/32 bits. */
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX 255
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX 65535
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0
/* Saturate x to the range of the named type. */
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
/* Reverse the byte order of a 16-bit (W), 32-bit (L) or 64-bit (Q) value. */
#define ORC_SWAP_W(x) ((((x)&0xffU)<<8) | (((x)&0xff00U)>>8))
#define ORC_SWAP_L(x) ((((x)&0xffU)<<24) | (((x)&0xff00U)<<8) | (((x)&0xff0000U)>>8) | (((x)&0xff000000U)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
/* Advance ptr by offset bytes; the result is an untyped pointer. */
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
/* Floating-point helpers working on the raw integer bit pattern x.
 * ORC_DENORMAL*: when the exponent field is all zero, keep only the sign and
 * exponent bits (clearing the mantissa); otherwise leave x unchanged.
 * ORC_ISNAN*: true when the exponent field is all ones and the mantissa is
 * non-zero. */
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
/* Same ORC_RESTRICT selection as earlier in the file; the #ifndef makes this
 * copy a no-op when the macro is already defined. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* end Orc C target preamble */
/* audio_orc_unpack_u8 (portable C implementation)
 *
 * Expands n unsigned 8-bit samples from s1 into 32-bit words in d1.  Each
 * byte is zero-extended, shifted left by 24 so it occupies the top byte of
 * the word, and XORed with 0x80000000 to flip the top bit.  This is the
 * loadb / convubw / convuwl / shll #24 / xorl sequence the former NEON
 * inline assembly carried out.
 *
 * The inline assembly was replaced because it could not be relied on:
 *  - it wrote r1-r3, ip, q2/q3 and the condition flags without declaring
 *    any clobbers;
 *  - it issued "str rX, %[counterN]" on operands constrained to registers,
 *    which is not a valid store;
 *  - execution ran off the last label into ".word" data;
 *  - a non-positive n was used directly as a down-counting loop counter.
 *
 * d1: destination, room for n 32-bit words
 * s1: source, n bytes
 * n:  number of samples; nothing is written when n <= 0
 */
void
audio_orc_unpack_u8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  int i;

  for (i = 0; i < n; i++) {
    /* Widen before shifting so the shift happens in unsigned 32-bit. */
    uint32_t word = (uint32_t) s1[i] << 24;

    d1[i] = (gint32) (word ^ 0x80000000U);
  }
}
/* audio_orc_unpack_s8 (portable C implementation)
 *
 * Expands n 8-bit samples from s1 into 32-bit words in d1.  Each byte is
 * zero-extended and shifted left by 24 so it occupies the top byte of the
 * word (the sign bit of the byte becomes the sign bit of the word).  This is
 * the loadb / convubw / convuwl / shll #24 sequence the former NEON inline
 * assembly carried out.
 *
 * The inline assembly was replaced because it could not be relied on:
 *  - it wrote r1-r3, ip, q2 and the condition flags without declaring any
 *    clobbers;
 *  - it issued "str rX, %[counterN]" on operands constrained to registers,
 *    which is not a valid store;
 *  - execution ran off the last label into ".word" data;
 *  - a non-positive n was used directly as a down-counting loop counter.
 *
 * d1: destination, room for n 32-bit words
 * s1: source, n bytes
 * n:  number of samples; nothing is written when n <= 0
 */
void
audio_orc_unpack_s8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  int i;

  for (i = 0; i < n; i++) {
    /* Shift in unsigned 32-bit, then reinterpret as the signed result. */
    d1[i] = (gint32) ((uint32_t) s1[i] << 24);
  }
}
/* audio_orc_unpack_u16 (portable C implementation)
 *
 * Expands n 16-bit samples, stored in native byte order in s1, into 32-bit
 * words in d1.  Each sample is zero-extended, shifted left by 16 and XORed
 * with 0x80000000 to flip the top bit.  This is the loadw / convuwl /
 * shll #16 / xorl sequence the former NEON inline assembly carried out.
 *
 * The inline assembly was replaced because it could not be relied on:
 *  - it wrote r1-r3, ip, q2/q3 and the condition flags without declaring
 *    any clobbers;
 *  - it issued "str rX, %[counterN]" on operands constrained to registers,
 *    which is not a valid store;
 *  - execution ran off the last label into ".word" data;
 *  - a non-positive n was used directly as a down-counting loop counter.
 *
 * d1: destination, room for n 32-bit words
 * s1: source, 2 * n bytes (no alignment required)
 * n:  number of samples; nothing is written when n <= 0
 */
void
audio_orc_unpack_u16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++, src += 2) {
    uint16_t sample;
    unsigned char *raw = (unsigned char *) &sample;

    /* Byte-wise copy: a native-order load that is safe for unaligned s1. */
    raw[0] = src[0];
    raw[1] = src[1];

    d1[i] = (gint32) (((uint32_t) sample << 16) ^ 0x80000000U);
  }
}
/* audio_orc_unpack_s16 (portable C implementation)
 *
 * Expands n 16-bit samples, stored in native byte order in s1, into 32-bit
 * words in d1.  Each sample is zero-extended and shifted left by 16 so it
 * occupies the upper half of the word.  This is the loadw / convuwl /
 * shll #16 sequence the former NEON inline assembly carried out.
 *
 * The inline assembly was replaced because it could not be relied on:
 *  - its entry check compared the hard-coded initial counter (64 >> 2)
 *    against 0x40, so it always took the short path and converted exactly
 *    64 samples regardless of n;
 *  - it wrote r1-r3, ip, q2 and the condition flags without declaring any
 *    clobbers;
 *  - it issued "str rX, %[counterN]" on operands constrained to registers,
 *    which is not a valid store;
 *  - execution ran off the last label into ".word" data.
 *
 * d1: destination, room for n 32-bit words
 * s1: source, 2 * n bytes (no alignment required)
 * n:  number of samples; nothing is written when n <= 0
 */
void
audio_orc_unpack_s16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++, src += 2) {
    uint16_t sample;
    unsigned char *raw = (unsigned char *) &sample;

    /* Byte-wise copy: a native-order load that is safe for unaligned s1. */
    raw[0] = src[0];
    raw[1] = src[1];

    d1[i] = (gint32) ((uint32_t) sample << 16);
  }
}
/* audio_orc_unpack_u16_swap (portable C implementation)
 *
 * Expands n 16-bit samples, stored in s1 in the opposite of native byte
 * order, into 32-bit words in d1.  Each sample has its two bytes swapped, is
 * zero-extended, shifted left by 16 and XORed with 0x80000000 to flip the
 * top bit.  This is the loadw / swapw / convuwl / shll #16 / xorl sequence
 * the former NEON inline assembly carried out.
 *
 * The inline assembly was replaced because it could not be relied on:
 *  - it wrote r1-r3, ip, q2/q3 and the condition flags without declaring
 *    any clobbers;
 *  - it issued "str rX, %[counterN]" on operands constrained to registers,
 *    which is not a valid store;
 *  - execution ran off the last label into ".word" data;
 *  - a non-positive n was used directly as a down-counting loop counter.
 *
 * d1: destination, room for n 32-bit words
 * s1: source, 2 * n bytes (no alignment required)
 * n:  number of samples; nothing is written when n <= 0
 */
void
audio_orc_unpack_u16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++, src += 2) {
    uint16_t sample;
    unsigned char *raw = (unsigned char *) &sample;

    /* Copy the two bytes crosswise: native-order load plus byte swap. */
    raw[0] = src[1];
    raw[1] = src[0];

    d1[i] = (gint32) (((uint32_t) sample << 16) ^ 0x80000000U);
  }
}
/* audio_orc_unpack_s16_swap (portable C implementation)
 *
 * Expands n 16-bit samples, stored in s1 in the opposite of native byte
 * order, into 32-bit words in d1.  Each sample has its two bytes swapped, is
 * zero-extended and shifted left by 16 so it occupies the upper half of the
 * word.  This is the loadw / swapw / convuwl / shll #16 sequence the former
 * NEON inline assembly carried out.
 *
 * The inline assembly was replaced because it could not be relied on:
 *  - it wrote r1-r3, ip, q2 and the condition flags without declaring any
 *    clobbers;
 *  - it issued "str rX, %[counterN]" on operands constrained to registers,
 *    which is not a valid store;
 *  - execution ran off the last label into ".word" data;
 *  - a non-positive n was used directly as a down-counting loop counter.
 *
 * d1: destination, room for n 32-bit words
 * s1: source, 2 * n bytes (no alignment required)
 * n:  number of samples; nothing is written when n <= 0
 */
void
audio_orc_unpack_s16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++, src += 2) {
    uint16_t sample;
    unsigned char *raw = (unsigned char *) &sample;

    /* Copy the two bytes crosswise: native-order load plus byte swap. */
    raw[0] = src[1];
    raw[1] = src[0];

    d1[i] = (gint32) ((uint32_t) sample << 16);
  }
}
/* audio_orc_unpack_u24_32 (portable C implementation)
 *
 * Converts n 32-bit input words, stored in native byte order in s1, into
 * 32-bit words in d1.  Each word is shifted left by 8 (discarding its top
 * byte) and XORed with 0x80000000 to flip the top bit.  This is the loadl /
 * shll #8 / xorl sequence the former NEON inline assembly carried out.
 *
 * The inline assembly was replaced because it could not be relied on:
 *  - it wrote r1-r3, ip, q2/q3 and the condition flags without declaring
 *    any clobbers;
 *  - it issued "str rX, %[counterN]" on operands constrained to registers,
 *    which is not a valid store;
 *  - execution ran off the last label straight into ".word" data;
 *  - a non-positive n was used directly as a down-counting loop counter.
 *
 * d1: destination, room for n 32-bit words
 * s1: source, 4 * n bytes (no alignment required)
 * n:  number of samples; nothing is written when n <= 0
 */
void
audio_orc_unpack_u24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++, src += 4) {
    uint32_t word;
    unsigned char *raw = (unsigned char *) &word;

    /* Byte-wise copy: a native-order load that is safe for unaligned s1. */
    raw[0] = src[0];
    raw[1] = src[1];
    raw[2] = src[2];
    raw[3] = src[3];

    d1[i] = (gint32) ((word << 8) ^ 0x80000000U);
  }
}
/* audio_orc_unpack_s24_32 (static implementation) */
/*
 * Unpack n 32-bit words from s1 into d1.
 *
 * Each word is read in native byte order and shifted left by 8 bits, so the
 * low 24 bits of the input occupy the top 24 bits of the output and the low
 * 8 bits of the output are zero.
 *
 * d1: destination, room for n gint32 values.
 * s1: source bytes, 4 * n bytes are read; no alignment is required.
 * n:  number of samples; nothing is read or written when n <= 0.
 *
 * Written in portable C so that exactly n samples are processed (not a
 * fixed batch) and the compiler tracks every register and memory access.
 */
void
audio_orc_unpack_s24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++) {
    uint32_t word = 0;
    guint8 *raw = (guint8 *) &word;
    int k;

    /* 0: loadl -- byte-wise copy keeps native byte order and is safe for
     * an unaligned source pointer */
    for (k = 0; k < 4; k++)
      raw[k] = src[k];
    src += 4;
    /* 1: shll */
    word <<= 8;
    /* 2: storel */
    d1[i] = (gint32) word;
  }
}
/* audio_orc_unpack_u24_32_swap (static implementation) */
/*
 * Unpack n byte-swapped 32-bit words from s1 into d1.
 *
 * Each word is read in native byte order, byte-reversed, shifted left by
 * 8 bits and has its most significant bit inverted (unsigned, offset-binary
 * sample to signed sample).
 *
 * d1: destination, room for n gint32 values.
 * s1: source bytes, 4 * n bytes are read; no alignment is required.
 * n:  number of samples; nothing is read or written when n <= 0.
 *
 * Written in portable C so that the element count n is honoured and the
 * compiler tracks every register and memory access itself.
 */
void
audio_orc_unpack_u24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++) {
    uint32_t word = 0;
    guint8 *raw = (guint8 *) &word;
    int k;

    /* 0: loadl -- byte-wise copy keeps native byte order and is safe for
     * an unaligned source pointer */
    for (k = 0; k < 4; k++)
      raw[k] = src[k];
    src += 4;
    /* 1: swapl -- reverse the four bytes of the word */
    word = ((word & 0x000000ffu) << 24) | ((word & 0x0000ff00u) << 8)
        | ((word >> 8) & 0x0000ff00u) | (word >> 24);
    /* 2: shll */
    word <<= 8;
    /* 4: xorl (3: loadpl constant 0x80000000) */
    word ^= 0x80000000u;
    /* 5: storel */
    d1[i] = (gint32) word;
  }
}
/* audio_orc_unpack_s24_32_swap (static implementation) */
/*
 * Unpack n byte-swapped 32-bit words from s1 into d1.
 *
 * Each word is read in native byte order, byte-reversed and then shifted
 * left by 8 bits, leaving the low 8 bits of every output word zero.
 *
 * d1: destination, room for n gint32 values.
 * s1: source bytes, 4 * n bytes are read; no alignment is required.
 * n:  number of samples; nothing is read or written when n <= 0.
 *
 * Written in portable C so that exactly n samples are processed (not a
 * fixed batch) and the compiler tracks every register and memory access.
 */
void
audio_orc_unpack_s24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++) {
    uint32_t word = 0;
    guint8 *raw = (guint8 *) &word;
    int k;

    /* 0: loadl -- byte-wise copy keeps native byte order and is safe for
     * an unaligned source pointer */
    for (k = 0; k < 4; k++)
      raw[k] = src[k];
    src += 4;
    /* 1: swapl -- reverse the four bytes of the word */
    word = ((word & 0x000000ffu) << 24) | ((word & 0x0000ff00u) << 8)
        | ((word >> 8) & 0x0000ff00u) | (word >> 24);
    /* 2: shll */
    word <<= 8;
    /* 3: storel */
    d1[i] = (gint32) word;
  }
}
/* audio_orc_unpack_u32 (static implementation) */
/*
 * Unpack n 32-bit words from s1 into d1.
 *
 * Each word is read in native byte order and has its most significant bit
 * inverted, which turns an unsigned, offset-binary 32-bit sample into a
 * signed one.
 *
 * d1: destination, room for n gint32 values.
 * s1: source bytes, 4 * n bytes are read; no alignment is required.
 * n:  number of samples; nothing is read or written when n <= 0.
 *
 * Written in portable C so that exactly n samples are processed (not a
 * fixed batch) and the compiler tracks every register and memory access.
 */
void
audio_orc_unpack_u32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++) {
    uint32_t word = 0;
    guint8 *raw = (guint8 *) &word;
    int k;

    /* 0: loadl -- byte-wise copy keeps native byte order and is safe for
     * an unaligned source pointer */
    for (k = 0; k < 4; k++)
      raw[k] = src[k];
    src += 4;
    /* 2: xorl (1: loadpl constant 0x80000000) */
    word ^= 0x80000000u;
    /* 3: storel */
    d1[i] = (gint32) word;
  }
}
/* audio_orc_unpack_u32_swap (static implementation) */
/*
 * Unpack n byte-swapped 32-bit words from s1 into d1.
 *
 * Each word is read in native byte order, byte-reversed and has its most
 * significant bit inverted (unsigned, offset-binary sample to signed
 * sample).
 *
 * d1: destination, room for n gint32 values.
 * s1: source bytes, 4 * n bytes are read; no alignment is required.
 * n:  number of samples; nothing is read or written when n <= 0.
 *
 * Written in portable C so that the element count n is honoured and the
 * compiler tracks every register and memory access itself.
 */
void
audio_orc_unpack_u32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++) {
    uint32_t word = 0;
    guint8 *raw = (guint8 *) &word;
    int k;

    /* 0: loadl -- byte-wise copy keeps native byte order and is safe for
     * an unaligned source pointer */
    for (k = 0; k < 4; k++)
      raw[k] = src[k];
    src += 4;
    /* 1: swapl -- reverse the four bytes of the word */
    word = ((word & 0x000000ffu) << 24) | ((word & 0x0000ff00u) << 8)
        | ((word >> 8) & 0x0000ff00u) | (word >> 24);
    /* 3: xorl (2: loadpl constant 0x80000000) */
    word ^= 0x80000000u;
    /* 4: storel */
    d1[i] = (gint32) word;
  }
}
/* audio_orc_unpack_s32 (static implementation) */
/*
 * Copy n 32-bit words from s1 into d1 without modification.
 *
 * Each word is read in native byte order and stored unchanged.
 *
 * d1: destination, room for n gint32 values.
 * s1: source bytes, 4 * n bytes are read; no alignment is required.
 * n:  number of samples; nothing is read or written when n <= 0.
 *
 * Written in portable C so that exactly n samples are processed (not a
 * fixed batch) and the compiler tracks every register and memory access.
 */
void
audio_orc_unpack_s32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++) {
    uint32_t word = 0;
    guint8 *raw = (guint8 *) &word;
    int k;

    /* 0: loadl -- byte-wise copy keeps native byte order and is safe for
     * an unaligned source pointer */
    for (k = 0; k < 4; k++)
      raw[k] = src[k];
    src += 4;
    /* 1: copyl */
    /* 2: storel */
    d1[i] = (gint32) word;
  }
}
/* audio_orc_unpack_s32_swap (static implementation) */
/*
 * Copy n 32-bit words from s1 into d1, reversing the byte order of each.
 *
 * Each word is read in native byte order, byte-reversed and stored.
 *
 * d1: destination, room for n gint32 values.
 * s1: source bytes, 4 * n bytes are read; no alignment is required.
 * n:  number of samples; nothing is read or written when n <= 0.
 *
 * Written in portable C so that exactly n samples are processed (not a
 * fixed batch) and the compiler tracks every register and memory access.
 */
void
audio_orc_unpack_s32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) {
  const guint8 *src = s1;
  int i;

  for (i = 0; i < n; i++) {
    uint32_t word = 0;
    guint8 *raw = (guint8 *) &word;
    int k;

    /* 0: loadl -- byte-wise copy keeps native byte order and is safe for
     * an unaligned source pointer */
    for (k = 0; k < 4; k++)
      raw[k] = src[k];
    src += 4;
    /* 1: swapl -- reverse the four bytes of the word */
    word = ((word & 0x000000ffu) << 24) | ((word & 0x0000ff00u) << 8)
        | ((word >> 8) & 0x0000ff00u) | (word >> 24);
    /* 2: storel */
    d1[i] = (gint32) word;
  }
}
/* audio_orc_unpack_f32 (static implementation) */
/*
 * Widen n single-precision samples from s1 into double-precision samples
 * in d1.
 *
 * The bit pattern of every input is passed through ORC_DENORMAL before the
 * float-to-double conversion (macro defined elsewhere in this file;
 * presumably it flushes denormals to zero -- confirm against its
 * definition).
 *
 * d1: destination, room for n gdouble values.
 * s1: source, n gfloat values.
 * n:  number of samples; no work is done when n <= 0.
 */
void
audio_orc_unpack_f32 (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n){
  orc_union64 * ORC_RESTRICT out = (orc_union64 *) d1;
  const orc_union32 * ORC_RESTRICT in = (const orc_union32 *) s1;
  int remaining;

  for (remaining = n; remaining > 0; remaining--, in++, out++) {
    orc_union32 loaded;
    orc_union32 filtered;
    orc_union64 widened;

    /* 0: loadl */
    loaded = *in;
    /* 1: convfd */
    filtered.i = ORC_DENORMAL (loaded.i);
    widened.f = filtered.f;
    /* 2: storeq */
    *out = widened;
  }
}
/* audio_orc_unpack_f32_swap (static implementation) */
/*
 * Widen n byte-swapped single-precision samples from s1 into
 * double-precision samples in d1.
 *
 * Every 32-bit input is first passed through ORC_SWAP_L, then through
 * ORC_DENORMAL, and the result is converted from float to double. Both
 * macros are defined elsewhere in this file; presumably they byte-swap the
 * word and flush denormals to zero respectively -- confirm against their
 * definitions.
 *
 * d1: destination, room for n gdouble values.
 * s1: source, n gfloat values.
 * n:  number of samples; no work is done when n <= 0.
 */
void
audio_orc_unpack_f32_swap (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n){
  orc_union64 * ORC_RESTRICT out = (orc_union64 *) d1;
  const orc_union32 * ORC_RESTRICT in = (const orc_union32 *) s1;
  int remaining;

  for (remaining = n; remaining > 0; remaining--, in++, out++) {
    orc_union32 loaded;
    orc_union32 swapped;
    orc_union32 filtered;
    orc_union64 widened;

    /* 0: loadl */
    loaded = *in;
    /* 1: swapl */
    swapped.i = ORC_SWAP_L (loaded.i);
    /* 2: convfd */
    filtered.i = ORC_DENORMAL (swapped.i);
    widened.f = filtered.f;
    /* 3: storeq */
    *out = widened;
  }
}
/*
 * audio_orc_unpack_f64
 *
 * Copies n 64-bit floating point samples from s1 to d1 unchanged
 * (ORC program: loadq, copyq, storeq).
 *
 * d1: destination, room for n gdouble values
 * s1: source, n gdouble values; must not overlap d1 (ORC_RESTRICT)
 * n:  number of samples; nothing is written when n <= 0
 *
 * Written as a plain C loop rather than inline assembly so that exactly n
 * samples are processed and the compiler can see every register and memory
 * access made by the routine.
 */
void
audio_orc_unpack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n)
{
  /* Whole-union assignment moves each 64-bit pattern verbatim. */
  typedef union { gdouble f; uint64_t i; } sample64;
  sample64 * ORC_RESTRICT dst = (sample64 *) d1;
  const sample64 * ORC_RESTRICT src = (const sample64 *) s1;
  int i;

  for (i = 0; i < n; i++) {
    dst[i] = src[i];
  }
}
/*
 * audio_orc_unpack_f64_swap
 *
 * Copies n 64-bit floating point samples from s1 to d1 while reversing the
 * byte order of every 64-bit word (ORC program: loadq, swapq, storeq).
 *
 * d1: destination, room for n gdouble values
 * s1: source, n byte-swapped gdouble values; must not overlap d1
 * n:  number of samples; nothing is written when n <= 0
 *
 * Written as a plain C loop rather than inline assembly so that exactly n
 * samples are processed and the compiler can see every register and memory
 * access made by the routine.
 */
void
audio_orc_unpack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n)
{
  /* The swap is done on the integer view so no floating point operation
   * ever touches the (possibly not-a-number) intermediate bit patterns. */
  typedef union { gdouble f; uint64_t i; } sample64;
  sample64 * ORC_RESTRICT dst = (sample64 *) d1;
  const sample64 * ORC_RESTRICT src = (const sample64 *) s1;
  int i;

  for (i = 0; i < n; i++) {
    sample64 cell = src[i];
    uint64_t bits = cell.i;

    /* Reverse the eight bytes: swap 32-bit halves, then 16-bit pairs,
     * then adjacent bytes. */
    bits = (bits << 32) | (bits >> 32);
    bits = ((bits & 0x0000ffff0000ffffULL) << 16)
         | ((bits >> 16) & 0x0000ffff0000ffffULL);
    bits = ((bits & 0x00ff00ff00ff00ffULL) << 8)
         | ((bits >> 8) & 0x00ff00ff00ff00ffULL);

    cell.i = bits;
    dst[i] = cell;
  }
}
/*
 * audio_orc_pack_u8
 *
 * Packs n signed 32-bit samples into unsigned 8-bit samples
 * (ORC program: xorl 0x80000000, shrul 24, convlw, convwb).
 * The sign bit is flipped to move from signed to offset-binary
 * representation and the most significant byte is kept.
 *
 * d1: destination, room for n bytes
 * s1: source, n gint32 samples; must not overlap d1 (ORC_RESTRICT)
 * n:  number of samples; nothing is written when n <= 0
 *
 * Written as a plain C loop rather than inline assembly so that the
 * compiler can see every register and memory access made by the routine.
 */
void
audio_orc_pack_u8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n)
{
  int i;

  for (i = 0; i < n; i++) {
    /* xorl: flip the sign bit on the unsigned view of the sample */
    uint32_t biased = (uint32_t) s1[i] ^ 0x80000000u;

    /* shrul 24 leaves only the top byte, so the two narrowing
     * conversions (convlw, convwb) reduce to a single truncation */
    d1[i] = (guint8) (biased >> 24);
  }
}
/*
 * audio_orc_pack_s8
 *
 * Packs n signed 32-bit samples into signed 8-bit samples by keeping the
 * most significant byte (ORC program: shrsl 24, convlw, convwb).
 *
 * d1: destination, room for n bytes (each holds a two's complement value)
 * s1: source, n gint32 samples; must not overlap d1 (ORC_RESTRICT)
 * n:  number of samples; nothing is written when n <= 0
 *
 * Written as a plain C loop rather than inline assembly so that the
 * compiler can see every register and memory access made by the routine.
 */
void
audio_orc_pack_s8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n)
{
  int i;

  for (i = 0; i < n; i++) {
    /* The low 8 bits of an arithmetic shift by 24 are exactly the top
     * byte of the sample, so a logical shift on the unsigned view gives
     * the same byte without relying on signed-shift behaviour. */
    d1[i] = (guint8) ((uint32_t) s1[i] >> 24);
  }
}
/*
 * audio_orc_pack_u16
 *
 * Packs n signed 32-bit samples into unsigned 16-bit samples
 * (ORC program: xorl 0x80000000, shrul 16, convlw). The sign bit is
 * flipped and the upper 16 bits are kept; each result is stored in the
 * host's native byte order.
 *
 * d1: destination, room for 2 * n bytes
 * s1: source, n gint32 samples; must not overlap d1 (ORC_RESTRICT)
 * n:  number of samples; nothing is written when n <= 0
 *
 * Written as a plain C loop rather than inline assembly so that the
 * compiler can see every register and memory access made by the routine.
 */
void
audio_orc_pack_u16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n)
{
  /* Byte view of a 16-bit value: gives a native-order store without
   * requiring d1 to be 2-byte aligned. */
  union { uint16_t v; guint8 b[2]; } out;
  guint8 *dst = d1;
  int i;

  for (i = 0; i < n; i++) {
    /* xorl + shrul 16 + convlw */
    out.v = (uint16_t) (((uint32_t) s1[i] ^ 0x80000000u) >> 16);

    /* storew */
    *dst++ = out.b[0];
    *dst++ = out.b[1];
  }
}
/*
 * audio_orc_pack_s16
 *
 * Packs n signed 32-bit samples into signed 16-bit samples by keeping the
 * upper 16 bits (ORC program: shrsl 16, convlw). Each result is stored in
 * the host's native byte order.
 *
 * d1: destination, room for 2 * n bytes
 * s1: source, n gint32 samples; must not overlap d1 (ORC_RESTRICT)
 * n:  number of samples; nothing is written when n <= 0
 *
 * Written as a plain C loop rather than inline assembly so that exactly n
 * samples are processed and the compiler can see every register and memory
 * access made by the routine.
 */
void
audio_orc_pack_s16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n)
{
  /* Byte view of a 16-bit value: gives a native-order store without
   * requiring d1 to be 2-byte aligned. */
  union { uint16_t v; guint8 b[2]; } out;
  guint8 *dst = d1;
  int i;

  for (i = 0; i < n; i++) {
    /* The low 16 bits of an arithmetic shift by 16 are the sample's upper
     * half, so a logical shift on the unsigned view is equivalent. */
    out.v = (uint16_t) ((uint32_t) s1[i] >> 16);

    /* storew */
    *dst++ = out.b[0];
    *dst++ = out.b[1];
  }
}
/*
 * audio_orc_pack_u16_swap
 *
 * Packs n signed 32-bit samples into unsigned 16-bit samples whose bytes
 * are swapped relative to the host's native order
 * (ORC program: xorl 0x80000000, shrul 16, convlw, swapw).
 *
 * d1: destination, room for 2 * n bytes
 * s1: source, n gint32 samples; must not overlap d1 (ORC_RESTRICT)
 * n:  number of samples; nothing is written when n <= 0
 *
 * Written as a plain C loop rather than inline assembly so that the
 * compiler can see every register and memory access made by the routine.
 */
void
audio_orc_pack_u16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n)
{
  /* Byte view of a 16-bit value, used for the final native-order store. */
  union { uint16_t v; guint8 b[2]; } out;
  guint8 *dst = d1;
  int i;

  for (i = 0; i < n; i++) {
    /* xorl + shrul 16 + convlw */
    uint16_t narrowed = (uint16_t) (((uint32_t) s1[i] ^ 0x80000000u) >> 16);

    /* swapw: exchange the two bytes of the 16-bit value */
    out.v = (uint16_t) (((narrowed & 0x00ffu) << 8) | (narrowed >> 8));

    /* storew */
    *dst++ = out.b[0];
    *dst++ = out.b[1];
  }
}
/*
 * audio_orc_pack_s16_swap
 *
 * Packs n signed 32-bit samples into signed 16-bit samples whose bytes are
 * swapped relative to the host's native order
 * (ORC program: shrsl 16, convlw, swapw).
 *
 * d1: destination, room for 2 * n bytes
 * s1: source, n gint32 samples; must not overlap d1 (ORC_RESTRICT)
 * n:  number of samples; nothing is written when n <= 0
 *
 * Written as a plain C loop rather than inline assembly so that the
 * compiler can see every register and memory access made by the routine.
 */
void
audio_orc_pack_s16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n)
{
  /* Byte view of a 16-bit value, used for the final native-order store. */
  union { uint16_t v; guint8 b[2]; } out;
  guint8 *dst = d1;
  int i;

  for (i = 0; i < n; i++) {
    /* The low 16 bits of an arithmetic shift by 16 are the sample's upper
     * half, so a logical shift on the unsigned view is equivalent. */
    uint16_t narrowed = (uint16_t) ((uint32_t) s1[i] >> 16);

    /* swapw: exchange the two bytes of the 16-bit value */
    out.v = (uint16_t) (((narrowed & 0x00ffu) << 8) | (narrowed >> 8));

    /* storew */
    *dst++ = out.b[0];
    *dst++ = out.b[1];
  }
}
/*
 * audio_orc_pack_u24_32
 *
 * Packs n signed 32-bit samples into unsigned 24-bit samples held in
 * 32-bit containers (ORC program: xorl 0x80000000, shrul 8). The sign bit
 * is flipped, the value is shifted right by 8 so the top byte of each
 * container is zero, and the 32-bit result is stored in the host's native
 * byte order.
 *
 * d1: destination, room for 4 * n bytes
 * s1: source, n gint32 samples; must not overlap d1 (ORC_RESTRICT)
 * n:  number of samples; nothing is written when n <= 0
 *
 * Written as a plain C loop rather than inline assembly so that the
 * compiler can see every register and memory access made by the routine.
 */
void
audio_orc_pack_u24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n)
{
  /* Byte view of a 32-bit value: gives a native-order store without
   * requiring d1 to be 4-byte aligned. */
  union { uint32_t v; guint8 b[4]; } out;
  guint8 *dst = d1;
  int i;

  for (i = 0; i < n; i++) {
    /* xorl + shrul 8 */
    out.v = ((uint32_t) s1[i] ^ 0x80000000u) >> 8;

    /* storel */
    *dst++ = out.b[0];
    *dst++ = out.b[1];
    *dst++ = out.b[2];
    *dst++ = out.b[3];
  }
}
/* audio_orc_pack_s24_32 (static implementation)
 *
 * Arithmetically shifts each of the n 32-bit samples in s1 right by 8 bits
 * (vshr.s32) and stores the 32-bit results to d1, using ARM NEON inline
 * assembly.
 *
 * d1: destination buffer; n 32-bit words are written
 * s1: source buffer; n 32-bit words are read
 * n:  number of samples; the function returns immediately when n <= 0
 *
 * Up to 64 samples are processed with unaligned vector accesses only. For
 * larger n, single samples are stored until d1 reaches a 16-byte boundary so
 * that the vector loop can use 128-bit aligned stores; that path assumes d1
 * is at least 4-byte aligned.
 */
void
audio_orc_pack_s24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) {
  /* Loop trip counts read (and, for n > 64, recomputed) by the assembly:
   * counter1 = leading single samples, counter2 = groups of four samples,
   * counter3 = trailing single samples. */
  uint32_t counter1 = 0;
  uint32_t counter2;
  uint32_t counter3;

  if (n <= 0)
    return;
  /* Derive the counts from n so the short path honours the requested length
   * instead of always converting a fixed 64 samples. */
  counter2 = (uint32_t) n >> 2;
  counter3 = (uint32_t) n & ((1<<2)-1);

  /* - volatile + "memory": the only effect is the store through d1, which the
   *   compiler cannot see, so the statement must not be dropped or reordered.
   * - r1-r3, r12 (ip), q2 and the flags are scratch and listed as clobbers.
   * - The counters live in registers, so they are updated with mov, not str.
   * - The three aligned loops of the generated code were identical and are
   *   merged into one; the unused constant pool that followed the code is
   *   gone because execution used to fall through into it. */
  __asm__ __volatile__ (" \n\
  Laudio_orc_pack_s24_3215: \n\
    mov r2, %[n] \n\
    cmp r2, #0x00000040 \n\
    bgt Laudio_orc_pack_s24_324 \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s24_3212 \n\
  Laudio_orc_pack_s24_323: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d4, d5 }, [r3] \n\
    # 1: shrsl \n\
    vshr.s32 q2, q2, #8 \n\
    # 2: storel \n\
    vst1.8 { d4, d5 }, [r2] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_s24_323 \n\
    b Laudio_orc_pack_s24_3212 \n\
  Laudio_orc_pack_s24_324: \n\
    mov ip, #0x00000010 \n\
    mov r1, %[d1] \n\
    sub ip, ip, r1 \n\
    and ip, ip, #15 \n\
    asr ip, ip, #2 \n\
    mov r2, %[n] \n\
    cmp r2, ip \n\
    ble Laudio_orc_pack_s24_321 \n\
    mov %[counter1], ip \n\
    sub r1, r2, ip \n\
    asr r2, r1, #2 \n\
    mov %[counter2], r2 \n\
    and r2, r1, #3 \n\
    mov %[counter3], r2 \n\
    b Laudio_orc_pack_s24_322 \n\
  Laudio_orc_pack_s24_321: \n\
    mov %[counter1], r2 \n\
    mov r2, #0x00000000 \n\
    mov %[counter2], r2 \n\
    mov %[counter3], r2 \n\
  Laudio_orc_pack_s24_322: \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter1] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s24_326 \n\
  Laudio_orc_pack_s24_325: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d4[0], [r3] \n\
    # 1: shrsl \n\
    vshr.s32 d4, d4, #8 \n\
    # 2: storel \n\
    vst1.32 d4[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_s24_325 \n\
  Laudio_orc_pack_s24_326: \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s24_3212 \n\
  Laudio_orc_pack_s24_327: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d4, d5 }, [r3] \n\
    # 1: shrsl \n\
    vshr.s32 q2, q2, #8 \n\
    # 2: storel \n\
    vst1.64 { d4, d5 }, [r2,:128] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_s24_327 \n\
  Laudio_orc_pack_s24_3212: \n\
    mov ip, %[counter3] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s24_3214 \n\
  Laudio_orc_pack_s24_3213: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d4[0], [r3] \n\
    # 1: shrsl \n\
    vshr.s32 d4, d4, #8 \n\
    # 2: storel \n\
    vst1.32 d4[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_s24_3213 \n\
  Laudio_orc_pack_s24_3214: \n\
  "
  :
    [counter1] "+&r" (counter1),
    [counter2] "+&r" (counter2),
    [counter3] "+&r" (counter3)
  :
    [d1] "r" (d1),
    [s1] "r" (s1),
    [n] "r" (n)
  :
    "r1", "r2", "r3", "r12", "q2", "cc", "memory"
  );
}
/* audio_orc_pack_u24_32_swap (static implementation)
 *
 * For each of the n 32-bit samples in s1: XORs the word with 0x80000000,
 * logically shifts it right by 8 bits (vshr.u32), reverses the byte order of
 * the resulting 32-bit word (vrev32.i8) and stores it to d1. Implemented
 * with ARM NEON inline assembly.
 *
 * d1: destination buffer; n 32-bit words are written
 * s1: source buffer; n 32-bit words are read
 * n:  number of samples; the function returns immediately when n <= 0
 *
 * Single samples are stored until d1 reaches a 16-byte boundary so that the
 * vector loop can use 128-bit aligned stores; this assumes d1 is at least
 * 4-byte aligned.
 */
void
audio_orc_pack_u24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) {
  /* Loop trip counts computed by the assembly from d1's alignment and n:
   * counter1 = leading single samples, counter2 = groups of four samples,
   * counter3 = trailing single samples. */
  uint32_t counter1 = 0;
  uint32_t counter2 = 0;
  uint32_t counter3 = 0;

  /* A negative n would otherwise be used directly as a loop count. */
  if (n <= 0)
    return;

  /* - volatile + "memory": the only effect is the store through d1, which the
   *   compiler cannot see, so the statement must not be dropped or reordered.
   * - r1-r3, r12 (ip), q2, q3 and the flags are scratch and listed as clobbers.
   * - The counters live in registers, so they are updated with mov, not str.
   * - The three aligned loops of the generated code were identical and are
   *   merged into one; the unused constant pool that followed the code is
   *   gone because execution used to fall through into it. */
  __asm__ __volatile__ (" \n\
    # 1: loadpl \n\
    vmov.i32 q2, #0x00000000 \n\
    vorr.i32 q2, #0x80000000 \n\
  Laudio_orc_pack_u24_32_swap15: \n\
    mov ip, #0x00000010 \n\
    mov r1, %[d1] \n\
    sub ip, ip, r1 \n\
    and ip, ip, #15 \n\
    asr ip, ip, #2 \n\
    mov r2, %[n] \n\
    cmp r2, ip \n\
    ble Laudio_orc_pack_u24_32_swap1 \n\
    mov %[counter1], ip \n\
    sub r1, r2, ip \n\
    asr r2, r1, #2 \n\
    mov %[counter2], r2 \n\
    and r2, r1, #3 \n\
    mov %[counter3], r2 \n\
    b Laudio_orc_pack_u24_32_swap2 \n\
  Laudio_orc_pack_u24_32_swap1: \n\
    mov %[counter1], r2 \n\
    mov r2, #0x00000000 \n\
    mov %[counter2], r2 \n\
    mov %[counter3], r2 \n\
  Laudio_orc_pack_u24_32_swap2: \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter1] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u24_32_swap6 \n\
  Laudio_orc_pack_u24_32_swap5: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d6[0], [r3] \n\
    # 2: xorl \n\
    veor d6, d6, d4 \n\
    # 3: shrul \n\
    vshr.u32 d6, d6, #8 \n\
    # 4: swapl \n\
    vrev32.i8 d6, d6 \n\
    # 5: storel \n\
    vst1.32 d6[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_u24_32_swap5 \n\
  Laudio_orc_pack_u24_32_swap6: \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u24_32_swap12 \n\
  Laudio_orc_pack_u24_32_swap7: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d6, d7 }, [r3] \n\
    # 2: xorl \n\
    veor q3, q3, q2 \n\
    # 3: shrul \n\
    vshr.u32 q3, q3, #8 \n\
    # 4: swapl \n\
    vrev32.i8 q3, q3 \n\
    # 5: storel \n\
    vst1.64 { d6, d7 }, [r2,:128] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_u24_32_swap7 \n\
  Laudio_orc_pack_u24_32_swap12: \n\
    mov ip, %[counter3] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u24_32_swap14 \n\
  Laudio_orc_pack_u24_32_swap13: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d6[0], [r3] \n\
    # 2: xorl \n\
    veor d6, d6, d4 \n\
    # 3: shrul \n\
    vshr.u32 d6, d6, #8 \n\
    # 4: swapl \n\
    vrev32.i8 d6, d6 \n\
    # 5: storel \n\
    vst1.32 d6[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_u24_32_swap13 \n\
  Laudio_orc_pack_u24_32_swap14: \n\
  "
  :
    [counter1] "+&r" (counter1),
    [counter2] "+&r" (counter2),
    [counter3] "+&r" (counter3)
  :
    [d1] "r" (d1),
    [s1] "r" (s1),
    [n] "r" (n)
  :
    "r1", "r2", "r3", "r12", "q2", "q3", "cc", "memory"
  );
}
/* audio_orc_pack_s24_32_swap (static implementation)
 *
 * For each of the n 32-bit samples in s1: arithmetically shifts the word
 * right by 8 bits (vshr.s32), reverses the byte order of the resulting
 * 32-bit word (vrev32.i8) and stores it to d1. Implemented with ARM NEON
 * inline assembly.
 *
 * d1: destination buffer; n 32-bit words are written
 * s1: source buffer; n 32-bit words are read
 * n:  number of samples; the function returns immediately when n <= 0
 *
 * Up to 64 samples are processed with unaligned vector accesses only. For
 * larger n, single samples are stored until d1 reaches a 16-byte boundary so
 * that the vector loop can use 128-bit aligned stores; that path assumes d1
 * is at least 4-byte aligned.
 */
void
audio_orc_pack_s24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) {
  /* Loop trip counts read (and, for n > 64, recomputed) by the assembly:
   * counter1 = leading single samples, counter2 = groups of four samples,
   * counter3 = trailing single samples. */
  uint32_t counter1 = 0;
  uint32_t counter2;
  uint32_t counter3;

  if (n <= 0)
    return;
  /* Derive the counts from n so the short path honours the requested length
   * instead of always converting a fixed 64 samples. */
  counter2 = (uint32_t) n >> 2;
  counter3 = (uint32_t) n & ((1<<2)-1);

  /* - volatile + "memory": the only effect is the store through d1, which the
   *   compiler cannot see, so the statement must not be dropped or reordered.
   * - r1-r3, r12 (ip), q2 and the flags are scratch and listed as clobbers.
   * - The counters live in registers, so they are updated with mov, not str.
   * - The three aligned loops of the generated code were identical and are
   *   merged into one; the unused constant pool that followed the code is
   *   gone because execution used to fall through into it. */
  __asm__ __volatile__ (" \n\
  Laudio_orc_pack_s24_32_swap15: \n\
    mov r2, %[n] \n\
    cmp r2, #0x00000040 \n\
    bgt Laudio_orc_pack_s24_32_swap4 \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s24_32_swap12 \n\
  Laudio_orc_pack_s24_32_swap3: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d4, d5 }, [r3] \n\
    # 1: shrsl \n\
    vshr.s32 q2, q2, #8 \n\
    # 2: swapl \n\
    vrev32.i8 q2, q2 \n\
    # 3: storel \n\
    vst1.8 { d4, d5 }, [r2] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_s24_32_swap3 \n\
    b Laudio_orc_pack_s24_32_swap12 \n\
  Laudio_orc_pack_s24_32_swap4: \n\
    mov ip, #0x00000010 \n\
    mov r1, %[d1] \n\
    sub ip, ip, r1 \n\
    and ip, ip, #15 \n\
    asr ip, ip, #2 \n\
    mov r2, %[n] \n\
    cmp r2, ip \n\
    ble Laudio_orc_pack_s24_32_swap1 \n\
    mov %[counter1], ip \n\
    sub r1, r2, ip \n\
    asr r2, r1, #2 \n\
    mov %[counter2], r2 \n\
    and r2, r1, #3 \n\
    mov %[counter3], r2 \n\
    b Laudio_orc_pack_s24_32_swap2 \n\
  Laudio_orc_pack_s24_32_swap1: \n\
    mov %[counter1], r2 \n\
    mov r2, #0x00000000 \n\
    mov %[counter2], r2 \n\
    mov %[counter3], r2 \n\
  Laudio_orc_pack_s24_32_swap2: \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter1] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s24_32_swap6 \n\
  Laudio_orc_pack_s24_32_swap5: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d4[0], [r3] \n\
    # 1: shrsl \n\
    vshr.s32 d4, d4, #8 \n\
    # 2: swapl \n\
    vrev32.i8 d4, d4 \n\
    # 3: storel \n\
    vst1.32 d4[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_s24_32_swap5 \n\
  Laudio_orc_pack_s24_32_swap6: \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s24_32_swap12 \n\
  Laudio_orc_pack_s24_32_swap7: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d4, d5 }, [r3] \n\
    # 1: shrsl \n\
    vshr.s32 q2, q2, #8 \n\
    # 2: swapl \n\
    vrev32.i8 q2, q2 \n\
    # 3: storel \n\
    vst1.64 { d4, d5 }, [r2,:128] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_s24_32_swap7 \n\
  Laudio_orc_pack_s24_32_swap12: \n\
    mov ip, %[counter3] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s24_32_swap14 \n\
  Laudio_orc_pack_s24_32_swap13: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d4[0], [r3] \n\
    # 1: shrsl \n\
    vshr.s32 d4, d4, #8 \n\
    # 2: swapl \n\
    vrev32.i8 d4, d4 \n\
    # 3: storel \n\
    vst1.32 d4[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_s24_32_swap13 \n\
  Laudio_orc_pack_s24_32_swap14: \n\
  "
  :
    [counter1] "+&r" (counter1),
    [counter2] "+&r" (counter2),
    [counter3] "+&r" (counter3)
  :
    [d1] "r" (d1),
    [s1] "r" (s1),
    [n] "r" (n)
  :
    "r1", "r2", "r3", "r12", "q2", "cc", "memory"
  );
}
/* audio_orc_pack_u32 (static implementation)
 *
 * XORs each of the n 32-bit samples in s1 with 0x80000000 (i.e. flips the
 * top bit) and stores the 32-bit results to d1, using ARM NEON inline
 * assembly.
 *
 * d1: destination buffer; n 32-bit words are written
 * s1: source buffer; n 32-bit words are read
 * n:  number of samples; the function returns immediately when n <= 0
 *
 * Up to 64 samples are processed with unaligned vector accesses only. For
 * larger n, single samples are stored until d1 reaches a 16-byte boundary so
 * that the vector loop can use 128-bit aligned stores; that path assumes d1
 * is at least 4-byte aligned.
 */
void
audio_orc_pack_u32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) {
  /* Loop trip counts read (and, for n > 64, recomputed) by the assembly:
   * counter1 = leading single samples, counter2 = groups of four samples,
   * counter3 = trailing single samples. */
  uint32_t counter1 = 0;
  uint32_t counter2;
  uint32_t counter3;

  if (n <= 0)
    return;
  /* Derive the counts from n so the short path honours the requested length
   * instead of always converting a fixed 64 samples. */
  counter2 = (uint32_t) n >> 2;
  counter3 = (uint32_t) n & ((1<<2)-1);

  /* - volatile + "memory": the only effect is the store through d1, which the
   *   compiler cannot see, so the statement must not be dropped or reordered.
   * - r1-r3, r12 (ip), q2, q3 and the flags are scratch and listed as clobbers.
   * - The counters live in registers, so they are updated with mov, not str.
   * - The three aligned loops of the generated code were identical and are
   *   merged into one; the unused constant pool that followed the code is
   *   gone because execution used to fall through into it. */
  __asm__ __volatile__ (" \n\
    # 1: loadpl \n\
    vmov.i32 q2, #0x00000000 \n\
    vorr.i32 q2, #0x80000000 \n\
  Laudio_orc_pack_u3215: \n\
    mov r2, %[n] \n\
    cmp r2, #0x00000040 \n\
    bgt Laudio_orc_pack_u324 \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u3212 \n\
  Laudio_orc_pack_u323: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d6, d7 }, [r3] \n\
    # 2: xorl \n\
    veor q3, q3, q2 \n\
    # 3: storel \n\
    vst1.8 { d6, d7 }, [r2] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_u323 \n\
    b Laudio_orc_pack_u3212 \n\
  Laudio_orc_pack_u324: \n\
    mov ip, #0x00000010 \n\
    mov r1, %[d1] \n\
    sub ip, ip, r1 \n\
    and ip, ip, #15 \n\
    asr ip, ip, #2 \n\
    mov r2, %[n] \n\
    cmp r2, ip \n\
    ble Laudio_orc_pack_u321 \n\
    mov %[counter1], ip \n\
    sub r1, r2, ip \n\
    asr r2, r1, #2 \n\
    mov %[counter2], r2 \n\
    and r2, r1, #3 \n\
    mov %[counter3], r2 \n\
    b Laudio_orc_pack_u322 \n\
  Laudio_orc_pack_u321: \n\
    mov %[counter1], r2 \n\
    mov r2, #0x00000000 \n\
    mov %[counter2], r2 \n\
    mov %[counter3], r2 \n\
  Laudio_orc_pack_u322: \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter1] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u326 \n\
  Laudio_orc_pack_u325: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d6[0], [r3] \n\
    # 2: xorl \n\
    veor d6, d6, d4 \n\
    # 3: storel \n\
    vst1.32 d6[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_u325 \n\
  Laudio_orc_pack_u326: \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u3212 \n\
  Laudio_orc_pack_u327: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d6, d7 }, [r3] \n\
    # 2: xorl \n\
    veor q3, q3, q2 \n\
    # 3: storel \n\
    vst1.64 { d6, d7 }, [r2,:128] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_u327 \n\
  Laudio_orc_pack_u3212: \n\
    mov ip, %[counter3] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u3214 \n\
  Laudio_orc_pack_u3213: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d6[0], [r3] \n\
    # 2: xorl \n\
    veor d6, d6, d4 \n\
    # 3: storel \n\
    vst1.32 d6[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_u3213 \n\
  Laudio_orc_pack_u3214: \n\
  "
  :
    [counter1] "+&r" (counter1),
    [counter2] "+&r" (counter2),
    [counter3] "+&r" (counter3)
  :
    [d1] "r" (d1),
    [s1] "r" (s1),
    [n] "r" (n)
  :
    "r1", "r2", "r3", "r12", "q2", "q3", "cc", "memory"
  );
}
/* audio_orc_pack_s32 (static implementation)
 *
 * Copies n 32-bit samples from s1 to d1 unchanged, using ARM NEON inline
 * assembly.
 *
 * d1: destination buffer; n 32-bit words are written
 * s1: source buffer; n 32-bit words are read
 * n:  number of samples; the function returns immediately when n <= 0
 *
 * Up to 64 samples are processed with unaligned vector accesses only. For
 * larger n, single samples are stored until d1 reaches a 16-byte boundary so
 * that the vector loop can use 128-bit aligned stores; that path assumes d1
 * is at least 4-byte aligned.
 */
void
audio_orc_pack_s32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) {
  /* Loop trip counts read (and, for n > 64, recomputed) by the assembly:
   * counter1 = leading single samples, counter2 = groups of four samples,
   * counter3 = trailing single samples. */
  uint32_t counter1 = 0;
  uint32_t counter2;
  uint32_t counter3;

  if (n <= 0)
    return;
  /* Derive the counts from n so the short path honours the requested length
   * instead of always copying a fixed 64 samples. */
  counter2 = (uint32_t) n >> 2;
  counter3 = (uint32_t) n & ((1<<2)-1);

  /* - volatile + "memory": the only effect is the store through d1, which the
   *   compiler cannot see, so the statement must not be dropped or reordered.
   * - r1-r3, r12 (ip), q2 and the flags are scratch and listed as clobbers.
   * - The counters live in registers, so they are updated with mov, not str.
   * - The three aligned loops of the generated code were identical and are
   *   merged into one; the unused constant pool that followed the code is
   *   gone because execution used to fall through into it. */
  __asm__ __volatile__ (" \n\
  Laudio_orc_pack_s3215: \n\
    mov r2, %[n] \n\
    cmp r2, #0x00000040 \n\
    bgt Laudio_orc_pack_s324 \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s3212 \n\
  Laudio_orc_pack_s323: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d4, d5 }, [r3] \n\
    # 1: copyl \n\
    # 2: storel \n\
    vst1.8 { d4, d5 }, [r2] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_s323 \n\
    b Laudio_orc_pack_s3212 \n\
  Laudio_orc_pack_s324: \n\
    mov ip, #0x00000010 \n\
    mov r1, %[d1] \n\
    sub ip, ip, r1 \n\
    and ip, ip, #15 \n\
    asr ip, ip, #2 \n\
    mov r2, %[n] \n\
    cmp r2, ip \n\
    ble Laudio_orc_pack_s321 \n\
    mov %[counter1], ip \n\
    sub r1, r2, ip \n\
    asr r2, r1, #2 \n\
    mov %[counter2], r2 \n\
    and r2, r1, #3 \n\
    mov %[counter3], r2 \n\
    b Laudio_orc_pack_s322 \n\
  Laudio_orc_pack_s321: \n\
    mov %[counter1], r2 \n\
    mov r2, #0x00000000 \n\
    mov %[counter2], r2 \n\
    mov %[counter3], r2 \n\
  Laudio_orc_pack_s322: \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter1] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s326 \n\
  Laudio_orc_pack_s325: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d4[0], [r3] \n\
    # 1: copyl \n\
    # 2: storel \n\
    vst1.32 d4[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_s325 \n\
  Laudio_orc_pack_s326: \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s3212 \n\
  Laudio_orc_pack_s327: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d4, d5 }, [r3] \n\
    # 1: copyl \n\
    # 2: storel \n\
    vst1.64 { d4, d5 }, [r2,:128] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_s327 \n\
  Laudio_orc_pack_s3212: \n\
    mov ip, %[counter3] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s3214 \n\
  Laudio_orc_pack_s3213: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d4[0], [r3] \n\
    # 1: copyl \n\
    # 2: storel \n\
    vst1.32 d4[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_s3213 \n\
  Laudio_orc_pack_s3214: \n\
  "
  :
    [counter1] "+&r" (counter1),
    [counter2] "+&r" (counter2),
    [counter3] "+&r" (counter3)
  :
    [d1] "r" (d1),
    [s1] "r" (s1),
    [n] "r" (n)
  :
    "r1", "r2", "r3", "r12", "q2", "cc", "memory"
  );
}
/* audio_orc_pack_u32_swap (static implementation)
 *
 * For each of the n 32-bit samples in s1: XORs the word with 0x80000000,
 * reverses the byte order of the resulting 32-bit word (vrev32.i8) and
 * stores it to d1. Implemented with ARM NEON inline assembly.
 *
 * d1: destination buffer; n 32-bit words are written
 * s1: source buffer; n 32-bit words are read
 * n:  number of samples; the function returns immediately when n <= 0
 *
 * Up to 64 samples are processed with unaligned vector accesses only. For
 * larger n, single samples are stored until d1 reaches a 16-byte boundary so
 * that the vector loop can use 128-bit aligned stores; that path assumes d1
 * is at least 4-byte aligned.
 */
void
audio_orc_pack_u32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) {
  /* Loop trip counts read (and, for n > 64, recomputed) by the assembly:
   * counter1 = leading single samples, counter2 = groups of four samples,
   * counter3 = trailing single samples. */
  uint32_t counter1 = 0;
  uint32_t counter2;
  uint32_t counter3;

  if (n <= 0)
    return;
  /* Derive the counts from n so the short path honours the requested length
   * instead of always converting a fixed 64 samples. */
  counter2 = (uint32_t) n >> 2;
  counter3 = (uint32_t) n & ((1<<2)-1);

  /* - The swapl step (vrev32.i8) is now emitted in every loop; without it
   *   this kernel produced exactly the same output as audio_orc_pack_u32.
   * - volatile + "memory": the only effect is the store through d1, which the
   *   compiler cannot see, so the statement must not be dropped or reordered.
   * - r1-r3, r12 (ip), q2, q3 and the flags are scratch and listed as clobbers.
   * - The counters live in registers, so they are updated with mov, not str.
   * - The three aligned loops of the generated code were identical and are
   *   merged into one; the unused constant pool that followed the code is
   *   gone because execution used to fall through into it. */
  __asm__ __volatile__ (" \n\
    # 1: loadpl \n\
    vmov.i32 q2, #0x00000000 \n\
    vorr.i32 q2, #0x80000000 \n\
  Laudio_orc_pack_u32_swap15: \n\
    mov r2, %[n] \n\
    cmp r2, #0x00000040 \n\
    bgt Laudio_orc_pack_u32_swap4 \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u32_swap12 \n\
  Laudio_orc_pack_u32_swap3: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d6, d7 }, [r3] \n\
    # 2: xorl \n\
    veor q3, q3, q2 \n\
    # 3: swapl \n\
    vrev32.i8 q3, q3 \n\
    # 4: storel \n\
    vst1.8 { d6, d7 }, [r2] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_u32_swap3 \n\
    b Laudio_orc_pack_u32_swap12 \n\
  Laudio_orc_pack_u32_swap4: \n\
    mov ip, #0x00000010 \n\
    mov r1, %[d1] \n\
    sub ip, ip, r1 \n\
    and ip, ip, #15 \n\
    asr ip, ip, #2 \n\
    mov r2, %[n] \n\
    cmp r2, ip \n\
    ble Laudio_orc_pack_u32_swap1 \n\
    mov %[counter1], ip \n\
    sub r1, r2, ip \n\
    asr r2, r1, #2 \n\
    mov %[counter2], r2 \n\
    and r2, r1, #3 \n\
    mov %[counter3], r2 \n\
    b Laudio_orc_pack_u32_swap2 \n\
  Laudio_orc_pack_u32_swap1: \n\
    mov %[counter1], r2 \n\
    mov r2, #0x00000000 \n\
    mov %[counter2], r2 \n\
    mov %[counter3], r2 \n\
  Laudio_orc_pack_u32_swap2: \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter1] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u32_swap6 \n\
  Laudio_orc_pack_u32_swap5: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d6[0], [r3] \n\
    # 2: xorl \n\
    veor d6, d6, d4 \n\
    # 3: swapl \n\
    vrev32.i8 d6, d6 \n\
    # 4: storel \n\
    vst1.32 d6[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_u32_swap5 \n\
  Laudio_orc_pack_u32_swap6: \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u32_swap12 \n\
  Laudio_orc_pack_u32_swap7: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d6, d7 }, [r3] \n\
    # 2: xorl \n\
    veor q3, q3, q2 \n\
    # 3: swapl \n\
    vrev32.i8 q3, q3 \n\
    # 4: storel \n\
    vst1.64 { d6, d7 }, [r2,:128] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_u32_swap7 \n\
  Laudio_orc_pack_u32_swap12: \n\
    mov ip, %[counter3] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_u32_swap14 \n\
  Laudio_orc_pack_u32_swap13: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d6[0], [r3] \n\
    # 2: xorl \n\
    veor d6, d6, d4 \n\
    # 3: swapl \n\
    vrev32.i8 d6, d6 \n\
    # 4: storel \n\
    vst1.32 d6[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_u32_swap13 \n\
  Laudio_orc_pack_u32_swap14: \n\
  "
  :
    [counter1] "+&r" (counter1),
    [counter2] "+&r" (counter2),
    [counter3] "+&r" (counter3)
  :
    [d1] "r" (d1),
    [s1] "r" (s1),
    [n] "r" (n)
  :
    "r1", "r2", "r3", "r12", "q2", "q3", "cc", "memory"
  );
}
/* audio_orc_pack_s32_swap (static implementation)
 *
 * Reverses the byte order of each of the n 32-bit samples in s1 (vrev32.i8)
 * and stores the results to d1, using ARM NEON inline assembly.
 *
 * d1: destination buffer; n 32-bit words are written
 * s1: source buffer; n 32-bit words are read
 * n:  number of samples; the function returns immediately when n <= 0
 *
 * Up to 64 samples are processed with unaligned vector accesses only. For
 * larger n, single samples are stored until d1 reaches a 16-byte boundary so
 * that the vector loop can use 128-bit aligned stores; that path assumes d1
 * is at least 4-byte aligned.
 */
void
audio_orc_pack_s32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) {
  /* Loop trip counts read (and, for n > 64, recomputed) by the assembly:
   * counter1 = leading single samples, counter2 = groups of four samples,
   * counter3 = trailing single samples. */
  uint32_t counter1 = 0;
  uint32_t counter2;
  uint32_t counter3;

  if (n <= 0)
    return;
  /* Derive the counts from n so the short path honours the requested length
   * instead of always converting a fixed 64 samples. */
  counter2 = (uint32_t) n >> 2;
  counter3 = (uint32_t) n & ((1<<2)-1);

  /* - volatile + "memory": the only effect is the store through d1, which the
   *   compiler cannot see, so the statement must not be dropped or reordered.
   * - r1-r3, r12 (ip), q2 and the flags are scratch and listed as clobbers.
   * - The counters live in registers, so they are updated with mov, not str.
   * - The three aligned loops of the generated code were identical and are
   *   merged into one; the unused constant pool that followed the code is
   *   gone because execution used to fall through into it. */
  __asm__ __volatile__ (" \n\
  Laudio_orc_pack_s32_swap15: \n\
    mov r2, %[n] \n\
    cmp r2, #0x00000040 \n\
    bgt Laudio_orc_pack_s32_swap4 \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s32_swap12 \n\
  Laudio_orc_pack_s32_swap3: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d4, d5 }, [r3] \n\
    # 1: swapl \n\
    vrev32.i8 q2, q2 \n\
    # 2: storel \n\
    vst1.8 { d4, d5 }, [r2] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_s32_swap3 \n\
    b Laudio_orc_pack_s32_swap12 \n\
  Laudio_orc_pack_s32_swap4: \n\
    mov ip, #0x00000010 \n\
    mov r1, %[d1] \n\
    sub ip, ip, r1 \n\
    and ip, ip, #15 \n\
    asr ip, ip, #2 \n\
    mov r2, %[n] \n\
    cmp r2, ip \n\
    ble Laudio_orc_pack_s32_swap1 \n\
    mov %[counter1], ip \n\
    sub r1, r2, ip \n\
    asr r2, r1, #2 \n\
    mov %[counter2], r2 \n\
    and r2, r1, #3 \n\
    mov %[counter3], r2 \n\
    b Laudio_orc_pack_s32_swap2 \n\
  Laudio_orc_pack_s32_swap1: \n\
    mov %[counter1], r2 \n\
    mov r2, #0x00000000 \n\
    mov %[counter2], r2 \n\
    mov %[counter3], r2 \n\
  Laudio_orc_pack_s32_swap2: \n\
    mov r2, %[d1] \n\
    mov r3, %[s1] \n\
    mov ip, %[counter1] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s32_swap6 \n\
  Laudio_orc_pack_s32_swap5: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d4[0], [r3] \n\
    # 1: swapl \n\
    vrev32.i8 d4, d4 \n\
    # 2: storel \n\
    vst1.32 d4[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_s32_swap5 \n\
  Laudio_orc_pack_s32_swap6: \n\
    mov ip, %[counter2] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s32_swap12 \n\
  Laudio_orc_pack_s32_swap7: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 2 \n\
    # 0: loadl \n\
    vld1.8 { d4, d5 }, [r3] \n\
    # 1: swapl \n\
    vrev32.i8 q2, q2 \n\
    # 2: storel \n\
    vst1.64 { d4, d5 }, [r2,:128] \n\
    add r2, r2, #0x00000010 \n\
    add r3, r3, #0x00000010 \n\
    bne Laudio_orc_pack_s32_swap7 \n\
  Laudio_orc_pack_s32_swap12: \n\
    mov ip, %[counter3] \n\
    cmp ip, #0x00000000 \n\
    beq Laudio_orc_pack_s32_swap14 \n\
  Laudio_orc_pack_s32_swap13: \n\
    subs ip, ip, #0x00000001 \n\
    # LOOP shift 0 \n\
    # 0: loadl \n\
    vld1.32 d4[0], [r3] \n\
    # 1: swapl \n\
    vrev32.i8 d4, d4 \n\
    # 2: storel \n\
    vst1.32 d4[0], [r2] \n\
    add r2, r2, #0x00000004 \n\
    add r3, r3, #0x00000004 \n\
    bne Laudio_orc_pack_s32_swap13 \n\
  Laudio_orc_pack_s32_swap14: \n\
  "
  :
    [counter1] "+&r" (counter1),
    [counter2] "+&r" (counter2),
    [counter3] "+&r" (counter3)
  :
    [d1] "r" (d1),
    [s1] "r" (s1),
    [n] "r" (n)
  :
    "r1", "r2", "r3", "r12", "q2", "cc", "memory"
  );
}
/* audio_orc_pack_f32 (static implementation)
 *
 * Portable C path: narrows each of the n doubles in s1 to a float in d1.
 * The 64-bit input pattern is filtered through ORC_DENORMAL_DOUBLE before
 * the conversion and the 32-bit result through ORC_DENORMAL afterwards
 * (both macros are defined elsewhere; presumably they flush denormals).
 */
void
audio_orc_pack_f32 (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n){
  orc_union32 * ORC_RESTRICT dst = (orc_union32 *)d1;
  const orc_union64 * ORC_RESTRICT src = (orc_union64 *)s1;
  int idx = 0;

  while (idx < n) {
    /* loadq */
    orc_union64 sample = src[idx];
    orc_union64 filtered;
    orc_union32 narrowed;
    orc_union32 result;

    /* convdf: double -> float through the union's floating-point views */
    filtered.i = ORC_DENORMAL_DOUBLE(sample.i);
    narrowed.f = filtered.f;
    result.i = ORC_DENORMAL(narrowed.i);

    /* storel */
    dst[idx] = result;
    idx++;
  }
}
/* audio_orc_pack_f32_swap (static implementation)
 *
 * Portable C path: narrows each of the n doubles in s1 to a float, passes
 * the 32-bit pattern through ORC_SWAP_L and stores it to d1.
 * ORC_DENORMAL_DOUBLE / ORC_DENORMAL filter the value before and after the
 * conversion (all three macros are defined elsewhere; presumably they flush
 * denormals and byte-swap a 32-bit word, respectively).
 */
void
audio_orc_pack_f32_swap (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n){
  orc_union32 * ORC_RESTRICT dst = (orc_union32 *)d1;
  const orc_union64 * ORC_RESTRICT src = (orc_union64 *)s1;
  int idx = 0;

  while (idx < n) {
    /* loadq */
    orc_union64 sample = src[idx];
    orc_union64 filtered;
    orc_union32 narrowed;
    orc_union32 converted;
    orc_union32 swapped;

    /* convdf: double -> float through the union's floating-point views */
    filtered.i = ORC_DENORMAL_DOUBLE(sample.i);
    narrowed.f = filtered.f;
    converted.i = ORC_DENORMAL(narrowed.i);

    /* swapl */
    swapped.i = ORC_SWAP_L(converted.i);

    /* storel */
    dst[idx] = swapped;
    idx++;
  }
}
/* audio_orc_pack_f64 (C implementation)
 *
 * Copies n doubles from s1 to d1.
 *
 * d1: destination; n elements are written
 * s1: source; n elements are read
 * n:  element count; nothing is touched when n <= 0
 *
 * Written as plain C on purpose. The ARM NEON inline-asm body it replaces
 * never reached its n-dependent path (its entry test compared the constant
 * 64 >> 1 against 64), so it always moved 64 elements; it also declared no
 * clobbers for the registers, flags and memory it modified, and fell
 * through into its trailing .word constants.
 */
void
audio_orc_pack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n) {
  int i;

  for (i = 0; i < n; i++) {
    /* loadq -> copyq -> storeq */
    d1[i] = s1[i];
  }
}
/* audio_orc_pack_f64_swap (C implementation)
 *
 * Copies n doubles from s1 to d1, reversing the byte order of every
 * element (the swapq step).
 *
 * d1: destination; n elements are written
 * s1: source; n elements are read
 * n:  element count; nothing is touched when n <= 0
 *
 * Written as plain C on purpose. The ARM NEON inline-asm body it replaces
 * never reached its n-dependent path (its entry test compared the constant
 * 64 >> 1 against 64), so it always processed 64 elements; it also declared
 * no clobbers for the registers, flags and memory it modified, and fell
 * through into its trailing .word constants.
 */
void
audio_orc_pack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n) {
  /* Byte-wise access keeps the swap independent of how a gdouble is
   * interpreted and is valid for any object type. */
  const unsigned char *src = (const unsigned char *) s1;
  unsigned char *dst = (unsigned char *) d1;
  const int width = (int) sizeof (gdouble);
  int i;

  for (i = 0; i < n; i++) {
    int b;

    /* loadq -> swapq -> storeq: mirror the bytes of one element */
    for (b = 0; b < width; b++) {
      dst[b] = src[width - 1 - b];
    }
    src += width;
    dst += width;
  }
}
/* audio_orc_splat_u16 (C implementation)
 *
 * Fills d1[0 .. n-1] with the low 16 bits of p1.
 *
 * d1: destination; n elements are written
 * p1: value to replicate (truncated to guint16)
 * n:  element count; nothing is written when n <= 0
 *
 * Written as plain C on purpose. The ARM NEON inline-asm body it replaces
 * never used p1 (it loaded the fill value from r0 + 0x278, an address that
 * is not an operand of the asm statement), never reached its n-dependent
 * path and therefore always wrote 64 elements, declared no clobbers, and
 * fell through into its trailing .word constants.
 */
void
audio_orc_splat_u16 (guint16 * ORC_RESTRICT d1, int p1, int n) {
  /* loadpw: take the parameter once, outside the loop */
  const guint16 value = (guint16) p1;
  int i;

  for (i = 0; i < n; i++) {
    /* copyw -> storew */
    d1[i] = value;
  }
}
/* audio_orc_splat_u32 (C implementation)
 *
 * Fills d1[0 .. n-1] with p1 converted to guint32.
 *
 * d1: destination; n elements are written
 * p1: value to replicate
 * n:  element count; nothing is written when n <= 0
 *
 * Written as plain C on purpose. The ARM NEON inline-asm body it replaces
 * never used p1 (it loaded the fill value from r0 + 0x278, an address that
 * is not an operand of the asm statement), never reached its n-dependent
 * path and therefore always wrote 64 elements, declared no clobbers, and
 * fell through into its trailing .word constants.
 */
void
audio_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n) {
  /* loadpl: take the parameter once, outside the loop */
  const guint32 value = (guint32) p1;
  int i;

  for (i = 0; i < n; i++) {
    /* copyl -> storel */
    d1[i] = value;
  }
}
/* audio_orc_splat_u64 (C implementation)
 *
 * Fills d1[0 .. n-1] with p1 converted to guint64.
 *
 * d1: destination; n elements are written
 * p1: value to replicate; converted with the ordinary C int -> guint64
 *     conversion, so a negative p1 yields a value with all upper bits set
 * n:  element count; nothing is written when n <= 0
 *
 * NOTE(review): the inline asm this replaces assembled the 64-bit fill
 * value from two separate 32-bit loads (r0 + 0x278 and r0 + 0x298). With an
 * int parameter there is no source for the upper half; confirm whether p1
 * should be widened to a 64-bit type.
 *
 * Written as plain C on purpose. The asm never used p1 (neither address is
 * an operand of the asm statement), never reached its n-dependent path and
 * therefore always wrote 64 elements, declared no clobbers, and fell
 * through into its trailing .word constants.
 */
void
audio_orc_splat_u64 (guint64 * ORC_RESTRICT d1, int p1, int n) {
  /* loadpq: take the parameter once, outside the loop */
  const guint64 value = (guint64) p1;
  int i;

  for (i = 0; i < n; i++) {
    /* copyq -> storeq */
    d1[i] = value;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment