Created
December 31, 2019 23:44
-
-
Save buserror/0a3a69cca927b8da6c9c7ee1605007fc to your computer and use it in GitHub Desktop.
On a G4, the pipelining was crucial, and on an 8-channel, 24-bit (well, 32-bit) audio path it was even more so.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Clip_48_8.cpp | |
* EZ8AudioDriver | |
* | |
* Created by Michel on Sun Oct 06 2002. | |
* Copyright (c) 2002 __MyCompanyName__. All rights reserved. | |
* | |
*/ | |
#include <libkern/OSTypes.h> | |
#include "ice1712.h" | |
// Unsigned word type used for the integer sample buffers of the two
// converters below (UInt32 *oo / UInt32 *ii).
// NOTE(review): `unsigned long` is 32 bits wide only on ILP32 targets; on an
// LP64 build this typedef would be 64 bits -- confirm the intended target.
typedef unsigned long UInt32; | |
// Build-mode switch. FAST is not defined anywhere in this file (presumably it
// comes from the build settings -- confirm). Adding `#undef FAST` here forces
// the portable C code paths.
#if FAST | |
// FASTPLAY selects the inline-assembly macro set in F32L24_48_8,
// FASTCAP the one in L24F32_48_8.
#define FASTPLAY 1 | |
#define FASTCAP 1 | |
#else | |
// Neither FASTPLAY nor FASTCAP is defined here, so both converters compile
// their #else (plain C) macro sets.
#warning Compiling in SLOW mode! | |
#endif | |
namespace EZ8 { | |
// Wraps a single PowerPC `fctiw` instruction: B is passed in a floating-point
// register ("f" constraint) and whatever the instruction leaves in the
// destination floating-point register is returned as a double.
// NOTE(review): per the PowerPC ISA, fctiw ("Floating Convert To Integer
// Word") produces an integer bit pattern in that register, so the returned
// double is presumably not a clamped sample value -- confirm before using it
// arithmetically.
// NOTE(review): the only use visible in this file, the FASTPLAY `_clip`
// macro, ignores the return value.
static inline double __clip( register double B ) | |
{ | |
register double result; | |
asm( "fctiw %0, %1" : "=f" (result) : "f" (B) ); | |
return result; | |
} | |
void F32L24_48_8(double *v, float *ii, UInt32 *oo, long count) | |
{ | |
register double v0=v[0],v1=v[1],v2=v[2],v3=v[3],v4=v[4],v5=v[5],v6=v[6],v7=v[7]; | |
// better see that with tabs == 4! | |
register double scale = 2147483648.0; | |
#if FASTPLAY | |
#define _load(i) s##i = ii[i] | |
#define _volc(i) s##i *= v##i | |
#define _clip(i) __clip(s##i) | |
#define _d2l1(i) s##i *= scale | |
#define _d2l2(i) o##i = (SInt32)s##i | |
#define _stor(i) __asm__( "stwbrx %0, %1, %2" : : "r" (o##i), "b%" (i << 2), "r" (oo) : "memory" ) | |
#else | |
// this is the equivalent, without using assembly | |
#define _load(i) s##i = ii[i] | |
#define _volc(i) s##i *= v##i | |
#define _clip(i) if (s##i > 1.0) s##i = 1.0; else if (s##i < -1.0) s##i = -1.0 | |
#define _d2l1(i) o##i = (SInt32)(s##i * scale) | |
#define _d2l2(i) o##i = (((o##i >> 8) & 0xff) << 16) | (((o##i >> 16) & 0xff) << 8) | (((o##i >> 24) & 0xff)) | |
#define _stor(i) oo[i] = o##i | |
#endif | |
register double s0,s1,s2,s3,s4,s5,s6,s7; | |
register UInt32 o0,o1,o2,o3,o4,o5,o6,o7; | |
while (count--) { | |
// staged pipeline 6x8 | |
_load(0); | |
_load(1); | |
_load(2);_volc(0); | |
_load(3);_volc(1);_clip(0); | |
_load(4);_volc(2);_clip(1);_d2l1(0); | |
_load(5);_volc(3);_clip(2);_d2l1(1);_d2l2(0); | |
_load(6);_volc(4);_clip(3);_d2l1(2);_d2l2(1); | |
_load(7);_volc(5);_clip(4);_d2l1(3);_d2l2(2);_stor(0); | |
_volc(6);_clip(5);_d2l1(4);_d2l2(3);_stor(1); | |
_volc(7);_clip(6);_d2l1(5);_d2l2(4);_stor(2); | |
_clip(7);_d2l1(6);_d2l2(5);_stor(3); | |
_d2l1(7);_d2l2(6);_stor(4); | |
_d2l2(7);_stor(5); | |
_stor(6); | |
_stor(7); | |
ii += 8; | |
oo += NUM_CHANNELS_OUT; | |
} | |
} | |
#undef _load | |
#undef _volc | |
#undef _volm | |
#undef _clip | |
#undef _d2l1 | |
#undef _d2l2 | |
#undef _stor | |
// Wraps a single PowerPC `fctiw` instruction: B is the source floating-point
// register and the destination floating-point register is returned as a
// double. The body is identical to __clip() earlier in this file.
// NOTE(review): per the PowerPC ISA the result register holds an integer bit
// pattern rather than a numeric double -- confirm before using the value.
// NOTE(review): nothing in the code visible here calls this function.
static inline double __fctiw( register double B ) | |
{ | |
register double result; | |
asm( "fctiw %0, %1" : "=f" (result) : "f" (B) ); | |
return result; | |
} | |
void L24F32_48_8(double *v, UInt32 *ii, float *oo, long count) | |
{ | |
register double v0=v[0],v1=v[1],v2=v[2],v3=v[3],v4=v[4],v5=v[5],v6=v[6],v7=v[7]; | |
#if FASTCAP | |
union { double d; unsigned int i[2]; } transfer[2]; | |
transfer[0].i[0] = transfer[1].i[0] = (0x434UL - 32) << 20; | |
//0x41400000UL; | |
transfer[0].i[1] = 0x80000000; | |
double dBias = transfer[0].d; | |
#define _load(i) __asm__ __volatile__("lwbrx %0, %1, %2" : "=r" (s##i) : "b%" (i << 2), "r" (ii) : "memory" ) | |
#define _swap(i) s##i ^= 0x80000000UL | |
#define _l2d1(_i) transfer[_i & 1].i[1] = s##_i | |
#define _l2d2(_i) o##_i = transfer[_i & 1].d | |
#define _l2d3(i) o##i -= dBias | |
#define _volc(i) o##i *= v##i | |
#define _stor(i) oo[i] = o##i | |
#else | |
register double scale = 4.656612873077392578125e-10; // 1 / 2147483648.0; // prevent unnecessary divisions | |
#define _load(i) s##i = ii[i] | |
#define _swap(i) s##i = ((s##i & 0xff) << 24) | (((s##i >> 8) & 0xff) << 16) | (((s##i >> 16) & 0xff) << 8) | |
#define _l2d1(i) o##i = (((double)(SInt32)s##i) * scale) | |
#define _l2d2(i) | |
#define _l2d3(i) | |
#define _volc(i) o##i *= v##i | |
#define _stor(i) oo[i] = o##i | |
#endif | |
register UInt32 s0,s1,s2,s3,s4,s5,s6,s7; | |
register double o0,o1,o2,o3,o4,o5,o6,o7; | |
while (count--) { | |
// staged pipeline 7x8 | |
_load(0); | |
_load(1); | |
_load(2); | |
_load(3);_swap(0); | |
_load(4);_swap(1);_l2d1(0); | |
_load(5);_swap(2);_l2d1(1);_l2d2(0); | |
_load(6);_swap(3);_l2d1(2);_l2d2(1); | |
_load(7);_swap(4);_l2d1(3);_l2d2(2);_l2d3(0); | |
_swap(5);_l2d1(4);_l2d2(3);_l2d3(1);_volc(0); | |
_swap(6);_l2d1(5);_l2d2(4);_l2d3(2);_volc(1);_stor(0); | |
_swap(7);_l2d1(6);_l2d2(5);_l2d3(3);_volc(2);_stor(1); | |
_l2d1(7);_l2d2(6);_l2d3(4);_volc(3);_stor(2); | |
_l2d2(7);_l2d3(5);_volc(4);_stor(3); | |
_l2d3(6);_volc(5);_stor(4); | |
_l2d3(7);_volc(6);_stor(5); | |
_volc(7);_stor(6); | |
_stor(7); | |
ii += NUM_CHANNELS_IN; | |
oo += 8; | |
} | |
} | |
#undef _load | |
#undef _stor | |
#undef _clip | |
#undef _scal | |
#undef _swap | |
} // namespace EZ8 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment