bandito · May 1, 2012 17:44
diff --git a/icmdct.c b/icmdct.c
 #include "typedefs.h"
 #include "imdct.h"

 /* The i16 object located at absolute address 0x1080.  MD_IMDCT_Init() takes
  * its address as the base of the spectrum[] working buffer. */
 #define SPECTRUM_POS *(i16 *)0x1080 // first word of the spectrum buffer
 
 /* Scratch array for MD_IMDCT(): holds the intermediate IDCT outputs before
  * they are windowed (indices 0..11 for short blocks, 0..17 for long ones). */
 static i16 tmp[18];
 
 /* Polyphase history written and read by MD_Polyphase(), indexed as
  * u[channel][half][row 0..16][column 0..15]. */
 static i16 u[2][2][17][16]; /* no v[][], it's redundant */
 static int u_start[2]={0,0}; /* first element of u[][] */
 static int u_div[2]={0,0}; /* which part of u[][] is currently used */
 
 /* Base of the coefficient/sample buffer; set by MD_IMDCT_Init() and indexed
  * as spectrum[ch*576 + ...] by MD_IMDCT() and MD_Polyphase(). */
 static i16 *spectrum;
 
 /* PCM output written by MD_Polyphase(); the pragma places it in the
  * "PCM_BUFFER" section. */
 #pragma DATA_SECTION(PcmBuffer, "PCM_BUFFER")
 u16 PcmBuffer[6*2304];
 
 /* Defined elsewhere; MD_Polyphase() uses it (times 1152) as the offset of the
  * granule currently being written into PcmBuffer. */
 extern u16 GrannulesInBuffer;
 /* Second half of the last windowed IMDCT output, added to the next block by
  * MD_IMDCT(). */
 static i16 prev[18];


 /*
  * MD_IMDCT - in-place fixed-point inverse MDCT for one channel.
  *
  * For each of the first no_of_imdcts subbands the 18 coefficients at
  * spectrum[ch*576 + sb*18] are replaced by 18 output samples: the first half
  * of the windowed transform plus the overlap saved in prev[].  In odd
  * subbands every odd-indexed output sample is negated.  The second half of
  * the windowed transform is then saved in prev[].
  *
  *   win_type      2 selects three overlapped 12-point transforms (short
  *                 blocks); any other value selects one 36-point transform
  *                 windowed with win[win_type][].
  *   ch            channel index used to locate the coefficients.
  *   gr            not used by this function.
  *   no_of_imdcts  number of 18-coefficient subbands to transform.
  *
  * The input coefficients are modified during the computation (input
  * aliasing) before being overwritten with the result.
  *
  * NOTE(review): prev[] is a single 18-entry array shared by every subband and
  * channel, so the overlap added to a block is whatever the previously
  * processed block left behind.  Confirm whether a per-channel, per-subband
  * overlap buffer was intended.
  */
 void MD_IMDCT(i16 win_type,i16 ch,i16 gr, u16 no_of_imdcts)
 {
 	register i16 save;
 	i16 pp1, pp2;
 	i16 i, p, ss;
 	i16 *in;
 	i16 out[36];
 	i16 n;
 	i16 sb;
 	i16 tmp0,tmp1,tmp2,tmp3,tmp4,tmp0_,tmp1_,tmp2_,tmp3_;
 	i16 tmp0o,tmp1o,tmp2o,tmp3o,tmp4o,tmp0_o,tmp1_o,tmp2_o,tmp3_o;
 	i16 i0;
 	i16 i0p12;
 	i16 i6_;
 	i16 e,o;

 	for (sb=0; sb<no_of_imdcts; sb++)
 	{

 		in = &spectrum[ch*576+sb*18];

 		if(win_type == 2)
 		{
 			/* Short blocks: three 12-point transforms accumulated into out[]. */
 			for(p=0;p<36;p++)
 				out[p] = 0;

 			for(ss=0;ss<18;ss+=6)
 			{

 				/*
 				 *  12 point IMDCT
 				 */

 				/* Begin 12 point IDCT */

 				/* Input aliasing for 12 pt IDCT */
 				in[5+ss]+=in[4+ss];
 				in[4+ss]+=in[3+ss];
 				in[3+ss]+=in[2+ss];
 				in[2+ss]+=in[1+ss];
 				in[1+ss]+=in[0+ss];

 				/* Input aliasing on odd indices (for 6 point IDCT) */
 				in[5+ss] += in[3+ss];
 				in[3+ss]  += in[1+ss];

 				/* 3 point IDCT on even indices */
 				//Initially supposing that the Q15 format will never overflow
 				pp2 = q15_mul(in[4+ss],0x4000); //*0.5f
 				pp1 = q15_mul(in[2+ss],0x6ED9); //*0.866025403f;
 				save = in[0+ss] + pp2;
 				tmp[1] = in[0+ss] - in[4+ss];
 				tmp[0] = save + pp1;
 				tmp[2] = save - pp1;

 				/* End 3 point IDCT on even indices */

 				/* 3 point IDCT on odd indices (for 6 point IDCT) */

 				pp2 = q15_mul(in[5+ss],0x4000); //*0.5f
 				pp1 = q15_mul(in[3+ss],0x6ED9);
 				save = in[1+ss] + pp2;
 				tmp[4] = in[1+ss] - in[5+ss];
 				tmp[5] = save + pp1;
 				tmp[3] = save - pp1;

 				/* End 3 point IDCT on odd indices */

 				/* Twiddle factors on odd indices (for 6 point IDCT) */

 				tmp[3] = q15_q13_mul(tmp[3],0x3DD1);// 1.931851653f;
 				tmp[4] = q15_mul(tmp[4],0x5A82); //0.707106781f;
 				tmp[5] = q15_mul(tmp[5],0x4241); //0.517638090f;

 				/* Output butterflies on 2 3 point IDCT's (for 6 point IDCT) */

 				save = tmp[0];
 				tmp[0] += tmp[5];
 				tmp[5] = save - tmp[5];

 				save = tmp[1];
 				tmp[1] += tmp[4];
 				tmp[4] = save - tmp[4];

 				save = tmp[2];
 				tmp[2] += tmp[3];
 				tmp[3] = save - tmp[3];

 				/* End 6 point IDCT */

 				/* Twiddle factors on indices (for 12 point IDCT) */

 				tmp[0] = q15_mul(tmp[0],0x408D); //0.504314480f;
 				tmp[1] = q15_mul(tmp[1],0x4545); //0.541196100f;
 				tmp[2] = q15_mul(tmp[2],0x50AB); //0.630236207f;
 				tmp[3] = q15_mul(tmp[3],0x6921); //0.821339815f;
 				tmp[4] = q15_q13_mul(tmp[4],0x29CF); //1.306562965f;
 				/* Each output gets its own twiddle: scale tmp[5] itself,
 				 * not the tmp[4] that was just scaled on the line above. */
 				tmp[5] = q15_q13_mul(tmp[5],0x7A94); //3.830648788f;

 				/* End 12 point IDCT */

 				/* Shift to 12 point modified IDCT, multiply by window type 2 */
 				/* 0x9A73 is -0.793353340 in Q15, the same constant that is
 				 * applied to tmp[5] a few lines below. */
 				tmp[8]  = q15_mul(tmp[0], 0x9A73);	//-0.793353340f;
 				tmp[9]  = q15_mul(tmp[0], 0xB214);	// -0.608761429f;
 				tmp[7]  = q15_mul(tmp[1], 0x89BE);	// -0.923879532f;
 				tmp[10] = q15_mul(tmp[1], 0xCF04);	// -0.382683432f;
 				tmp[6]  = q15_mul(tmp[2], 0x8118); //-0.991444861f;
 				tmp[11] = q15_mul(tmp[2], 0xEF4A); // -0.130526192f;

 				tmp[0]  = tmp[3];
 				tmp[1]  = q15_mul(tmp[4],0x30FB); // 0.382683432f;
 				tmp[2]  = q15_mul(tmp[5],0x4DEB); // 0.608761429f;

 				tmp[3]  = q15_mul(tmp[5],0x9A73); // -0.793353340f;
 				tmp[4]  = q15_mul(tmp[4],0x89BE); // -0.923879532f;
 				tmp[5]  = q15_mul(tmp[0],0x8118); // -0.991444861f;

 				tmp[0] = q15_mul(tmp[0],0x10B5); // 0.130526192f;

 				/* Overlap-add the 12 windowed samples at offset ss+6. */
 				for (n=6; n<18; n++)
 					out[ss + n]  += tmp[n-6];
 			}
 			//overlapping
 			if (sb&1)
 			{ /* odd subband: odd-indexed samples are negated */
 				for (i=0;i<18;i+=2) in[i]=out[i] + prev[i];
 				for (i=1;i<18;i+=2)  in[i]=-out[i] - prev[i];
 			}
 			else
 				for (i=0;i<18;i++)  in[i]=out[i] + prev[i];

 			for (i=18;i<36;i++) prev[i-18]=out[i]; //create new overlap array

 		}
 		else
 		{
 			/*
 			 * 36 point IDCT ****************************************************************
 			 */

 			/* input aliasing for 36 point IDCT */

 			for (n=17; n>0; n--)
 				in[n]+=in[n-1];

 			/* 18 point IDCT for odd indices */

 			/* input aliasing for 18 point IDCT */
 			in[17]+=in[15];
 			in[15]+=in[13];
 			in[13]+=in[11];
 			in[11]+=in[9];
 			in[9] +=in[7];
 			in[7] +=in[5];
 			in[5] +=in[3];
 			in[3] +=in[1];

 			{

 			/* Fast 9 Point Inverse Discrete Cosine Transform
 			//
 			// By  Francois-Raymond Boyer
 			//         mailto:[email protected]
 			//         http://www.iro.umontreal.ca/~boyerf
 			//
 			// The code has been optimized for Intel processors
 			//  (takes a lot of time to convert float to and from internal FPU representation)
 			//
 			// It is a simple "factorization" of the IDCT matrix.
 			*/
 			/* 9 point IDCT on even indices */
 				{
 				/* 5 points on odd indices (not really an IDCT) */
 				 	i0 = in[0]+in[0];
 					i0p12 = i0 + in[12];

 					tmp0 = i0p12 + q15_q13_mul(in[4],0x3C23) /*1.8793852415718f */  + q15_q13_mul(in[8],0x3106) /*1.532088886238f*/   + q15_mul(in[16],0x2C74); /* 0.34729635533386f*/
 					tmp1 = i0    + in[4]                   - in[8] - in[12] - in[12] - in[16];
 					tmp2 = i0p12 - q15_mul(in[4], 0x2C74) /*0.34729635533386f*/ - q15_q13_mul(in[8],0x3C23) /* 1.8793852415718f*/  + q15_q13_mul(in[16],0x3106); /* 1.532088886238f*/
 					tmp3 = i0p12 - q15_q13_mul(in[4],0x3106) /*1.532088886238f*/ + q15_mul(in[8], 0x2C74) /*0.34729635533386f*/ - q15_q13_mul(in[16],0x3C23); /*1.8793852415718f */
 					tmp4 = in[0] - in[4]                   + in[8] - in[12]          + in[16];
 				}

 				{
 					//1.9696155060244=0x3F07
 					//1.2855752193731=0x2923
 					//0.68404028665134=0x578E

 					i6_ = q15_q13_mul(in[6],0x376C); /*1.732050808f*/

 					tmp0_ = q15_q13_mul(in[2],0x3F07) /*1.9696155060244f*/  + i6_ + q15_q13_mul(in[10],0x2923) /*1.2855752193731f*/  + q15_mul(in[14],0x578E); /*0.68404028665134f*/
 					tmp1_ = q15_q13_mul((in[2]                        - in[10]                   - in[14]),0x376C); /*1.732050808f*/
 					tmp2_ = q15_q13_mul(in[2],0x2923) /*1.2855752193731f*/  - i6_ - q15_mul(in[10],0x578E) /*0.68404028665134f*/ + q15_q13_mul(in[14],0x3F07); /*1.9696155060244f*/
 					tmp3_ = q15_mul(in[2],0x578E) /*0.68404028665134f*/ - i6_ + q15_q13_mul(in[10],0x3F07) /*1.9696155060244f*/  - q15_q13_mul(in[14],0x2923); /*1.2855752193731f*/
 				}

 				/* 9 point IDCT on odd indices */
 				{
 				/* 5 points on odd indices (not really an IDCT) */
 					i0 = in[0+1]+in[0+1];
 					i0p12 = i0 + in[12+1];

 					tmp0o = i0p12 + q15_q13_mul(in[4+1],0x3C23) /*1.8793852415718f */  + q15_q13_mul(in[8+1],0x3106) /*1.532088886238f*/   + q15_mul(in[16+1],0x2C74); /* 0.34729635533386f*/
 					tmp1o = i0      + in[4+1]                   - in[8+1] - in[12+1] - in[12+1] - in[16+1];
 					tmp2o = i0p12 - q15_mul(in[4+1], 0x2C74) /*0.34729635533386f*/ - q15_q13_mul(in[8+1],0x3C23) /* 1.8793852415718f*/  + q15_q13_mul(in[16+1],0x3106); /* 1.532088886238f*/
 					tmp3o = i0p12 - q15_q13_mul(in[4+1],0x3106) /*1.532088886238f*/ + q15_mul(in[8+1], 0x2C74) /*0.34729635533386f*/ - q15_q13_mul(in[16+1],0x3C23); /*1.8793852415718f */
 					tmp4o = q15_mul((in[0+1] - in[4+1]                   + in[8+1] - in[12+1]            + in[16+1]),0x5A82); /*0.707106781f Twiddled */
 				}

 				{
 				/* 4 points on even indices */
 					i6_ = q15_q13_mul(in[6+1],0x376C); /*1.732050808f*/

 					tmp0_o = q15_q13_mul(in[2+1],0x3F07) /*1.9696155060244f*/  + i6_ + q15_q13_mul(in[10+1],0x2923) /*1.2855752193731f*/  + q15_mul(in[14+1],0x578E); /*0.68404028665134f*/
 					tmp1_o = q15_q13_mul((in[2+1]                        - in[10+1]                   - in[14+1]),0x376C); /*1.732050808f*/
 					tmp2_o = q15_q13_mul(in[2+1],0x2923) /*1.2855752193731f*/  - i6_ - q15_mul(in[10+1],0x578E) /*0.68404028665134f*/ + q15_q13_mul(in[14+1],0x3F07); /*1.9696155060244f*/
 					tmp3_o = q15_mul(in[2+1],0x578E) /*0.68404028665134f*/ - i6_ + q15_q13_mul(in[10+1],0x3F07) /*1.9696155060244f*/  - q15_q13_mul(in[14+1],0x2923); /*1.2855752193731f*/
 				}

 				/* Twiddle factors on odd indices
 				// and
 				// Butterflies on 9 point IDCT's
 				// and
 				// twiddle factors for 36 point IDCT
 				*/
 				{
 					e = tmp0 + tmp0_;
 					o = q15_mul((tmp0o + tmp0_o),0x403E); /*0.501909918f*/
 					tmp[0] = q15_mul((e + o),0xDFF8); /*(-0.500476342f*.5f)*/
 					tmp[17] = q15_q11_mul((e - o),0xD226); /*(-11.46279281f*.5f)*/

 					e = tmp1 + tmp1_;
 					o = q15_mul((tmp1o + tmp1_o),0x4241); //0.517638090f
 					tmp[1] = q15_mul((e + o),0xDFB9); //-0.504314480f*.5f
 					tmp[16] = q15_q13_mul((e - o),0xC2B5);//-3.830648788f*.5f)

 					e = tmp2 + tmp2_;
 					o = q15_mul((tmp2o + tmp2_o),0x469D); //0.551688959f
 					tmp[2] = q15_mul((e + o),0xDF39); //(-0.512139757f*.5f)
 					tmp[15] = q15_q13_mul((e - o),0xDB09); //(-2.310113158f*.5f)

 					e = tmp3 + tmp3_;
 					o = q15_mul((tmp3o + tmp3_o),0x4E21); //.610387294f
 					tmp[3] = q15_mul((e + o),0xDE72); //-0.524264562f*.5f
 					tmp[14] = q15_mul((e - o),0x9595); //(-1.662754762f*.5f)

 					tmp[4] = q15_mul((tmp4 + tmp4o),0xBABA); //(-0.541196100f)
 					tmp[13] = q15_q13_mul((tmp4 - tmp4o),0xD630); //(-1.306562965f)

 					e = tmp3 - tmp3_;
 					o = q15_mul((tmp3o - tmp3_o),0x6F94); //0.871723397f
 					tmp[5] = q15_mul((e + o),0xDBEC); //(-0.563690973f*.5f)
 					tmp[12] = q15_mul((e - o),0xBAB2); //(-1.082840285f*.5f)

 					e = tmp2 - tmp2_;
 					o = q15_q13_mul((tmp2o - tmp2_o),0x25DB); //1.183100792f
 					tmp[6] = q15_mul((e + o), 0xDA0E); //(-0.592844523f*.5f)
 					tmp[11] = q15_mul((e - o),0xC471); // (-0.930579498f*.5f)

 					e = tmp1 - tmp1_;
 					o = q15_q13_mul((tmp1o - tmp1_o),0x3DD1); //1.931851653f
 					tmp[7] = q15_mul((e + o),0xD7AA); //(-0.630236207f*.5f)
 					tmp[10] = q15_mul((e - o),0xCB6F); //(-0.821339815f*.5f)

 					e = tmp0 - tmp0_;
 					o = q15_q11_mul((tmp0o - tmp0_o),0x2DE5); //5.736856623f
 					tmp[8] = q15_mul((e + o),0xD498); //(-0.678170852f*.5f)
 					tmp[9] = q15_mul((e - o),0xD0A2); //(-0.740093616f*.5f)
 				}

 			}
 			/* shift to modified IDCT */

 			/* First half of the windowed output plus the saved overlap.  The
 			 * tmp[] -> output mapping is the same for every subband; only the
 			 * sign of odd-indexed samples depends on the subband parity. */
 			for (n=0; n<9; n++)
 				in[n] = q15_mul(-tmp[9+n]  , win[win_type][n]) + prev[n];

 			for (n=9; n<18; n++)
 				in[n] = q15_mul(tmp[26-n]  , win[win_type][n]) + prev[n];

 			if (sb&1)
 			{
 				/* Odd subband: negate the complete (windowed + overlap)
 				 * odd-indexed samples, as the short-block path does. */
 				for (n=1; n<18; n+=2)
 					in[n] = -in[n];
 			}

 			/* Second half of the windowed output becomes the new overlap. */
 			for (n=0; n<9; n++)
 				prev[n]= q15_mul(tmp[8-n]  , win[win_type][18+n]);

 			for (n=9; n<18; n++)
 				prev[n]= q15_mul(tmp[n-9]  , win[win_type][18+n]);

 		}
 	}
 }

 /*
  * MD_Polyphase - polyphase synthesis of one time slot of one channel.
  *
  * Reads the 32 subband samples spectrum[ch*576 + 18*sb + f] (sb = 0..31),
  * runs them through a butterfly network driven by the b[] coefficients and
  * stores the results in column u_start[ch] of the two halves of u[ch][][][].
  * The current half u[ch][u_div[ch]] is then windowed with Dewindow[] to
  * produce 32 PCM samples, written with a stride of 2 starting at
  * PcmBuffer[GrannulesInBuffer*1152 + f*64 + cha], where cha is 0 for a
  * non-zero ch and 1 for ch == 0.  Finally u_start[ch] is decremented modulo
  * 16 and u_div[ch] is toggled.
  *
  *   ch  channel index (0 or 1).
  *   f   time-slot index inside the granule.
  *
  * NOTE(review): the first input butterfly scales with q15_mul(…, b[1]) while
  * every other one uses q15_q14_mul; confirm the format b[1] is stored in.
  */
 void MD_Polyphase(u16 ch, u16 f)
 {
 	int start = u_start[ch];
 	int div = u_div[ch];
 	i16 (*u_p)[16];

 	u16 j,n,k, cha;

 	const i16 *dewindow = Dewindow[0] + 15 - start;
 	i16 *u_ptr = (i16 *) u[ch][div];
 	u16 *pcm;

 	i16 outf1, outf2, outf3, outf4;
 	i16 d16,d17,d18,d19,d20,d21,d22,d23,d24,d25,d26,d27,d28,d29,d30,d31;
 	i16 d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15;
 	i16 c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;

 	cha=(ch)?0:1;

 	/* step 1: 16-wide butterflies on the 32 subband samples */
 	d0 = spectrum[ch*576+f];    d16=q15_mul((d0  - spectrum[ch*576+558+f]) , b[1]); d0 += spectrum[ch*576+558+f];
 	d1 = spectrum[ch*576+18+f]; d17=q15_q14_mul((d1  - spectrum[ch*576+540+f])  , b[3]); d1 += spectrum[ch*576+540+f];
 	d3 = spectrum[ch*576+36+f]; d19=q15_q14_mul((d3  - spectrum[ch*576+522+f])  , b[5]); d3 += spectrum[ch*576+522+f];
 	d2 = spectrum[ch*576+54+f]; d18=q15_q14_mul((d2  - spectrum[ch*576+504+f])  , b[7]); d2 += spectrum[ch*576+504+f];
 	d6 = spectrum[ch*576+72+f]; d22=q15_q14_mul((d6  - spectrum[ch*576+486+f])  , b[9]); d6 += spectrum[ch*576+486+f];
 	d7 = spectrum[ch*576+90+f]; d23=q15_q14_mul((d7  - spectrum[ch*576+468+f])  , b[11]); d7 += spectrum[ch*576+468+f];
 	d5 = spectrum[ch*576+108+f]; d21=q15_q14_mul((d5  - spectrum[ch*576+450+f]) , b[13]); d5 += spectrum[ch*576+450+f];
 	d4 = spectrum[ch*576+126+f]; d20=q15_q14_mul((d4  - spectrum[ch*576+432+f]) , b[15]); d4 += spectrum[ch*576+432+f];
 	d12= spectrum[ch*576+144+f]; d28=q15_q14_mul((d12 - spectrum[ch*576+414+f]) , b[17]); d12+= spectrum[ch*576+414+f];
 	d13= spectrum[ch*576+162+f]; d29=q15_q14_mul((d13 - spectrum[ch*576+396+f]) , b[19]); d13+= spectrum[ch*576+396+f];
 	d15= spectrum[ch*576+180+f]; d31=q15_q14_mul((d15 - spectrum[ch*576+378+f]) , b[21]); d15+= spectrum[ch*576+378+f];
 	d14= spectrum[ch*576+198+f]; d30=q15_q14_mul((d14 - spectrum[ch*576+360+f]) , b[23]); d14+= spectrum[ch*576+360+f];
 	d10= spectrum[ch*576+216+f]; d26=q15_q14_mul((d10 - spectrum[ch*576+342+f]) , b[25]); d10+= spectrum[ch*576+342+f];
 	d11= spectrum[ch*576+234+f]; d27=q15_q14_mul((d11 - spectrum[ch*576+324+f]) , b[27]); d11+= spectrum[ch*576+324+f];
 	d9 = spectrum[ch*576+252+f]; d25=q15_q14_mul((d9  - spectrum[ch*576+306+f]) , b[29]); d9 += spectrum[ch*576+306+f];
 	d8 = spectrum[ch*576+270+f]; d24=q15_q14_mul((d8  - spectrum[ch*576+288+f]) , b[31]); d8 += spectrum[ch*576+288+f];


 	/* a test to see what can be done with memory separation
 	 * first we process indexes 0-15
 	 */

 	/* step 2: 8-wide butterflies */
 	c0 = d0 + d8 ; c8 = q15_q14_mul(( d0 - d8 ) ,  b[2]);
 	c1 = d1 + d9 ; c9 = q15_q14_mul(( d1 - d9 ) ,  b[6]);
 	c2 = d2 + d10; c10= q15_q14_mul(( d2 - d10) , b[14]);
 	c3 = d3 + d11; c11= q15_q14_mul(( d3 - d11) , b[10]);
 	c4 = d4 + d12; c12= q15_q14_mul(( d4 - d12) , b[30]);
 	c5 = d5 + d13; c13= q15_q14_mul(( d5 - d13) , b[26]);
 	c6 = d6 + d14; c14= q15_q14_mul(( d6 - d14) , b[18]);
 	c7 = d7 + d15; c15= q15_q14_mul(( d7 - d15) , b[22]);

 	/* step 3: 4-wide butterflies
 	*/
 	d0 = c0 + c4 ; d4 = q15_q14_mul(( c0 - c4 ) ,  b[4]);
 	d1 = c1 + c5 ; d5 = q15_q14_mul(( c1 - c5 ) , b[12]);
 	d2 = c2 + c6 ; d6 = q15_q14_mul(( c2 - c6 ) , b[28]);
 	d3 = c3 + c7 ; d7 = q15_q14_mul(( c3 - c7 ) , b[20]);

 	d8 = c8 + c12; d12= q15_q14_mul(( c8 - c12) ,  b[4]);
 	d9 = c9 + c13; d13= q15_q14_mul(( c9 - c13) , b[12]);
 	d10= c10+ c14; d14= q15_q14_mul((c10 - c14) , b[28]);
 	d11= c11+ c15; d15= q15_q14_mul((c11 - c15) , b[20]);


 	/* step 4: 2-wide butterflies */
 	c0 = d0 + d2 ; c2 = q15_q14_mul(( d0 - d2 ) ,  b[8]);
 	c1 = d1 + d3 ; c3 = q15_q14_mul(( d1 - d3 ) , b[24]);
 	c4 = d4 + d6 ; c6 = q15_q14_mul(( d4 - d6 ) ,  b[8]);
 	c5 = d5 + d7 ; c7 = q15_q14_mul(( d5 - d7 ) , b[24]);
 	c8 = d8 + d10; c10= q15_q14_mul(( d8 - d10) ,  b[8]);
 	c9 = d9 + d11; c11= q15_q14_mul(( d9 - d11) , b[24]);
 	c12= d12+ d14; c14= q15_q14_mul((d12 - d14) ,  b[8]);
 	c13= d13+ d15; c15= q15_q14_mul((d13 - d15) , b[24]);

 	/* step 5: 1-wide butterflies
 	*/

 	/* this is a little 'hacked up'
 	*/
 	d0 = q15_q11_mul((-c0 -c1),0x1000); d1 = q15_q14_mul(( c0 - c1 ) , b[16]);
 	d2 = c2 + c3; d3 = q15_q14_mul(( c2 - c3 ) , b[16]);
 	d3 -= d2;

 	d4 = c4 +c5; d5 = q15_q14_mul(( c4 - c5 ),  b[16]);
 	d5 += d4;
 	d7 = -d5;
 	d7 += q15_q14_mul(( c6 - c7 ) , b[16]); d6 = +c6 +c7;

 	d8 = c8 + c9 ; d9 = q15_q14_mul(( c8 - c9 ) , b[16]);
 	d11= +d8 +d9;
 	d11 +=q15_q14_mul((c10 - c11) , b[16]); d10= c10+ c11;

 	d12 = c12+ c13; d13 = q15_q14_mul((c12 - c13) , b[16]);
 	d13 += -d8-d9+d12;
 	d14 = c14+ c15; d15 = q15_q14_mul((c14 - c15) , b[16]);
 	d15-=d11;
 	d14 += -d8 -d10;

 	/* even rows of the half that is windowed below */
 	u_p = (i16 (*)[16]) &u[ch][div][0][start];

 /*16*/  u_p[ 0][0] =+d1 ;
 	u_p[ 2][0] = +d9 -d14;
 /*20*/  u_p[ 4][0] = +d5 -d6;
 	u_p[ 6][0] = -d10 +d13;
 /*24*/  u_p[ 8][0] =d3;
 	u_p[10][0] = -d8 -d9 +d11 -d13;
 /*28*/  u_p[12][0] = +d7;
 	u_p[14][0] = +d15;

 	/* the other 32 are stored for use with the next granule
 	 */

 	u_p = (i16 (*)[16]) &u[ch][!div][0][start];

 /*0*/   u_p[16][0] = d0;
 	u_p[14][0] = -(+d8 );
 /*4*/   u_p[12][0] = -(+d4 );
 	u_p[10][0] = -(-d8 +d12 );
 /*8*/   u_p[ 8][0] = -(+d2 );
 	u_p[ 6][0] = -(+d8 +d10 -d12 );
 /*12*/  u_p[ 4][0] = -(-d4 +d6 );
 	u_p[ 2][0] = -d14;
 	u_p[ 0][0] = -d1;


 	/* Same network on the difference terms d16..d31 (step 2). */
 	c0=d16 + d24; c8= q15_q14_mul((d16 - d24) ,  b[2]);
 	c1=d17 + d25; c9= q15_q14_mul((d17 - d25) ,  b[6]);
 	c2=d18 + d26; c10= q15_q14_mul((d18 - d26) , b[14]);
 	c3=d19 + d27; c11= q15_q14_mul((d19 - d27) , b[10]);
 	c4=d20 + d28; c12= q15_q14_mul((d20 - d28) , b[30]);
 	c5=d21 + d29; c13= q15_q14_mul((d21 - d29) , b[26]);
 	c6=d22 + d30; c14= q15_q14_mul((d22 - d30) , b[18]);
 	c7=d23 + d31; c15= q15_q14_mul((d23 - d31) , b[22]);

 	/* 3
 	 */
 	d16= c0+ c4; d20= q15_q14_mul((c0 - c4) ,  b[4]);
 	d17= c1+ c5; d21= q15_q14_mul((c1 - c5) , b[12]);
 	d18= c2+ c6; d22= q15_q14_mul((c2 - c6) , b[28]);
 	d19= c3+ c7; d23= q15_q14_mul((c3 - c7) , b[20]);

 	d24= c8+ c12; d28= q15_q14_mul((c8 - c12) ,  b[4]);
 	d25= c9+ c13; d29= q15_q14_mul((c9 - c13) , b[12]);
 	d26= c10+ c14; d30= q15_q14_mul((c10 - c14) , b[28]);
 	d27= c11+ c15; d31= q15_q14_mul((c11 - c15) , b[20]);

 	/* 4
 	 */

 	c0= d16+ d18; c2= q15_q14_mul((d16 - d18) ,  b[8]);
 	c1= d17+ d19; c3= q15_q14_mul((d17 - d19) , b[24]);
 	c4= d20+ d22; c6= q15_q14_mul((d20 - d22) ,  b[8]);
 	c5= d21+ d23; c7= q15_q14_mul((d21 - d23) , b[24]);
 	c8= d24+ d26; c10= q15_q14_mul((d24 - d26) ,  b[8]);
 	c9= d25+ d27; c11= q15_q14_mul((d25 - d27) , b[24]);
 	c12= d28+ d30; c14= q15_q14_mul((d28 - d30) ,  b[8]);
 	c13= d29+ d31; c15= q15_q14_mul((d29 - d31) , b[24]);

 	/* 5
 	 * q15_q14_mul takes the two factors as separate arguments, exactly as
 	 * in the first step-5 group above; passing their product is wrong.
 	 */
 	d16= c0+ c1; d17= q15_q14_mul((c0 - c1) , b[16]);
 	d18= c2+ c3; d19= q15_q14_mul((c2 - c3) , b[16]);

 	d20= c4+ c5; d21= q15_q14_mul((c4 - c5) , b[16]);
 	d20+=d16; d21+=d17;
 	d22= c6+ c7; d23= q15_q14_mul((c6 - c7) , b[16]);
 	d22+=d16; d22+=d18;
 	d23+=d16; d23+=d17; d23+=d19;


 	d24= c8+ c9; d25= q15_q14_mul((c8 - c9) , b[16]);
 	d26= c10+ c11; d27= q15_q14_mul((c10 - c11) , b[16]);
 	d26+=d24;
 	d27+=d24; d27+=d25;

 	d28= c12+ c13; d29= q15_q14_mul((c12 - c13) , b[16]);
 	d28-=d20; d29+=d28; d29-=d21;
 	d30= c14+ c15; d31= q15_q14_mul((c14 - c15) , b[16]);
 	d30-=d22;
 	d31-=d23;

 	u_p = (i16 (*)[16]) &u[ch][!div][0][start];

 	u_p[ 1][0] = -(+d30 );
 	u_p[ 3][0] = -(+d22 -d26 );
 	u_p[ 5][0] = -(-d18 -d20 +d26 );
 	u_p[ 7][0] = -(+d18 -d28 );
 	u_p[ 9][0] = -(+d28 );
 	u_p[11][0] = -(+d20 -d24 );
 	u_p[13][0] = -(-d16 +d24 );
 	u_p[15][0] = -(+d16 );

 	/* the other 32 are stored for use with the next granule
 	 */

 	u_p = (i16 (*)[16]) &u[ch][div][0][start];

 	u_p[15][0] = +d31;
 	u_p[13][0] = +d23 -d27;
 	u_p[11][0] = -d19 -d20 -d21 +d27;
 	u_p[ 9][0] = +d19 -d29;
 	u_p[ 7][0] = -d18 +d29;
 	u_p[ 5][0] = +d18 +d20 +d21 -d25 -d26;
 	u_p[ 3][0] = -d17 -d22 +d25 +d26;
 	u_p[ 1][0] = +d17 -d30;


 	/*
 	 * Windowing.  Output sample s of this slot goes to pcm[2*s].
 	 * This is tuned specifically for architectures with
 	 * autoincrement and -decrement.
 	 */
 	pcm = &PcmBuffer[GrannulesInBuffer*1152+f*64+cha];

 	u_ptr--;

 	/* Samples 0..15: rows 0..15 of u[ch][div], 16 taps each. */
 	for (j = 0; j < 16; ++j)
 	{
 		/* every output sample is its own 16-tap sum */
 		outf1=outf2=outf3=outf4=0;

 		for (n=0; n<4; n++)
 		{
 			outf1 += q15_mul(*++u_ptr , *++dewindow);
 			outf2 += q15_mul(*++u_ptr , *++dewindow);
 			outf3 += q15_mul(*++u_ptr , *++dewindow);
 			outf4 += q15_mul(*++u_ptr , *++dewindow);
 		}

 		pcm[j*2] = outf1 + outf2 + outf3 + outf4;

 		dewindow += 16;
 	}

 	/* Sample 16: eight taps from row 16, at odd offsets 1..15 when div is
 	 * odd and at even offsets 2..16 otherwise. */
 	k = (div & 0x1) ? 1 : 2;
 	outf2=outf4=0;

 	for (n=0; n<4; n++)
 	{
 		outf2 += q15_mul(u_ptr[ k] , dewindow[k]);
 		k+=2;
 		outf4 += q15_mul(u_ptr[ k] , dewindow[k]);
 		k+=2;
 	}

 	pcm[j*2] = outf2 + outf4;

 	dewindow -= 31;
 	dewindow += start;
 	dewindow += start;
 	u_ptr -= 16;

 	/* Samples 17..31: rows 15 down to 1, window traversed backwards.  j
 	 * still equals 16 here, so the sample index is j+1; writing to index j
 	 * would overwrite sample 16 and leave sample 31 unwritten. */
 	for (; j < 31; ++j)
 	{
 		outf1=outf2=outf3=outf4=0;

 		for (n=0; n<4; n++)
 		{
 			outf1 += q15_mul(*++u_ptr , *--dewindow);
 			outf2 += q15_mul(*++u_ptr , *--dewindow);
 			outf3 += q15_mul(*++u_ptr , *--dewindow);
 			outf4 += q15_mul(*++u_ptr , *--dewindow);
 		}

 		/* the sign pattern of the taps alternates with div */
 		if (div & 0x1)
 			pcm[(j+1)*2] = outf2 - outf1 + outf4 - outf3;
 		else
 			pcm[(j+1)*2] = outf1 - outf2 + outf3 - outf4;

 		dewindow -= 16;
 		u_ptr -= 32;
 	}

 	/* advance the history: next column (modulo 16), other half */
 	--u_start[ch];
 	u_start[ch] &= 0xf;
 	u_div[ch]=u_div[ch] ? 0 : 1;

 }

 void MD_IMDCT_Init()
 {
 	u16 *k;
 	u16 i;
 		
 	spectrum=&SPECTRUM_POS;
 	for (i=0; i<18; i++)
 		prev[i]=0;
 		
 	k=&u[0][0][0][0];
 	for (i=0; i<2*2*17*16; i++)
 		*k++=0;
 }
diff --git a/log_exp.asm b/log_exp.asm
 ;This code is taken from TI's ........
 ;Used to calculate the exponent of an integer
 ;from 0 to 32768 (must be in A)
 		.mmregs
 		.global _log
 		.global _exp
 		.global _descale

 		.data
 ;Look-up table of natural logarithms for the integers 1..100 (100 words).
 ;_descale uses it (indexed by is-1) when is < 101, where the polynomial
 ;in _log loses important accuracy.
 ;The words equal ln(n)*4096, e.g. 0B17h = 2839 = ln(2)*4096 and
 ;49AEh = 18862 = ln(100)*4096.
 ;NOTE(review): the previous comment said "ln(0) to ln(100) in Q13"; the
 ;data itself starts at ln(1) and is scaled by 4096 - confirm the intended
 ;Q format.
 exp_lup	.word   0h,0B17h,1193h,162Eh,19C0h,1CABh,1F22h,2145h,2327h
 		.word  24D7h,265Dh,27C2h,290Ah,2A39h,2B54h,2C5Ch,2D54h,2E3Eh,2F1Ch
 		.word  2FEEh,30B6h,3174h,322Ah,32D9h,3380h,3421h,34BBh,3550h,35E0h
 		.word  366Bh,36F1h,3773h,37F1h,386Bh,38E2h,3956h,39C6h,3A33h,3A9Dh	
 		.word  3B05h,3B6Ah,3BCDh,3C2Dh,3C8Ch,3CE8h,3D42h,3D9Ah,3DF0h,3E44h
 		.word  3E97h,3EE8h,3F38h,3F86h,3FD2h,401Eh,4067h,40B0h,40F7h,413Dh
 		.word  4182h,41C6h,4208h,424Ah,428Ah,42CAh,4308h,4346h,4383h,43BEh
 		.word  43F9h,4433h,446Dh,44A5h,44DDh,4514h,454Ah,4580h,45B5h,45E9h
 		.word  461Ch,464Fh,4681h,46B3h,46E4h,4715h,4745h,4774h,47A3h,47D1h
 		.word  47FFh,482Ch,4859h,4885h,48B1h,48DCh,4907h,4932h,495Ch,4985h
 		.word  49AEh
 		
 ;Q11 format 2048*n*ln2 starting from n=15 to n=0 (16 words).
 ;_log indexes this table with the stored scaling number N.
 logtbl	.int 21294, 19874, 18454,17035, 15615, 14196, 12776
 		.int 11357,9937,8517,7098, 5678, 4259, 2839, 1420, 0
 ;Q15 format of the equation -32768/n (Taylor coefficients).
 ;The active words are n=10 down to 1 followed by two zero words; the
 ;commented-out pair is n=13 and n=12.
 ;The n=3 word is -32768/3 = -10923 (it was mistyped as -10293).
 ;NOTE(review): no n=11 word (-2979) is present in either form - confirm.
 a9_log	;.int -2521,-2731
 		.int -3277, -3641,-4096,-4681,-5461,-6554,-8192
 		.int -10923,-16384,-32768, 0, 0
 		
 ;Scratch words used by _log (N, X) and _descale (EXP, LNIS).
 		.bss N,1
 		.bss X,1
 		.bss EXP, 1
 		.bss LNIS,1
 		.text
 ;_log: natural logarithm of the integer <is> passed in A.
 ;Per the comments below, the result is returned in A in Q11 format.
 ;The value is normalised; the normalisation count selects a pre-computed
 ;n*ln2 term from logtbl and the remaining fraction goes through the
 ;a9_log polynomial.  Uses B, T, AR0, AR3, AR4 and the N / X scratch words.
 _log:
 		STM		N, AR4
 		ADD 	#0,A,B  			;B=A=is
 		EXP 	B  					;T=leading 0's of. Exponent of B
 		LD 		#0x4000, 16,A 		;AH=16384, the largest supported scale
 		ST 		T,*AR4 				;Store scaling number in N
 		ANDM 	#0Fh, *AR4 			;compensate extra 16 leading bits
 		MVDM	N,AR0				;AR0 index to segment table
 		NORM	B 					;Normalize to Q15 format
 		AND 	#0x3FFF, 16, B		;BH=BH-0x4000
 		BC		taylor_log, BNEQ	;B!=0: a fractional part remains, use the polynomial.
 									;B==0 means <is> can be represented in 2^N form:
 									;just return the result pre-stored in the index table
 		STM 	#logtbl+1, AR3		
 		MAR		*AR3+0
 		LD      *AR3, A
 		RET
 taylor_log:
 		STM		X, AR4
 		SUB 	B, 0 , A 			;A=A-B.A is the X in taylor's equation
 		STH 	A, *AR4				;X is the fractional part in Q15 format
 		STM		a9_log, AR3 			;AR3 points to coefficient in Taylor's equ
 		LD 		*AR4 , T 			;T is the X in the polynomial equation. POLY uses the value
 									;of T
 		LD 		*AR3+, 16, A 		;first coefficient of the n power in A
 		LD 		*AR3+, 16, B 		;second coefficient of the (n-1) power in B
 
 		RPT		#10					;repeat the next instruction 11 times (count+1)
 									;NOTE(review): the old comment said "13 times".  The two
 									;LDs plus 11 POLYs read 13 words but a9_log holds 12 -
 									;confirm the repeat count against the table length.
 		
 		POLY	*AR3+				;AH=fractional part of the polynomial in Q15 format
 		
 		SFTA	A, -16				;AH=AL
 		SFTA	A, -4				;Convert to Q11 format
 		
 		STM 	#logtbl, AR3		;sum up scaling part, N*ln2
 		MAR		*AR3+0
 		ADD 	*AR3,A
 		
 		RET
 		
 		.data
 
 ;exptbl holds e^(-n) in Q15 format for n = 0 to 10 (11 words).
 ;_exp indexes it with the scaling index stored in N1.
 exptbl	.int 0x7FFF, 0x2F16, 0x1152, 0x065F, 0x0258, 0x00DC, 0x0051, 0x01D, 0x000A, 0x0004, 0x0001
 
 ;a9_exp is generated by the equation 1/n! in Q15, n from 8 down to 1,
 ;followed by two zero words, ordered to facilitate the use of POLY.
 ;The commented-out line is an alternative set whose first three words are
 ;rounded up instead of truncated.
 ;a9_exp	.int 1,7,46,273,1365,5461,16384,32767,0,0
 a9_exp	.int 0,0x6,0x2D,0x111,0x555,0x1555,0x4000,0x7FFF,0,0
 		
 		.text    
 		
 ;Scratch words used by _exp: N1 = index into exptbl, X1 = polynomial argument.
 		.bss N1,1
 		.bss X1,1
 		
 ;_exp: exponential of the value passed in A; the result is returned in A.
 ;_descale calls it with the sum of its two Q11 logarithm terms.
 ;The argument is split into a scaling index (stored in N1, used to pick
 ;e^(-n) from exptbl) and a fraction (stored in X1) that goes through the
 ;a9_exp polynomial; the two factors are multiplied at exp_mul.
 ;Arguments below 0ACD2h return 0 (exp_q15limit).
 ;Uses B, T, AR0, AR3, AR4 and the N1 / X1 scratch words.
 _exp: 
 		SUB		#0ACD2h, A, B
 		BC		exp_q15limit, BLT	;below the limit: result is 0
 		AND		#0h, B
 		OR  	#0ffffh,B
 		SUB		A,B 		 		;Negative number.Make positive to compare
 		ADD 	#0, B, A
 		ADD 	#0,A,B
 		STM		N1, AR4 		
 		AND 	#400h, B			;Check if it is larger than 0.5
 		BCD 	adj, BNEQ			;If larger than 0.5 adjust
 		ADD 	#400h, A, B			
 		STM		N1, AR4 		
 		STL		B, -11, *AR4		;store scaling index
 		AND 	#3FFh, B			;truncate fractional part
 		STM		X1, AR4				;store fractional part
 		SFTA	B,4
 		ADD 	#0,B,A
 		LD 		#0FFFFh, 0, B
 		SUB		A,B 		 		;Negative number.Make positive to compare
 		STL		B, 0, *AR4			;in Q15 format
 		
 		B		taylor_exp
 
 ;Argument under the supported limit: return 0 in A.
 exp_q15limit:
 		AND		#0,A
 		B		exp_exit
 ;Fraction larger than 0.5: store the scaling index and a negative fraction.
 adj:
 		STL		B, -11, *AR4		;store scaling index
 		AND		#7FFh, B			;truncate fractional part
 		SUB 	#400h, B		
 		STM		X1, AR4				;store negative fraction
 		STL		B, 4, *AR4			;in Q15 format
 		LD 		*AR4, T
 		MPY		#-1,B
 		STL		B, *AR4
 
 ;Evaluate the a9_exp polynomial at the fraction stored in X1.
 taylor_exp:
 		
 		STM		a9_exp, AR3			;AR3 points to coefficient in
 									;Taylor's equ
 		LD 		*AR4 , T 			;T is the X in the polynomial equation. POLY uses the value
 									;of T
 		LD 		*AR3+, 16, A 		;first coefficient of the n power in A
 		LD 		*AR3+, 16, B 		;second coefficient of the (n-1) power in B
 
 		RPT		#7					;loop 8 times, enough accuracy for MP3
 		POLY	*AR3+				;AH=fractional part of the polynomial in Q14 
 
 		ADD 	#4000h, 16, A		;taylor equation has one constant.Q14
 		ADD		#0,A, B				;round
 		AND		#0800h, 16, B
 		BC		exp_mul, BEQ        ;if less than 0.5 don't round
 		AND 	#0F000h,16, A       ;will not overflow. Max value 0x6F85
 		ADD		#1000h,16,A			;rounding	
 ;Multiply the polynomial result by the exptbl entry selected by N1.
 exp_mul:		
 		MVDM	N1, AR0				;index into exptbl
 		STM		exptbl, AR3
 		MAR		*AR3+0
 		MPYA 	*AR3				;multiply the scaling part
 		SFTA	B,-14, A			;AL=BH
 exp_exit:
 		RET
 				
 ;This routine calculates x=(is)^4/3*2^exp
 ;Inputs <is> in A, and exp in stack
 ;Method, as coded below: ln(is) is taken from the exp_lup table when
 ;is < 101 and from _log otherwise, multiplied by 4/3 (5555h) and kept in
 ;LNIS; exp is multiplied by ln2 (58B9h); the two terms are added and the
 ;sum is passed to _exp, whose result is left in A.
 ;ST0/ST1 are saved on entry and restored before returning.
 ;NOTE(review): for is = 0 the look_up path indexes exp_lup with -1, i.e.
 ;the word in front of the table - confirm callers never pass 0.
 _descale:
 		STM		EXP, AR4
 		MVMM	SP, AR3			;first extract exp and place in T
 		LD		*AR3+, T
 		LD		*AR3,T
 		ST 		T, *AR4
 		PSHM	ST0
 		PSHM	ST1	
 		RSBX	SXM				;we don't want sign extension
 		SUB 	#101, A, B		;now check is to see if we can use the look-up table
 		BC 		look_up, BLT	
 		CALL 	_log			;returns ln(A) in Q11 format in A
 								;now multiply with 1,333333
 		SFTL	A,+15
 		SFTL	A, +1
 		STM     #5555h, T
 		MPYA	B
 		SFTA	B, -14, A
 		B 		calc_x
 ;is < 101: take ln(is) from exp_lup instead of calling _log.
 look_up:
 		SUB		#1, A			;subtract 1 from A in order
 		STLM	A, AR0			;to use the look up table
 		STM		exp_lup, AR3	
 		nop
 		LD 		#5555h, 16, A
 		MAR 	*AR3+0			;index AR3 to look up table
 								;load A with 1,33333 in Q14 format		
 		MPYA	*AR3			;multiply and store in B
 		SFTA 	B, -12, A		;return result in Q14 format
 		SFTA 	A, -3			;convert to Q11
 ;A holds 4/3*ln(is) in Q11: save it in LNIS, then build the exp*ln2 term.
 calc_x:
 		STM		LNIS, AR4
 		STL		A, *AR4
 		STM		EXP, AR4		
 		LD      *AR4, T
 		MPY		#-1, B			;exp is negative. Make positive 
 		SFTA	B, +15, A		;before multiplying
 		SFTA 	A, +1
 		STM		#58B9h, T		;T=ln2
 		MPYA	B
 		SFTA	B, -14, A		;convert to Q11
 		
 		STL		A, *AR4
 		LD		*AR4, T
 		MPY	    #-1, B 			;make negative again
 		
 		STM		LNIS, AR4		;and now add with LNIS
 		ADD		*AR4, B
 		AND 	#0FFFFh, B, A
 		CALL 	_exp
 		POPM	ST1
 		POPM	ST0
 		RET		
 		.
	;This code is taken from TI's ........
	;Used to calculate the exponent of an integer
	;from 0 to 32768 (must be in A)
	.mmregs
	.global _log
	.global _exp
	.global _descale

	.data
	;Look-up table of ln(n) for the first 100 integers, n = 1..100 (100 words),
	;where the series algorithm loses important accuracy.
	;Values are in Q12 format (e.g. 0B17h = 2839 ~ ln(2)*4096,
	;49AEh = 18862 ~ ln(100)*4096). _descale indexes the table with is-1
	;when is < 101.
	exp_lup .word 0h,0B17h,1193h,162Eh,19C0h,1CABh,1F22h,2145h,2327h
	.word 24D7h,265Dh,27C2h,290Ah,2A39h,2B54h,2C5Ch,2D54h,2E3Eh,2F1Ch
	.word 2FEEh,30B6h,3174h,322Ah,32D9h,3380h,3421h,34BBh,3550h,35E0h
	.word 366Bh,36F1h,3773h,37F1h,386Bh,38E2h,3956h,39C6h,3A33h,3A9Dh
	.word 3B05h,3B6Ah,3BCDh,3C2Dh,3C8Ch,3CE8h,3D42h,3D9Ah,3DF0h,3E44h
	.word 3E97h,3EE8h,3F38h,3F86h,3FD2h,401Eh,4067h,40B0h,40F7h,413Dh
	.word 4182h,41C6h,4208h,424Ah,428Ah,42CAh,4308h,4346h,4383h,43BEh
	.word 43F9h,4433h,446Dh,44A5h,44DDh,4514h,454Ah,4580h,45B5h,45E9h
	.word 461Ch,464Fh,4681h,46B3h,46E4h,4715h,4745h,4774h,47A3h,47D1h
	.word 47FFh,482Ch,4859h,4885h,48B1h,48DCh,4907h,4932h,495Ch,4985h
	.word 49AEh

	;n*ln(2) in Q11 format, i.e. round(2048*n*ln2), from n=15 down to n=0
	;(16 words). _log picks an entry with AR0 = N, using base logtbl on the
	;series path and base logtbl+1 on the exact power-of-two path.
	logtbl .int 21294, 19874, 18454,17035, 15615, 14196, 12776
	.int 11357,9937,8517,7098, 5678, 4259, 2839, 1420, 0
	;Q15 Taylor coefficients round(-32768/n) of
	;ln(1-x) = -(x + x^2/2 + ... + x^n/n), for n=11 down to 1,
	;followed by two zero words.
	;_log reads 13 consecutive words from this label (two LDs, then
	;RPT #10 / POLY = 11 more), so the table must hold 11 coefficients
	;plus the 2 zeros. The commented-out values are the n=13 and n=12 terms.
	;Fixes: the n=11 term (-2979) was missing, and the n=3 term was
	;mistyped as -10293 instead of -10923.
	a9_log ;.int -2521,-2731
	.int -2979
	.int -3277, -3641,-4096,-4681,-5461,-6554,-8192
	.int -10923,-16384,-32768, 0, 0

	.bss N,1 ;_log: scaling number (T after EXP, masked to 4 bits); index into logtbl
	.bss X,1 ;_log: series argument X, stored from AH
	.bss EXP, 1 ;_descale: exp argument; later reused as scratch for the exp*ln2 product
	.bss LNIS,1 ;_descale: the (4/3)*ln(is) term
	.text
	;_log: natural logarithm of the integer in A.
	;Input:   A = is
	;Output:  A = ln(is) in Q11 format
	;Scratch: B, T, AR0, AR3, AR4 and the N and X words.
	;Method:  normalise the input, then either return a pre-stored
	;         multiple of ln2 from logtbl (exact power of two) or add the
	;         logtbl entry to a polynomial in X built from a9_log.
	_log:
	STM N, AR4 ;AR4 -> N
	ADD #0,A,B ;B=A=is
	EXP B ;T=leading 0's of. Exponent of B
	LD #0x4000, 16,A ;AH=16384, the largest supported scale
	ST T,*AR4 ;Store scaling number in N
	ANDM #0Fh, *AR4 ;compensate extra 16 leading bits (keep low 4 bits of N)
	MVDM N,AR0 ;AR0 index to segment table
	NORM B ;Normalize to Q15 format
	AND #0x3FFF, 16, B ;BH=BH-0x4000 (masks B down to bits 29-16)
	BC taylor_log, BNEQ ;remaining bits non-zero -> series needed
	;otherwise (B==0) the input can be represented in 2^N form:
	;just return the result pre-stored in the index table
	STM #logtbl+1, AR3
	MAR *AR3+0 ;AR3 = logtbl + 1 + N
	LD *AR3, A
	RET
	taylor_log:
	STM X, AR4 ;AR4 -> X
	SUB B, 0 , A ;A=A-B.A is the X in taylor's equation
	STH A, *AR4 ;X is the fractional part in Q15 format
	STM a9_log, AR3 ;AR3 points to coefficient in Taylor's equ
	LD *AR4 , T ;T is the X in the polynomial equation. POLY uses the value
	;of T
	LD *AR3+, 16, A ;first coefficient of the n power in A
	LD *AR3+, 16, B ;second coefficient of the (n-1) power in B

	RPT #10 ;repeat the next instruction 11 times
	;NOTE(review): the two LDs above plus these 11 POLYs read 13
	;consecutive words starting at a9_log - the table must supply them

	POLY *AR3+ ;AH=fractional part of the polynomial in Q15 format

	SFTA A, -16 ;AL=AH
	SFTA A, -4 ;Convert to Q11 format

	STM #logtbl, AR3 ;sum up scaling part, N*ln2
	MAR *AR3+0 ;AR3 = logtbl + N
	ADD *AR3,A

	RET

	.data

	;exptbl holds e^(-n) in Q15 format for n = 0 to 10 (11 words).
	;_exp indexes it with AR0 = N1 and multiplies the series result by the entry.
	exptbl .int 0x7FFF, 0x2F16, 0x1152, 0x065F, 0x0258, 0x00DC, 0x0051, 0x01D, 0x000A, 0x0004, 0x0001

	;a9_exp is generated by the equation 1/n! in Q15. n runs from 8 down to 1
	;to facilitate the use of POLY, followed by two zero words; _exp reads
	;all 10 words (two LDs plus RPT #7 / POLY).
	;The active table uses truncated values, so the 1/8! slot is 0; the
	;commented-out line below is the rounded variant.
	;a9_exp .int 1,7,46,273,1365,5461,16384,32767,0,0
	a9_exp .int 0,0x6,0x2D,0x111,0x555,0x1555,0x4000,0x7FFF,0,0

	.text

	.bss N1,1 ;_exp: scaling index (argument magnitude >> 11), used to index exptbl
	.bss X1,1 ;_exp: fractional remainder, loaded into T for the POLY series

	;_exp: exponential of the 16-bit value in A.
	;Input:   A = argument in Q11 (bit 10 is tested as 0.5 and >>11 gives the
	;         integer part). _descale calls this with SXM cleared;
	;         NOTE(review): the compare against 0ACD2h presumably relies on that.
	;Output:  A = 0 when A - 0ACD2h is negative; otherwise the series value
	;         multiplied by exptbl[N1] and shifted right by 14.
	;Scratch: B, T, AR0, AR3, AR4 and the N1 and X1 words.
	;Method:  split the magnitude into a scaling index N1 (rounded to nearest)
	;         and a fractional remainder X1, evaluate the a9_exp polynomial
	;         in X1, add the constant term, then scale by e^(-N1) from exptbl.
	_exp:
	SUB #0ACD2h, A, B ;B = A - 0ACD2h
	BC exp_q15limit, BLT ;below the limit -> return 0
	AND #0h, B
	OR #0ffffh,B ;B = 0FFFFh
	SUB A,B ;Negative number.Make positive to compare (B = 0FFFFh - A)
	ADD #0, B, A ;A = B
	ADD #0,A,B ;B = A
	STM N1, AR4 ;AR4 -> N1
	AND #400h, B ;Check if it is larger than 0.5
	BCD adj, BNEQ ;If larger than 0.5 adjust (delayed branch)
	ADD #400h, A, B ;B = A + 0.5; sits in the BCD delay slot, and adj relies on this B
	STM N1, AR4 ;AR4 -> N1 again (already set above)
	STL B, -11, *AR4 ;store scaling index
	AND #3FFh, B ;keep the fractional part below 0.5
	STM X1, AR4 ;store fractional part
	SFTA B,4 ;Q11 fraction -> Q15
	ADD #0,B,A ;A = B
	LD #0FFFFh, 0, B
	SUB A,B ;B = 0FFFFh - A, i.e. the fraction made negative
	STL B, 0, *AR4 ;in Q15 format

	B taylor_exp

	exp_q15limit:
	AND #0,A ;result = 0
	B exp_exit
	adj:
	STL B, -11, *AR4 ;store scaling index (rounded up, B = A + 400h)
	AND #7FFh, B ;keep the fractional part
	SUB #400h, B ;B = fraction - 0.5
	STM X1, AR4 ;store negative fraction
	STL B, 4, *AR4 ;in Q15 format
	LD *AR4, T
	MPY #-1,B ;B = -X1
	STL B, *AR4 ;X1 = -X1

	taylor_exp:

	STM a9_exp, AR3 ;AR3 points to coefficient in
	;Taylor's equ
	LD *AR4 , T ;T is the X in the polynomial equation. POLY uses the value
	;of T
	LD *AR3+, 16, A ;first coefficient of the n power in A
	LD *AR3+, 16, B ;second coefficient of the (n-1) power in B

	RPT #7 ;loop 8 times, enough accuracy for MP3
	POLY *AR3+ ;AH=fractional part of the polynomial in Q14

	ADD #4000h, 16, A ;taylor equation has one constant.Q14
	ADD #0,A, B ;round: B = A
	AND #0800h, 16, B ;test bit 11 of AH
	BC exp_mul, BEQ ;if less than 0.5 don't round
	AND #0F000h,16, A ;will not overflow. Max value 0x6F85
	ADD #1000h,16,A ;rounding
	exp_mul:
	MVDM N1, AR0 ;index into exptbl
	STM exptbl, AR3
	MAR *AR3+0 ;AR3 = exptbl + N1
	MPYA *AR3 ;multiply the scaling part (B = table entry * A(32-16))
	SFTA B,-14, A ;AL=BH
	exp_exit:
	RET

	;_descale: calculates x = (is)^(4/3) * 2^exp, evaluated as
	;          _exp( (4/3)*ln(is) + exp*ln2 ).
	;Inputs:   <is> in A; <exp> on the stack, in the word at SP+1 on entry.
	;Output:   whatever _exp leaves in A.
	;Scratch:  B, T, AR0, AR3, AR4 and the EXP and LNIS words.
	;ST0/ST1 are pushed on entry and popped before RET because SXM is
	;cleared for the duration of the routine.
	_descale:
	STM EXP, AR4 ;AR4 -> EXP
	MVMM SP, AR3 ;first extract exp and place in T
	LD *AR3+, T ;dummy read of the word at SP; only steps AR3 to SP+1
	LD *AR3,T ;T = exp argument
	ST T, *AR4 ;EXP = exp
	PSHM ST0 ;save status registers
	PSHM ST1
	RSBX SXM ;we don't want sign extension
	SUB #101, A, B ;B = is - 101: check whether the look-up table can be used
	BC look_up, BLT ;is < 101 -> take ln(is) from exp_lup
	CALL _log ;returns ln(A) in Q11 format in A
	;now multiply with 1.333333
	SFTL A,+15 ;A <<= 16 in two steps, so that
	SFTL A, +1 ;the old AL sits in A(32-16) for MPYA
	STM #5555h, T ;T = 4/3 in Q14 (5555h / 4000h)
	MPYA B ;B = T * A(32-16)
	SFTA B, -14, A ;remove the Q14 scaling of 4/3, result in A
	B calc_x
	look_up:
	SUB #1, A ;subtract 1 from A in order
	STLM A, AR0 ;to use the look up table (AR0 = is - 1)
	STM exp_lup, AR3 ;AR3 -> start of exp_lup
	nop ;NOTE(review): presumably covers the latency of the AR0/AR3 writes before MAR - confirm
	LD #5555h, 16, A ;load A with 1.33333 in Q14 format (in AH)
	MAR *AR3+0 ;index AR3 to look up table (AR3 += AR0)
	MPYA *AR3 ;multiply table entry by A(32-16) and store in B
	SFTA B, -12, A ;return result in Q14 format
	SFTA A, -3 ;convert to Q11
	calc_x:
	STM LNIS, AR4
	STL A, *AR4 ;LNIS = low word of A, the (4/3)*ln(is) term
	STM EXP, AR4
	LD *AR4, T ;T = exp
	MPY #-1, B ;exp is negative. Make positive (B = -exp)
	SFTA B, +15, A ;before multiplying
	SFTA A, +1 ;A = B << 16
	STM #58B9h, T ;T=ln2 (58B9h = 22713 ~ ln2 * 32768)
	MPYA B ;B = T * A(32-16)
	SFTA B, -14, A ;convert to Q11 - NOTE(review): depends on the Q format of exp, not visible here

	STL A, *AR4 ;EXP word reused as scratch for the product
	LD *AR4, T
	MPY #-1, B ;make negative again

	STM LNIS, AR4 ;and now add with LNIS
	ADD *AR4, B ;B = exp*ln2 + (4/3)*ln(is)
	AND #0FFFFh, B, A ;A = low 16 bits of B, the argument for _exp
	CALL _exp
	POPM ST1 ;restore status registers in reverse push order
	POPM ST0
	RET
	.