I was testing the Szu-Wei Lee algorithm and found that it is faster than the one in libmad. A very simple implementation that directly follows the derivation of the algorithm is:
/* fast 9-point transform (Szu-Wei Lee); only 8 multiplications */
static void sdct9(const mad_fixed_t x[9], mad_fixed_t y[9])
{
  mad_fixed_t x0, x1, x2, x3, x4, x5, x6, x7, x8;
  mad_fixed_t a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13,
              a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26;
  mad_fixed_t m1, m2, m3, m4, m5, m6, m7, m8;

  const mad_fixed_t d[] = {
    0x1bb67ae8,  /* sqrt(3)         */
    0xe1ee09bf,  /* 2*cos(8*M_PI/9) */
    0x058e86a0,  /* 2*cos(4*M_PI/9) */
    0x18836fa3,  /* 2*cos(2*M_PI/9) */
    0x0af1d43a,  /* 2*sin(8*M_PI/9) */
    0x1f838b8d,  /* 2*sin(4*M_PI/9) */
    0x1491b752   /* 2*sin(2*M_PI/9) */
  };

  x0 = x[0]; x1 = x[1]; x2 = x[2];
  x3 = x[3]; x4 = x[4]; x5 = x[5];
  x6 = x[6]; x7 = x[7]; x8 = x[8];

  a1  = x3 + x5;
  a2  = x3 - x5;
  a3  = x6 + x2;
  a4  = x6 - x2;
  a5  = x1 + x7;
  a6  = x1 - x7;
  a7  = x8 + x0;
  a8  = x8 - x0;

  a9  = x4  + a5;
  a10 = a1  + a3;
  a11 = a10 + a7;
  a12 = a3  - a7;
  a13 = a1  - a7;
  a14 = a1  - a3;
  a15 = a2  - a4;
  a16 = a15 + a8;
  a17 = a4  + a8;
  a18 = a2  - a8;
  a19 = a2  + a4;

  m1 = mad_f_mul(-d[0], a6);
  m2 = mad_f_mul(-d[1], a12);
  m3 = mad_f_mul(-d[2], a13);
  m4 = mad_f_mul(-d[3], a14);
  m5 = mad_f_mul(-d[0], a16);
  m6 = mad_f_mul(-d[4], a17);
  m7 = mad_f_mul(-d[5], a18);
  m8 = mad_f_mul(-d[6], a19);

  a20 = x4 + x4 - a5;
  a21 = a20 + m2;
  a22 = a20 - m2;
  a23 = a20 + m3;
  a24 = m1  + m6;
  a25 = m1  - m6;
  a26 = m1  + m7;

  y[0] = a9  + a11;
  y[1] = m8  - a26;
  y[2] = m4  - a21;
  y[3] = m5;
  y[4] = a22 - m3;
  y[5] = a25 - m7;
  y[6] = a11 - a9 - a9;
  y[7] = a24 + m8;
  y[8] = a23 + m4;
}
/* 18-point scaled DCT, decomposed into two 9-point transforms */
static void sdct18(const mad_fixed_t x[18], mad_fixed_t X[36])
{
  mad_fixed_t u[9];
  mad_fixed_t v[9];
  mad_fixed_t x1[9];
  mad_fixed_t x2[9];
  int i;

  const mad_fixed_t costab[] = {
    /* 2*cos(M_PI*(2*i+1)/(2*18)), i = 0..8 */
    0x1fe0d3b4, 0x1ee8dd47, 0x1d007930,
    0x1a367e59, 0x16a09e66, 0x125abcf8,
    0x0d8616bc, 0x08483ee1, 0x02c9fad7
  };

  for (i = 0; i < 9; i++) {
    x1[i] = x[i] + x[18 - 1 - i];
    x2[i] = mad_f_mul(x[i] - x[18 - 1 - i], costab[i]);
  }

  sdct9(x1, u);
  sdct9(x2, v);

  for (i = 1; i < 9; i++)
    v[i] = v[i] - v[i - 1];

  X[0] = u[0];
  X[1] = v[0];

  for (i = 1; i < 9; i++) {
    X[2 * i]     = u[i];
    X[2 * i + 1] = v[i];
  }
}
/* 18-point DCT-IV, computed via sdct18 with a cosine pre-multiplication
   and a difference on the outputs */
static void dctIV(mad_fixed_t const y[18], mad_fixed_t X[36])
{
  int i;
  mad_fixed_t temp[18];

  const mad_fixed_t costab[] = {
    /* cos(M_PI*(2*i+1)/(2*36)), i = 0..17 */
    0x0ffc19fd, 0x0fdcf549, 0x0f9ee890, 0x0f426cb5, 0x0ec835e8, 0x0e313245,
    0x0d7e8807, 0x0cb19346, 0x0bcbe352, 0x0acf37ad, 0x09bd7ca0, 0x0898c779,
    0x07635284, 0x061f78aa, 0x04cfb0e2, 0x03768962, 0x0216a2a2, 0x00b2aa3e
  };

  for (i = 0; i < 18; i++)
    temp[i] = mad_f_mul(y[i], costab[i]);

  sdct18(temp, X);

  for (i = 1; i < 18; i++)
    X[i] = X[i] - X[i - 1];
}
/* 36-point IMDCT: an 18-point DCT-IV followed by the symmetry extension
   and reordering of the output */
static inline void my_imdct36(mad_fixed_t const X[18], mad_fixed_t y[36])
{
  mad_fixed_t temp[36];
  int m;

  dctIV(X, temp);

  for (m = 0; m < 18; m++)
    temp[35 - m] = -temp[m];

  for (m = 0; m <= 8; m++)
    y[m + 3 * 9] = -temp[m];

  for (m = 9; m < 36; m++)
    y[m - 9] = temp[m];
}
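For sanity checking, my_imdct36 can be compared against a direct evaluation of the Layer III IMDCT definition, y[i] = sum_{k=0}^{17} X[k] * cos(M_PI/72 * (2*i+1+18) * (2*k+1)). Below is a minimal floating-point sketch of such a reference; ref_imdct36 is only an illustrative name, and I have not re-checked here whether a constant scale factor separates it from the fixed-point routine above:

#include <math.h>

/* direct (slow) IMDCT of size 36, straight from the Layer III definition:
   y[i] = sum_{k=0}^{17} X[k] * cos(M_PI/72 * (2*i + 1 + 18) * (2*k + 1)) */
static void ref_imdct36(double const X[18], double y[36])
{
  int i, k;

  for (i = 0; i < 36; ++i) {
    double sum = 0.0;

    for (k = 0; k < 18; ++k)
      sum += X[k] * cos(M_PI / 72 * (2 * i + 1 + 18) * (2 * k + 1));

    y[i] = sum;
  }
}

The fixed-point values can be converted with mad_f_todouble() for the comparison; the tables above are in the default Q28 format (the sqrt(3) entry 0x1bb67ae8 is sqrt(3)*2^28), so only a small absolute tolerance should be needed.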
The imdct of size 12 has not been implemented. I will now work on it and on some optimisations.
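For the size-12 transform (used for short blocks), the corresponding definition is y[i] = sum_{k=0}^{5} X[k] * cos(M_PI/24 * (2*i+1+6) * (2*k+1)). Until a fast version exists, a direct form can again serve as a reference; this is only a sketch in the same spirit as ref_imdct36 above, not an optimised implementation:

/* direct (slow) IMDCT of size 12, straight from the Layer III definition;
   assumes <math.h> as in ref_imdct36 above */
static void ref_imdct12(double const X[6], double y[12])
{
  int i, k;

  for (i = 0; i < 12; ++i) {
    double sum = 0.0;

    for (k = 0; k < 6; ++k)
      sum += X[k] * cos(M_PI / 24 * (2 * i + 1 + 6) * (2 * k + 1));

    y[i] = sum;
  }
}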
Hope this is useful.
Rafael.
> I was testing the Szu-Wei Lee algorithm and found that it is faster than the one in libmad.
As a matter of fact I have also been experimenting with a new Layer III IMDCT implementation based on the same algorithm. It is nice because it reduces the number of multiplications from 188 to 43.
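(For comparison, the code posted above also needs 43 fixed-point multiplications: 18 in the dctIV pre-multiplication, 9 in sdct18, and 8 in each of the two sdct9 calls.)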
I have had to modify the algorithm slightly to improve the output accuracy. Compared to the libmad 0.15.0b release, it improves overall performance on many platforms by as much as 6-7%, and accuracy as well.
My current implementation does not appear to improve performance over Andre McCurdy's hand-written assembly implementation for ARM platforms, however. Maybe that assembly could be rewritten according to the new algorithm for significant gains.
My implementation is attached; this or something very similar will be incorporated into the next libmad release.