Created
September 30, 2022 08:06
-
-
Save akay-10/1c0c26f98447180e690fbd0be8f545b8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// task3.cpp : Defines the entry point for the console application. | |
// | |
#include "stdafx.h" | |
#include <stdio.h> | |
#include <cstring> | |
#include <limits> | |
#include <cfloat> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <ctype.h> | |
#include <conio.h> | |
// A frame under evaluation (20 millisecs * 16000 samples/sec = 320 samples)
#define N 320
// Number of past samples used (order of the analysis); also the number of
// cepstral coefficients kept per frame
#define P 12
// +-2 frames around the steady frame, hence total 5 frames in steady part
#define STEADY_FRAMES 5
// Number of utterances per vowel
#define UTT 10
// Number of vowels
#define NVOW 5
// Pi. The earlier value 3.142857142857 was the fraction 22/7, which differs
// from pi in the third decimal and skewed every window computed from it.
#define PI 3.14159265358979323846
// Used in normalizing values in range [-5000, 5000]
const long double NORM = 5000.0;
// Maximum absolute sample value of the file being processed
long double MAX;
// By def. = NORM / MAX
long double NORMALISATION_FACTOR;
// Mean sample value of the file being processed (subtracted from each sample)
long double dcShift;
// Number of files whose coefficients have been stored in combinedCi
int files = 0;
// Vowel labels, in the order used for every per-vowel array and file name
const char VOWELS[NVOW] = {'a', 'e', 'i', 'o', 'u'};
// Per-coefficient weights of the Tokhura distance (one weight per c1..cP)
const long double tokhuraWeights[]={1.0, 3.0, 7.0, 13.0, 19.0, 22.0, 25.0, 33.0, 42.0, 50.0, 56.0, 61.0};
// Number of valid entries in x[]
long long sz;
// Number of valid entries in energy[] (one entry per frame of N samples)
long long EnergySz;
// Starting and ending sample position (end exclusive) of the steady part in x[]
long long steadyStart, steadyEnd;
// Accuracy counters: over all test files, and over the current vowel only
int totalCorrect = 0, individualCorrect= 0;
// Array for samples
long double x[100000];
// Array for energy of frames
long double energy[100000];
// Array for samples but for only steady part
long double steadyFrames[STEADY_FRAMES][N];
// Array for storing tokhura distance to each vowel's reference
long double tokhuraDist[NVOW];
// Arrays for storing Ris, ais, Cis, avg Cis, reference Cis and combined Cis respectively.
// Index 0 of the last dimension is kept separate; coefficients live in 1..P.
long double R[STEADY_FRAMES][P+1];
long double A[STEADY_FRAMES][P+1];
long double C[STEADY_FRAMES][P+1];
long double avgCi[STEADY_FRAMES*NVOW][P+1];
long double Ci[STEADY_FRAMES][P+1];
long double restoreCi[STEADY_FRAMES][P+1];
long double combinedCi[NVOW*UTT][STEADY_FRAMES][P+1];
// Storing the avg ci values to file | |
void calculateAvgCis(){ | |
FILE *filePtr; | |
char fileName[] = "gen/ref_ci_$.txt"; | |
int index = 0; | |
for(int ithVowel=0; ithVowel<5; ithVowel++){ | |
printf("ref_ci_%c.txt generated \n", VOWELS[ithVowel]); | |
fileName[11] = VOWELS[ithVowel]; | |
filePtr = fopen(fileName, "w"); | |
for(int ithFrame=0; ithFrame<STEADY_FRAMES; ithFrame++){ | |
for(int p=0; p<P; p++){ | |
long double sum = 0; | |
for(int file=ithVowel*10; file<(ithVowel+1)*10; file++){ | |
sum += combinedCi[file][ithFrame][p+1]; | |
} | |
sum /= 10.0; | |
avgCi[index][p+1] = sum; | |
fprintf(filePtr, "%lf ", sum); | |
} | |
index++; | |
fprintf(filePtr, "\n"); | |
} | |
fclose(filePtr); | |
} | |
} | |
// Function for calulating the cepstral coeff Ci's | |
void calculateCis(){ | |
long double sum=0; | |
for(int ithFrame = 0; ithFrame<STEADY_FRAMES; ithFrame++){ | |
C[ithFrame][0]=log(R[ithFrame][0]*R[ithFrame][0]); | |
for(int m=1;m<=P;m++){ | |
sum=0; | |
for(int k=1;k<m;k++){ | |
sum += (k*C[ithFrame][k]*A[ithFrame][m-k])/(m*1.0); | |
} | |
C[ithFrame][m] = A[ithFrame][m]+sum; | |
} | |
} | |
// Applying raised sine window | |
sum=0; | |
for(int ithFrame = 0; ithFrame<STEADY_FRAMES; ithFrame++){ | |
for(int m=1;m<=P;m++){ | |
sum = (P/2)*sin((PI*m)/P); | |
C[ithFrame][m] *= sum; | |
} | |
} | |
for(int ithFrame=0; ithFrame<STEADY_FRAMES; ithFrame++){ | |
for(int i=0;i<P;i++){ | |
combinedCi[files][ithFrame][i+1]=C[ithFrame][i+1]; | |
} | |
} | |
files++; | |
} | |
// Executing Durbins Algorithm and finding the value of ais | |
void execDurbinAlgo(){ | |
long double alpha[13][13]; | |
long double E[13]; | |
long double K[13]; | |
long double sum=0; | |
for(int ithFrame = 0; ithFrame<STEADY_FRAMES; ithFrame++){ | |
E[0] = R[ithFrame][0]; | |
for(int i=1;i<=P;i++){ | |
sum=0; | |
for(int j=1;j<=i-1;j++){ | |
sum += alpha[i-1][j]*R[ithFrame][i-j]; | |
} | |
K[i] = R[ithFrame][i]-sum; | |
K[i] /= E[i-1]; | |
alpha[i][i]=K[i]; | |
for(int j=1;j<=i-1;j++){ | |
alpha[i][j] = alpha[i-1][j] - K[i]*alpha[i-1][i-j]; | |
} | |
E[i]=(1-(K[i]*K[i]))*E[i-1]; | |
} | |
for(int i=1;i<=P;i++){ | |
A[ithFrame][i] = alpha[P][i]; | |
} | |
} | |
//finding cepstral constants | |
calculateCis(); | |
} | |
// Computes the mean of all sample values in fileName and stores it in the
// global dcShift.
// dcShift is left at 0 when the file cannot be opened or holds no readable
// sample.
void handleDCShift(char *fileName){
    dcShift = 0;
    FILE *filePtr = fopen(fileName, "r");
    if(filePtr == NULL){
        perror(fileName);
        return;
    }
    long int sampleCnt = 0;
    long double total = 0;
    long double sample;
    // Loop on the fscanf result rather than feof(): a failed conversion then
    // ends the loop instead of re-adding the previous value or spinning forever
    while(fscanf(filePtr, "%Lf", &sample) == 1){
        total += sample;
        sampleCnt++;
    }
    fclose(filePtr);
    // Avoid dividing by zero on an empty file
    if(sampleCnt > 0){
        dcShift = total / sampleCnt;
    }
}
//marking the stable frames using STE | |
void storeSteadyFrames(){ | |
long long ithSample = 0, maxAt = 0, n = 0; | |
long double E = 0, maxE = 0; | |
EnergySz = 0; | |
for(;ithSample < sz;ithSample++, n++){ | |
if(n == N){ | |
E /= N; | |
if(maxE < E){ | |
maxE = E; | |
maxAt = EnergySz; | |
} | |
energy[EnergySz++] = E; | |
E = 0, n = 0; | |
} | |
E += x[ithSample] * x[ithSample]; | |
} | |
steadyStart = (maxAt-2)*N; | |
if(maxAt <= 2) steadyStart = 0; | |
steadyEnd = (maxAt+3)*N; | |
if(maxAt >= EnergySz-3) steadyEnd = EnergySz*N; | |
int ithFrame = 0; | |
for(int i = steadyStart, j=0; i<steadyEnd; i++){ | |
steadyFrames[ithFrame][j++] = x[i]; | |
if(j == N) ithFrame++, j=0; | |
} | |
} | |
void init(char *fileName){ | |
long long totalSample = 0; | |
FILE *filePtr; | |
filePtr = fopen(fileName, "r"); | |
MAX = 0; | |
long long amp; | |
while(!feof(filePtr)){ | |
fscanf(filePtr,"%lld\n", &); | |
totalSample++; | |
if(MAX < abs(amp)) | |
MAX = abs(amp); | |
} | |
NORMALISATION_FACTOR = (long double)NORM/MAX; | |
handleDCShift(fileName); | |
filePtr = fopen(fileName, "r"); | |
sz = 0; | |
// Reading the values from input files, normalizing it and storing in x[] | |
long double amp2; | |
while(!feof(filePtr)){ | |
fscanf(filePtr,"%Lf\n", &2); | |
long double newXi = floor((amp2-dcShift)*NORMALISATION_FACTOR); | |
x[sz++] = newXi; | |
} | |
storeSteadyFrames(); | |
// Applying hamming window | |
for(int i=0; i<STEADY_FRAMES; ++i){ | |
for(int j=0; j<N; ++j){ | |
steadyFrames[i][j] *= (0.54-0.46*cos((2*PI*steadyFrames[i][j])/(N-1))); | |
} | |
} | |
// Calculating Ris | |
for(int ithFrame = 0; ithFrame < 5; ithFrame++){ | |
for(int m = 0; m <= P; m++){ | |
R[ithFrame][m] = 0; | |
for(int k=0; k<N-m; k++){ | |
R[ithFrame][m] += steadyFrames[ithFrame][k]*steadyFrames[ithFrame][k+m]; | |
} | |
} | |
} | |
//calling execDurbinAlgo to find ai values | |
execDurbinAlgo(); | |
fclose(filePtr); | |
} | |
//driver function to execute training | |
void train(){ | |
printf("Training of the data ...\n\n"); | |
for(int i=0; i<5; i++){ | |
for(int j = 0; j<10; j++){ | |
char fileName[] = "$/190101002_$_$.txt"; | |
fileName[0] = VOWELS[i]; | |
fileName[12] = VOWELS[i]; | |
fileName[14] = j + '0'; | |
init(fileName); | |
} | |
} | |
calculateAvgCis(); | |
} | |
//fucntion which calculates the distance using dump Ci values of training set | |
double tokhuraDistance(FILE *filePtr){ | |
int ithFrame = 0; | |
while(!feof(filePtr) && ithFrame<STEADY_FRAMES){ | |
for(int i=1;i<=12;i++){ | |
if(i==12) | |
fscanf(filePtr, "%lf\n", &restoreCi[ithFrame][i]); | |
else | |
fscanf(filePtr, "%lf ", &restoreCi[ithFrame][i]); | |
} | |
ithFrame++; | |
} | |
double finalDist = 0; | |
for(int i=0; i<STEADY_FRAMES; i++){ | |
double dist = 0; | |
for(int p=1; p<=P; p++){ | |
dist += tokhuraWeights[p-1]*((C[i][p]- restoreCi[i][p]))*((C[i][p]- restoreCi[i][p])); | |
} | |
finalDist += dist/(P*1.0); | |
} | |
return finalDist/(STEADY_FRAMES*1.0); | |
} | |
//function to calculate the distance and making prediction | |
char calculateTokhura(){ | |
char fileName[] = "gen/ref_ci_$.txt"; | |
FILE *filePtr; | |
long double minDist = DBL_MAX; | |
char predictedVowel; | |
for(int i=0; i<5; i++){ | |
fileName[11] = VOWELS[i]; | |
filePtr = fopen(fileName, "r"); | |
long double distance = tokhuraDistance(filePtr); | |
tokhuraDist[i] = distance; | |
if(minDist > distance){ | |
minDist = distance; | |
predictedVowel = VOWELS[i]; | |
} | |
} | |
return predictedVowel; | |
} | |
void test(){ | |
files = 0; | |
printf("\n\nTesting phase started\n"); | |
for(int ithVowel = 0; ithVowel<5; ithVowel++){ | |
individualCorrect = 0; | |
for(int file=0; file<10; file++){ | |
char fileName[] = "$/190101002_$_1$.txt"; | |
fileName[0] = VOWELS[ithVowel]; | |
fileName[12] = VOWELS[ithVowel]; | |
fileName[15] = file + '0'; | |
init(fileName); | |
char pred = calculateTokhura(); | |
printf("\nVowel %c is predicted as %c\n", fileName[0], pred); | |
// printf("Distance from (a, e, i, o, u) is \n(%lf, %lf, %lf, %lf, %lf)\n\n", tokhuraDist[0], tokhuraDist[1], tokhuraDist[2], tokhuraDist[3], tokhuraDist[4]); | |
totalCorrect += (pred == VOWELS[ithVowel]); | |
individualCorrect += (pred == VOWELS[ithVowel]); | |
} | |
printf("======================= Accuracy of %c is : %.2lf %% \n\n", VOWELS[ithVowel], (individualCorrect/10.0)*100); | |
} | |
printf("======================= Overall Accuracy is : %.2lf %% \n", (totalCorrect/50.0)*100); | |
} | |
// Program entry point: trains on the recorded utterances, evaluates the test
// utterances, then waits for a key press before returning 0.
// The command-line arguments are not used.
int _tmain(int argc, _TCHAR* argv[])
{
    // Build the reference files under ./gen/ from the training recordings
    train();

    // Predict the vowel of each remaining recording and report accuracy
    test();

    // Wait for a key press before the program returns
    getch();
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment