//line numbers refer to Code Fragment 12.4 (page 435) in "Kernel Methods for Pattern Analysis" by John Shawe-Taylor and Nello Cristianini //use symbols 1, 2, 3, etc. #include #include #include #include #include using namespace std; int main() { int string_length = 10; int number_of_states = 5; int number_of_symbols = 5; int p = number_of_states; int n = string_length; int a,b; double Prob = 0; string stringstring; ifstream hmmstream("globalhmm.txt"); //INPUT: Hidden Markov model, contains one line of parameters ifstream stringfile("strings.txt"); //INPUT: symbol strings, one per line ofstream fisherfile("fisher.txt"); //OUTPUT: Fisher scores, one data item per line int s[n+1]; //symbol string, uses s[1] to s[n] (s[0] is never used) double PM[p+1][p+1]; //state transition probability matrix double P[number_of_symbols+1][p+1]; //conditional probabilities of symbols given states double scoree[p+1][number_of_symbols+1]; //Fisher scores for the emission probabilities double scoret[p+1][p+1]; //Fisher scores for the transmission probabilities double forw[p+1][n+1]; double back[p+1][n+1]; //initialize to zero for (int i=0; i<=p; i++) for (int j=0; j<=p; j++) PM[i][j] = 0; for (int i=0; i<=number_of_symbols; i++) for (int j=0; j<=p; j++) P[i][j] = 0; PM[1][0] = 1.0; //because it is a left-to-right hidden Markov model for (int i=2; i<=p; i++) PM[i][0] = 0; for (int i=1; i<=p; i++) for (int j=1; j<=p; j++) hmmstream >> PM[i][j]; for (int i=1; i<=number_of_symbols; i++) for (int j=1; j<=p; j++) hmmstream >> P[i][j]; while (getline(stringfile, stringstring)) { istringstream stringstream (stringstring); //initialize to zero for (int i=0; i<=p; i++) for (int j=0; j<=n; j++) forw[i][j] = 0; for (int i=0; i<=p; i++) for (int j=0; j<=n; j++) back[i][j] = 0; for (int i=0; i<=p; i++) for (int j=0; j<=number_of_symbols; j++) scoree[i][j] = 0; for (int i=0; i<=p; i++) for (int j=0; j<=p; j++) scoret[i][j] = 0; for (int i=0; i<=n; i++) s[i] = 0; for (int i=1; i<=n; i++) stringstream >> s[i]; for (int i=0; i<=p; i++) for (int j=1; j<=number_of_symbols; j++) scoree[i][j] = 0; //line 2 for (int i=0; i<=p; i++) for (int j=1; j<=p; j++) scoret[i][j] = 0; //mvs for (int i=0; i<=p; i++) forw[i][0] = 0; //line 3 for (int i=0; i<=p; i++) //line 4 back[i][n] = 1; forw[0][0] = 1; //line 4 (corrected) Prob = 0; for (int i=1; i<=n; i++) { //line 5 for (a=1; a<=p; a++) { //line 7 forw[a][i] = 0; //line 8 for (b=0; b<=p; b++) //line 9 (corrected) forw[a][i] = forw[a][i] + PM[a][b]*forw[b][i-1]; //line 10 forw[a][i] = forw[a][i]*P[s[i]][a]; //line 12 } } for (a=1; a<=p; a++) //line 15 Prob = Prob + forw[a][n]; for (int i=n-1;i>=1;i--) { //line 18 for (a=1; a<=p; a++) { //line 19 back[a][i] = 0; //line 20 for (b=1; b<=p; b++) back[a][i] = back[a][i] + PM[b][a]*P[s[i+1]][b]*back[b][i+1]; //line 22 } } //Fisher scores for the emission probabilities for (int i=n-1;i>=1;i--) { //line 18 for (a=1; a<=p; a++) { //line 19 scoree[a][s[i]] = scoree[a][s[i]] + back[a][i]*forw[a][i] / (P[s[i]][a]*Prob); //line 24 for (int sigma = 1; sigma<=number_of_symbols; sigma++) scoree[a][sigma] = scoree[a][sigma] - back[a][i]*forw[a][i]/Prob; //line 26 (corrected) } } //Fisher scores for the transmission probabilities for (int i=n-1;i>=1;i--) for (b=1; b<=p; b++) for (a=1; a<=p; a++) { scoret[b][a] = scoret[b][a] + (back[a][i]*forw[b][i-1]*P[s[i]][a]/Prob - back[b][i]*forw[b][i]/Prob); } //transform Fisher scores to the interval (0,1) using the logistic function scoree[i][j] = 1/(1 + exp(-scoree[i][j])); for (int i=1; i<=p; i++) for (int j=1; j<=p; j++) fisherfile << scoret[i][j] << " "; for (int j=1; j<=number_of_symbols; j++) for (int i=1; i<=p; i++) fisherfile << scoree[i][j] << " "; fisherfile << endl; } hmmstream.close(); fisherfile.close(); system("PAUSE"); }