ECE521 Project Two
Branch Predictor
 

1. C++ Code



// proj98_3.cxx
#include <iostream.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct branch_st {
    unsigned int history;
    unsigned int tag;
    unsigned int target;
} Branch;

#define EMPTY 0xffffffff
 

Branch *branch;
unsigned int *predictor;
int m,n,h,t;
int branch_size, predictor_size;
int taken_limit;

//Data of Statistics

int accesses = 0;
int interferences = 0;
int mis_branch = 0;
int mis_target = 0;
float asc,cpi;
FILE *ifp;

int branch_access(unsigned int, char, unsigned int);

main(int argc, char *argv[])
{
    int i, j;
    char taken;
    unsigned int branch_addr, target_addr;
 

 // Decode command line arguments

    if (argc != 6) {
        cout<< "usage:" << argv[0] << " trace_file_name m h n t\n" ;
        exit(-1);
    }

    m = atoi(argv[2]);
    h = atoi(argv[3]);
    n = atoi(argv[4]);
    t = atoi(argv[5]);

    ifp=fopen(argv[1],"r");

    if ((m>32)||(m<1)||(n<1)||(h<1)||(n>6) ) {
        cout<<"invalid parameters of m h n\n";
        cout<<"The range of m h n should be:\n";
        cout<<"1 < m < 33\t 1 < n < 7\t 1 < h\n";
        exit(-3);
    }

 // Make arrays

    branch_size = 1<<m;
    predictor_size = 1<<h;
    taken_limit = 1 << (n-1);

    branch =   new Branch[branch_size];
    predictor = new (unsigned int)[predictor_size];
 

    for (i = 0; i < branch_size; i++) {
        branch[i].history = 0;
        branch[i].tag = EMPTY;
        branch[i].target = EMPTY;
    }

    for (i = 0; i < predictor_size; i++) {
        predictor[i] = taken_limit - 1;
    }
 

    //Begin reading the file and predicting branch directions

    while ((fscanf(ifp,"%x %c %x",&branch_addr, &taken, &target_addr))==3) {
      fflush(ifp);
      accesses++;
       //       cout<<accesses<<'\t'<<target_addr<<endl;
       branch_access(branch_addr,taken,target_addr);
    }
    fclose(ifp);

    // Print out the final Statistic results

    cout<<endl;
    cout.flags(ios::dec);
    cout << "m  n  h\t";
    cout << "accesses    interf.  mis_branch  mis_target     asc       cpi\n";
    cout << m<<" "<<n<<" "<<h<<'\t';
    cout.width(8);
    cout << accesses<<"  ";
    cout.width(8);
    cout << interferences<<"  ";
    cout.width(8);
    cout << mis_branch<<"  ";
    cout.width(8);
    cout << mis_target<<"       ";

    asc = 2.0 * (mis_branch + mis_target)/(5.0 * accesses);
    cpi = asc +1;

    cout.width(8);
    cout << asc<<'\t';
    cout.width(8);
    cout << cpi << '\n';
    cout << "accuracy   ";
    cout.width(8);
    cout << (1.0*mis_branch)/(1.0*accesses)<<'\t'<<"total bits    ";
    cout << (1<<m)*(h+t)+(1<<h)*n<<'\n';

}
 

int branch_access(unsigned int addr1, char taken, unsigned int addr2)
{
  int branch_index,predictor_index;
  int branch_tag;
  char pred;

  // Get index and tag of the branch
  branch_index = (addr1>>2) & ((1<<m)-1);
  branch_tag = addr1>>(m+2);

/*  cout.flags(ios::hex);
  cout << "index= " << branch_index << '\t' << "tag= " << branch_tag;
  cout << "\ttarget= "<<addr2 <<"\ttaken="<<taken<<endl;*/

  // Check interference
  if (branch[branch_index].tag != branch_tag) {
     if (branch[branch_index].tag != EMPTY)
         interferences++;
      branch[branch_index].tag = branch_tag;
     }

  // Check mispredictions of branch target addresses
  if ((branch[branch_index].target != addr2) && (taken == 't')) {
     mis_target++;
     branch[branch_index].target = addr2;
     }

  // Predict the branch dirction
  predictor_index = branch[branch_index].history & ((1<<h)-1);
  if (predictor[predictor_index] < taken_limit)
     pred = 'n';
  else
     pred = 't';

  // Check mispredictions of branch direction
  if (taken != pred)
     mis_branch++;

  // Modify predictor
  if ((taken == 't') && (predictor[predictor_index] != ((1<<n)-1)))
     predictor[predictor_index]++;

  if ((taken == 'n') && (predictor[predictor_index] != 0))
     predictor[predictor_index]--;

  // Modify branch history
  if (taken == 't')
     branch[branch_index].history = ( branch[branch_index].history << 1 ) | 0x1;
  if (taken == 'n')
     branch[branch_index].history = branch[branch_index].history << 1 ;

/*    cout.flags(ios::dec);
    cout << "accesses\tinterference\tmis_branch\tmis_target\n";
    cout.width(8);
    cout << accesses<<'\t';
    cout.width(8);
    cout << interferences<<'\t';
    cout.width(8);
    cout << mis_branch<<'\t';
    cout.width(8);
    cout << mis_target<<'\n';
                               */

  return 1;

}
 

                       2: branch predictor for each banchmark
 
 

099.go.branch

Now simulating :  m=7   h=10    n=2
Total accesses=100001
Interferences=1482
Mis_branch=5597
Mis_target=248
Average stall cycle=0.0233798
CPI= 1.02338
Accuracy=0.0559694
Total storage=5888bits

124.m88ksim.branch

Now simulating :  m=6   h=1     n=1
Total accesses=100001
Interferences=0
Mis_branch=5
Mis_target=6
Average stall cycle=4.39996e-05
CPI= 1.00004
Accuracy=4.99995e-05
Total storage=1346bits

126.gcc.branch

Now simulating :  m=10  h=8     n=2
Total accesses=97864
Interferences=1824
Mis_branch=2219
Mis_target=4948
Average stall cycle=0.0292937
CPI= 1.02929
Accuracy=0.0226743
Total storage=29184bits

129.compress.branch

Now simulating :  m=5   h=9     n=2
Total accesses=100001
Interferences=3049
Mis_branch=2107
Mis_target=1483
Average stall cycle=0.0143599
CPI= 1.01436
Accuracy=0.0210698
Total storage=1696bits

130.li.branch

Now simulating :  m=11  h=8     n=2
Total accesses=100001
Interferences=1113
Mis_branch=5242
Mis_target=7130
Average stall cycle=0.0494875
CPI= 1.04949
Accuracy=0.0524195
Total storage=49664bits

134.perl.branch

Now simulating :  m=11  h=8     n=3
Total accesses=100001
Interferences=4937
Mis_branch=4305
Mis_target=5252
Average stall cycle=0.0382276
CPI= 1.03823
Accuracy=0.0430496
Total storage=58112bits

147.vortex.branch

Now simulating :  m=10  h=6     n=2
Total accesses=100001
Interferences=14756
Mis_branch=5089
Mis_target=9414
Average stall cycle=0.0580114
CPI= 1.05801
Accuracy=0.0508895
Total storage=26752bits

                       3: Sample result and final result
 

Sample result for the last 5 for 126.gcc.branch

Now simulating :  m=10  h=5     n=2
Total accesses=95
Interferences=0
Mis_branch=32
Mis_target=28
Average stall cycle=0.252632
CPI= 1.25263
Accuracy=0.336842
Total storage=25664bits

Now simulating :  m=10  h=5     n=2
Total accesses=96
Interferences=0
Mis_branch=32
Mis_target=28
Average stall cycle=0.25
CPI=    1.25
Accuracy=0.333333
Total storage=25664bits

Now simulating :  m=10  h=5     n=2
Total accesses=97
Interferences=0
Mis_branch=33
Mis_target=28
Average stall cycle=0.251546
CPI= 1.25155
Accuracy=0.340206
Total storage=25664bits

Now simulating :  m=10  h=5     n=2
Total accesses=98
Interferences=0
Mis_branch=33
Mis_target=28
Average stall cycle= 0.24898
CPI= 1.24898
Accuracy=0.336735
Total storage=25664bits

Now simulating :  m=10  h=5     n=2
Total accesses=99
Interferences=0
Mis_branch=34
Mis_target=29
Average stall cycle=0.254545
CPI= 1.25455
Accuracy=0.343434
Total storage=25664bits

Final result:

Now simulating :  m=10  h=5     n=2
Total accesses=97864
Interferences=1824
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309695
CPI= 1.03097
Accuracy=0.0268638
Total storage=25664bits
 

Now simulating :  m=6   h=10    n=4
Total accesses=95
Interferences=18
Mis_branch=48
Mis_target=28
Average stall cycle=    0.32
CPI=    1.32
Accuracy=0.505263
Total storage=6016bits

Now simulating :  m=6   h=10    n=4
Total accesses=96
Interferences=18
Mis_branch=48
Mis_target=28
Average stall cycle=0.316667
CPI= 1.31667
Accuracy=0.5
Total storage=6016bits

Now simulating :  m=6   h=10    n=4
Total accesses=97
Interferences=18
Mis_branch=49
Mis_target=28
Average stall cycle=0.317526
CPI= 1.31753
Accuracy=0.505155
Total storage=6016bits

Now simulating :  m=6   h=10    n=4
Total accesses=98
Interferences=19
Mis_branch=50
Mis_target=28
Average stall cycle=0.318367
CPI= 1.31837
Accuracy=0.510204
Total storage=6016bits

Now simulating :  m=6   h=10    n=4
Total accesses=99
Interferences=19
Mis_branch=51
Mis_target=29
Average stall cycle=0.323232
CPI= 1.32323
Accuracy=0.515152
Total storage=6016bits

Final result:

Now simulating :  m=6   h=10    n=4
Total accesses=97864
Interferences=29541
Mis_branch=4767
Mis_target=11230
Average stall cycle=0.0653846
CPI= 1.06538
Accuracy=0.0487105
Total storage=6016bits

Now simulating :  m=6   h=10    n=4
Total accesses=70852
Interferences=23071
Mis_branch=4331
Mis_target=8456
Average stall cycle=0.0721899
CPI= 1.07219
Accuracy=0.0611274
Total storage=6016bits
0xbb314 n 0xbb318
0xbb324 n 0xbb328
0xbb32c n 0xbb330
0xbb330 n 0xbb334
0xbb344 n 0xbb348
0xbb34c n 0xbb350
0xbb354 n 0xbb358
0xbb35c n 0xbb360
0xbb360 t 0xc0248
0xc0288 n 0xc028c
0xc029c n 0xc02a0
0xc02b0 t 0xbb364
0xbb368 t 0xc0dcc
0xc0dcc t 0xc0dd4
0xc0dd4 t 0xbb36c
0xbb36c t 0xc3b74
0xc3b74 t 0xc3b64
0xc3b68 t 0xb5d8
0xb5e8 t 0xc1f38
0xc1f60 n 0xc1f64
0xc1f84 n 0xc1f88
0xc1fc8 n 0xc1fcc
0xc1ff4 t 0xc1fe8
0xc1ff4 t 0xc1fe8
0xc1ff4 n 0xc1ff8
0xc2000 t 0xc2018
0xc2028 t 0xc2004
0xc2014 t 0xb5ec
0xb5f8 t 0xc1f38
0xc1f60 t 0xc202c
0xc2034 t 0x87a40
0x87a68 n 0x87a6c
0x87a7c n 0x87a80
0x87a88 t 0xbce34
0xbce34 n 0xbce38
0xbce3c n 0xbce40
0xbce44 t 0x87a8c
0x87aa8 n 0x87aac
0x87ac0 n 0x87ac4
0x87acc n 0x87ad0
0x87ad8 n 0x87adc
0x87af8 t 0xc2038
0xc2038 t 0xc1f64
0xc1f84 n 0xc1f88
0xc1fc8 n 0xc1fcc
0xc1ff4 t 0xc1fe8
0xc1ff4 t 0xc1fe8
0xc1ff4 n 0xc1ff8
0xc2000 t 0xc2018
0xc2028 t 0xc2004
0xc2014 t 0xb5fc
0xb608 t 0xc1f38
0xc1f60 n 0xc1f64
0xc1f84 n 0xc1f88
0xc1fc8 n 0xc1fcc
0xc1ff4 t 0xc1fe8
0xc1ff4 t 0xc1fe8
0xc1ff4 n 0xc1ff8
0xc2000 t 0xc2018
0xc2028 t 0xc2004
0xc2014 t 0xb60c
0xb614 t 0xc3b1c
0xc3b28 n 0xc3b2c
0xc3b30 t 0xc0e30
0xc0e48 n 0xc0e4c
0xc0e4c n 0xc0e50
0xc0e50 n 0xc0e54
0xc0e54 n 0xc0e58
0xc0e58 n 0xc0e5c
0xc0e5c t 0xc103c
0xc1040 t 0xc0e74
0xc0e84 n 0xc0e88
0xc0e9c n 0xc0ea0
0xc0ed0 n 0xc0ed4
0xc0ef4 n 0xc0ef8
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
 
 
 
 
 

                       4 Sample result
 

Now simulating :  m=10  h=5     n=2
Total accesses=97860
Interferences=1821
Mis_branch=2628
Mis_target=4948
Average stall cycle=0.0309667
CPI= 1.03097
Accuracy=0.0268547
Total storage=25664bits

Now simulating :  m=10  h=5     n=2
Total accesses=97861
Interferences=1822
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309705
CPI= 1.03097
Accuracy=0.0268646
Total storage=25664bits

Now simulating :  m=10  h=5     n=2
Total accesses=97862
Interferences=1823
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309701
CPI= 1.03097
Accuracy=0.0268644
Total storage=25664bits

Now simulating :  m=10  h=5     n=2
Total accesses=97863
Interferences=1824
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309698
CPI= 1.03097
Accuracy=0.0268641
Total storage=25664bits

Now simulating :  m=10  h=5     n=2
Total accesses=97864
Interferences=1824
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309695
CPI= 1.03097
Accuracy=0.0268638
Total storage=25664bits

Now simulating :  m=6   h=10    n=4
Total accesses=97860
Interferences=29537
Mis_branch=4766
Mis_target=11230
Average stall cycle=0.0653832
CPI= 1.06538
Accuracy=0.0487022
Total storage=6016bits

Now simulating :  m=6   h=10    n=4
Total accesses=97861
Interferences=29538
Mis_branch=4766
Mis_target=11230
Average stall cycle=0.0653825
CPI= 1.06538
Accuracy=0.0487017
Total storage=6016bits

Now simulating :  m=6   h=10    n=4
Total accesses=97862
Interferences=29539
Mis_branch=4766
Mis_target=11230
Average stall cycle=0.0653819
CPI= 1.06538
Accuracy=0.0487012
Total storage=6016bits

Now simulating :  m=6   h=10    n=4
Total accesses=97863
Interferences=29540
Mis_branch=4767
Mis_target=11230
Average stall cycle=0.0653853
CPI= 1.06539
Accuracy=0.048711
Total storage=6016bits

Now simulating :  m=6   h=10    n=4
Total accesses=97864
Interferences=29541
Mis_branch=4767
Mis_target=11230
Average stall cycle=0.0653846
CPI= 1.06538
Accuracy=0.0487105
Total storage=6016bits