ECE521 Project Two
Branch Predictor
1. C++ Code
// proj98_3.cxx
#include <iostream.h>
#include <stdio.h>
#include <stdlib.h>
typedef struct branch_st {
unsigned int history;
unsigned int tag;
unsigned int target;
} Branch;
#define EMPTY 0xffffffff
Branch *branch;
unsigned int *predictor;
int m,n,h,t;
int branch_size, predictor_size;
int taken_limit;
//Data of Statistics
int accesses = 0;
int interferences = 0;
int mis_branch = 0;
int mis_target = 0;
float asc,cpi;
FILE *ifp;
int branch_access(unsigned int, char, unsigned int);
main(int argc, char *argv[])
{
int i, j;
char taken;
unsigned int branch_addr, target_addr;
// Decode command line arguments
if (argc != 6) {
cout<< "usage:" <<
argv[0] << " trace_file_name m h n t\n" ;
exit(-1);
}
m = atoi(argv[2]);
h = atoi(argv[3]);
n = atoi(argv[4]);
t = atoi(argv[5]);
ifp=fopen(argv[1],"r");
if ((m>32)||(m<1)||(n<1)||(h<1)||(n>6) )
{
cout<<"invalid parameters
of m h n\n";
cout<<"The range of
m h n should be:\n";
cout<<"1 < m <
33\t 1 < n < 7\t 1 < h\n";
exit(-3);
}
// Make arrays
branch_size = 1<<m;
predictor_size = 1<<h;
taken_limit = 1 << (n-1);
branch = new Branch[branch_size];
predictor = new (unsigned int)[predictor_size];
for (i = 0; i < branch_size; i++) {
branch[i].history = 0;
branch[i].tag = EMPTY;
branch[i].target = EMPTY;
}
for (i = 0; i < predictor_size; i++) {
predictor[i] = taken_limit
- 1;
}
//Begin reading the file and predicting branch directions
while ((fscanf(ifp,"%x %c %x",&branch_addr,
&taken,
&target_addr))==3) {
fflush(ifp);
accesses++;
//
cout<<accesses<<'\t'<<target_addr<<endl;
branch_access(branch_addr,taken,target_addr);
}
fclose(ifp);
// Print out the final Statistic results
cout<<endl;
cout.flags(ios::dec);
cout << "m n h\t";
cout << "accesses interf.
mis_branch mis_target asc
cpi\n";
cout << m<<" "<<n<<" "<<h<<'\t';
cout.width(8);
cout << accesses<<" ";
cout.width(8);
cout << interferences<<" ";
cout.width(8);
cout << mis_branch<<" ";
cout.width(8);
cout << mis_target<<"
";
asc = 2.0 * (mis_branch + mis_target)/(5.0 * accesses);
cpi = asc +1;
cout.width(8);
cout << asc<<'\t';
cout.width(8);
cout << cpi << '\n';
cout << "accuracy ";
cout.width(8);
cout << (1.0*mis_branch)/(1.0*accesses)<<'\t'<<"total
bits ";
cout << (1<<m)*(h+t)+(1<<h)*n<<'\n';
}
int branch_access(unsigned int addr1, char taken, unsigned int addr2)
{
int branch_index,predictor_index;
int branch_tag;
char pred;
// Get index and tag of the branch
branch_index = (addr1>>2) & ((1<<m)-1);
branch_tag = addr1>>(m+2);
/* cout.flags(ios::hex);
cout << "index= " << branch_index << '\t'
<< "tag= " << branch_tag;
cout << "\ttarget= "<<addr2 <<"\ttaken="<<taken<<endl;*/
// Check interference
if (branch[branch_index].tag != branch_tag) {
if (branch[branch_index].tag != EMPTY)
interferences++;
branch[branch_index].tag = branch_tag;
}
// Check mispredictions of branch target addresses
if ((branch[branch_index].target != addr2) && (taken
== 't')) {
mis_target++;
branch[branch_index].target = addr2;
}
// Predict the branch dirction
predictor_index = branch[branch_index].history & ((1<<h)-1);
if (predictor[predictor_index] < taken_limit)
pred = 'n';
else
pred = 't';
// Check mispredictions of branch direction
if (taken != pred)
mis_branch++;
// Modify predictor
if ((taken == 't') && (predictor[predictor_index] !=
((1<<n)-1)))
predictor[predictor_index]++;
if ((taken == 'n') && (predictor[predictor_index] !=
0))
predictor[predictor_index]--;
// Modify branch history
if (taken == 't')
branch[branch_index].history = ( branch[branch_index].history
<< 1 ) | 0x1;
if (taken == 'n')
branch[branch_index].history = branch[branch_index].history
<< 1 ;
/* cout.flags(ios::dec);
cout << "accesses\tinterference\tmis_branch\tmis_target\n";
cout.width(8);
cout << accesses<<'\t';
cout.width(8);
cout << interferences<<'\t';
cout.width(8);
cout << mis_branch<<'\t';
cout.width(8);
cout << mis_target<<'\n';
*/
return 1;
}
2: branch predictor for each banchmark
099.go.branch
Now simulating : m=7 h=10 n=2
Total accesses=100001
Interferences=1482
Mis_branch=5597
Mis_target=248
Average stall cycle=0.0233798
CPI= 1.02338
Accuracy=0.0559694
Total storage=5888bits
124.m88ksim.branch
Now simulating : m=6 h=1 n=1
Total accesses=100001
Interferences=0
Mis_branch=5
Mis_target=6
Average stall cycle=4.39996e-05
CPI= 1.00004
Accuracy=4.99995e-05
Total storage=1346bits
126.gcc.branch
Now simulating : m=10 h=8 n=2
Total accesses=97864
Interferences=1824
Mis_branch=2219
Mis_target=4948
Average stall cycle=0.0292937
CPI= 1.02929
Accuracy=0.0226743
Total storage=29184bits
129.compress.branch
Now simulating : m=5 h=9 n=2
Total accesses=100001
Interferences=3049
Mis_branch=2107
Mis_target=1483
Average stall cycle=0.0143599
CPI= 1.01436
Accuracy=0.0210698
Total storage=1696bits
130.li.branch
Now simulating : m=11 h=8 n=2
Total accesses=100001
Interferences=1113
Mis_branch=5242
Mis_target=7130
Average stall cycle=0.0494875
CPI= 1.04949
Accuracy=0.0524195
Total storage=49664bits
134.perl.branch
Now simulating : m=11 h=8 n=3
Total accesses=100001
Interferences=4937
Mis_branch=4305
Mis_target=5252
Average stall cycle=0.0382276
CPI= 1.03823
Accuracy=0.0430496
Total storage=58112bits
147.vortex.branch
Now simulating : m=10 h=6 n=2
Total accesses=100001
Interferences=14756
Mis_branch=5089
Mis_target=9414
Average stall cycle=0.0580114
CPI= 1.05801
Accuracy=0.0508895
Total storage=26752bits
3: Sample result and final result
Sample result for the last 5 for 126.gcc.branch
Now simulating : m=10 h=5 n=2
Total accesses=95
Interferences=0
Mis_branch=32
Mis_target=28
Average stall cycle=0.252632
CPI= 1.25263
Accuracy=0.336842
Total storage=25664bits
Now simulating : m=10 h=5 n=2
Total accesses=96
Interferences=0
Mis_branch=32
Mis_target=28
Average stall cycle=0.25
CPI= 1.25
Accuracy=0.333333
Total storage=25664bits
Now simulating : m=10 h=5 n=2
Total accesses=97
Interferences=0
Mis_branch=33
Mis_target=28
Average stall cycle=0.251546
CPI= 1.25155
Accuracy=0.340206
Total storage=25664bits
Now simulating : m=10 h=5 n=2
Total accesses=98
Interferences=0
Mis_branch=33
Mis_target=28
Average stall cycle= 0.24898
CPI= 1.24898
Accuracy=0.336735
Total storage=25664bits
Now simulating : m=10 h=5 n=2
Total accesses=99
Interferences=0
Mis_branch=34
Mis_target=29
Average stall cycle=0.254545
CPI= 1.25455
Accuracy=0.343434
Total storage=25664bits
Final result:
Now simulating : m=10 h=5 n=2
Total accesses=97864
Interferences=1824
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309695
CPI= 1.03097
Accuracy=0.0268638
Total storage=25664bits
Now simulating : m=6 h=10 n=4
Total accesses=95
Interferences=18
Mis_branch=48
Mis_target=28
Average stall cycle= 0.32
CPI= 1.32
Accuracy=0.505263
Total storage=6016bits
Now simulating : m=6 h=10 n=4
Total accesses=96
Interferences=18
Mis_branch=48
Mis_target=28
Average stall cycle=0.316667
CPI= 1.31667
Accuracy=0.5
Total storage=6016bits
Now simulating : m=6 h=10 n=4
Total accesses=97
Interferences=18
Mis_branch=49
Mis_target=28
Average stall cycle=0.317526
CPI= 1.31753
Accuracy=0.505155
Total storage=6016bits
Now simulating : m=6 h=10 n=4
Total accesses=98
Interferences=19
Mis_branch=50
Mis_target=28
Average stall cycle=0.318367
CPI= 1.31837
Accuracy=0.510204
Total storage=6016bits
Now simulating : m=6 h=10 n=4
Total accesses=99
Interferences=19
Mis_branch=51
Mis_target=29
Average stall cycle=0.323232
CPI= 1.32323
Accuracy=0.515152
Total storage=6016bits
Final result:
Now simulating : m=6 h=10 n=4
Total accesses=97864
Interferences=29541
Mis_branch=4767
Mis_target=11230
Average stall cycle=0.0653846
CPI= 1.06538
Accuracy=0.0487105
Total storage=6016bits
Now simulating : m=6 h=10 n=4
Total accesses=70852
Interferences=23071
Mis_branch=4331
Mis_target=8456
Average stall cycle=0.0721899
CPI= 1.07219
Accuracy=0.0611274
Total storage=6016bits
0xbb314 n 0xbb318
0xbb324 n 0xbb328
0xbb32c n 0xbb330
0xbb330 n 0xbb334
0xbb344 n 0xbb348
0xbb34c n 0xbb350
0xbb354 n 0xbb358
0xbb35c n 0xbb360
0xbb360 t 0xc0248
0xc0288 n 0xc028c
0xc029c n 0xc02a0
0xc02b0 t 0xbb364
0xbb368 t 0xc0dcc
0xc0dcc t 0xc0dd4
0xc0dd4 t 0xbb36c
0xbb36c t 0xc3b74
0xc3b74 t 0xc3b64
0xc3b68 t 0xb5d8
0xb5e8 t 0xc1f38
0xc1f60 n 0xc1f64
0xc1f84 n 0xc1f88
0xc1fc8 n 0xc1fcc
0xc1ff4 t 0xc1fe8
0xc1ff4 t 0xc1fe8
0xc1ff4 n 0xc1ff8
0xc2000 t 0xc2018
0xc2028 t 0xc2004
0xc2014 t 0xb5ec
0xb5f8 t 0xc1f38
0xc1f60 t 0xc202c
0xc2034 t 0x87a40
0x87a68 n 0x87a6c
0x87a7c n 0x87a80
0x87a88 t 0xbce34
0xbce34 n 0xbce38
0xbce3c n 0xbce40
0xbce44 t 0x87a8c
0x87aa8 n 0x87aac
0x87ac0 n 0x87ac4
0x87acc n 0x87ad0
0x87ad8 n 0x87adc
0x87af8 t 0xc2038
0xc2038 t 0xc1f64
0xc1f84 n 0xc1f88
0xc1fc8 n 0xc1fcc
0xc1ff4 t 0xc1fe8
0xc1ff4 t 0xc1fe8
0xc1ff4 n 0xc1ff8
0xc2000 t 0xc2018
0xc2028 t 0xc2004
0xc2014 t 0xb5fc
0xb608 t 0xc1f38
0xc1f60 n 0xc1f64
0xc1f84 n 0xc1f88
0xc1fc8 n 0xc1fcc
0xc1ff4 t 0xc1fe8
0xc1ff4 t 0xc1fe8
0xc1ff4 n 0xc1ff8
0xc2000 t 0xc2018
0xc2028 t 0xc2004
0xc2014 t 0xb60c
0xb614 t 0xc3b1c
0xc3b28 n 0xc3b2c
0xc3b30 t 0xc0e30
0xc0e48 n 0xc0e4c
0xc0e4c n 0xc0e50
0xc0e50 n 0xc0e54
0xc0e54 n 0xc0e58
0xc0e58 n 0xc0e5c
0xc0e5c t 0xc103c
0xc1040 t 0xc0e74
0xc0e84 n 0xc0e88
0xc0e9c n 0xc0ea0
0xc0ed0 n 0xc0ed4
0xc0ef4 n 0xc0ef8
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
0xc0f0c t 0xc0f00
4 Sample result
Now simulating : m=10 h=5 n=2
Total accesses=97860
Interferences=1821
Mis_branch=2628
Mis_target=4948
Average stall cycle=0.0309667
CPI= 1.03097
Accuracy=0.0268547
Total storage=25664bits
Now simulating : m=10 h=5 n=2
Total accesses=97861
Interferences=1822
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309705
CPI= 1.03097
Accuracy=0.0268646
Total storage=25664bits
Now simulating : m=10 h=5 n=2
Total accesses=97862
Interferences=1823
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309701
CPI= 1.03097
Accuracy=0.0268644
Total storage=25664bits
Now simulating : m=10 h=5 n=2
Total accesses=97863
Interferences=1824
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309698
CPI= 1.03097
Accuracy=0.0268641
Total storage=25664bits
Now simulating : m=10 h=5 n=2
Total accesses=97864
Interferences=1824
Mis_branch=2629
Mis_target=4948
Average stall cycle=0.0309695
CPI= 1.03097
Accuracy=0.0268638
Total storage=25664bits
Now simulating : m=6 h=10 n=4
Total accesses=97860
Interferences=29537
Mis_branch=4766
Mis_target=11230
Average stall cycle=0.0653832
CPI= 1.06538
Accuracy=0.0487022
Total storage=6016bits
Now simulating : m=6 h=10 n=4
Total accesses=97861
Interferences=29538
Mis_branch=4766
Mis_target=11230
Average stall cycle=0.0653825
CPI= 1.06538
Accuracy=0.0487017
Total storage=6016bits
Now simulating : m=6 h=10 n=4
Total accesses=97862
Interferences=29539
Mis_branch=4766
Mis_target=11230
Average stall cycle=0.0653819
CPI= 1.06538
Accuracy=0.0487012
Total storage=6016bits
Now simulating : m=6 h=10 n=4
Total accesses=97863
Interferences=29540
Mis_branch=4767
Mis_target=11230
Average stall cycle=0.0653853
CPI= 1.06539
Accuracy=0.048711
Total storage=6016bits
Now simulating : m=6 h=10 n=4
Total accesses=97864
Interferences=29541
Mis_branch=4767
Mis_target=11230
Average stall cycle=0.0653846
CPI= 1.06538
Accuracy=0.0487105
Total storage=6016bits