@@ -142,11 +142,13 @@ int main(int argc, char *argv[]) {
142142 vector<int > evts;
143143 evts.push_back (PERF_COUNT_HW_CPU_CYCLES);
144144 evts.push_back (PERF_COUNT_HW_INSTRUCTIONS);
145+ evts.push_back (PERF_COUNT_HW_BRANCH_MISSES);
145146 LinuxEvents<PERF_TYPE_HARDWARE> unified (evts);
146147 vector<u64 > results;
147148 results.resize (evts.size ());
148149 unsigned long cy1 = 0 , cy2 = 0 , cy3 = 0 ;
149150 unsigned long cl1 = 0 , cl2 = 0 , cl3 = 0 ;
151+ unsigned long mis1 = 0 , mis2 = 0 , mis3 = 0 ;
150152#endif
151153 bool isok = true ;
152154 for (u32 i = 0 ; i < iterations; i++) {
@@ -159,6 +161,7 @@ int main(int argc, char *argv[]) {
159161 unified.end (results);
160162 cy1 += results[0 ];
161163 cl1 += results[1 ];
164+ mis1 += results[2 ];
162165 if (!isok) {
163166 cout << " Failed out during stage 1\n " ;
164167 break ;
@@ -170,6 +173,7 @@ int main(int argc, char *argv[]) {
170173 unified.end (results);
171174 cy2 += results[0 ];
172175 cl2 += results[1 ];
176+ mis2 += results[2 ];
173177 if (!isok) {
174178 cout << " Failed out during stage 2\n " ;
175179 break ;
@@ -182,6 +186,7 @@ int main(int argc, char *argv[]) {
182186 unified.end (results);
183187 cy3 += results[0 ];
184188 cl3 += results[1 ];
189+ mis3 += results[2 ];
185190 if (!isok) {
186191 cout << " Failed out during stage 34\n " ;
187192 break ;
@@ -200,22 +205,22 @@ int main(int argc, char *argv[]) {
200205 unsigned long total = cy1 + cy2 + cy3;
201206
202207 printf (
203- " stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n " ,
204- cl1 / iterations, cy1 / iterations, 100 . * cy1 / total, (double )cl1 / cy1);
208+ " stage 1 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f mis. branches: %10lu (cycles/mis.branch %.2f) \n " ,
209+ cl1 / iterations, cy1 / iterations, 100 . * cy1 / total, (double )cl1 / cy1, mis1/iterations, ( double )cy1/mis1 );
205210 printf (" stage 1 runs at %.2f cycles per input byte.\n " ,
206211 (double )cy1 / (iterations * p.second ));
207212
208213 printf (
209- " stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n " ,
210- cl2 / iterations, cy2 / iterations, 100 . * cy2 / total, (double )cl2 / cy2);
214+ " stage 2 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f mis. branches: %10lu (cycles/mis.branch %.2f) \n " ,
215+ cl2 / iterations, cy2 / iterations, 100 . * cy2 / total, (double )cl2 / cy2, mis2/iterations, ( double )cy2/mis2 );
211216 printf (" stage 2 runs at %.2f cycles per input byte and " ,
212217 (double )cy2 / (iterations * p.second ));
213218 printf (" %.2f cycles per structural character.\n " ,
214219 (double )cy2 / (iterations * pj.n_structural_indexes ));
215220
216221 printf (
217- " stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f \n " ,
218- cl3 / iterations, cy3 /iterations, 100 . * cy3 / total, (double )cl3 / cy3);
222+ " stage 3 instructions: %10lu cycles: %10lu (%.2f %%) ins/cycles: %.2f mis. branches: %10lu (cycles/mis.branch %.2f) \n " ,
223+ cl3 / iterations, cy3 /iterations, 100 . * cy3 / total, (double )cl3 / cy3, mis3/iterations, ( double )cy3/mis3 );
219224 printf (" stage 3 runs at %.2f cycles per input byte and " ,
220225 (double )cy3 / (iterations * p.second ));
221226 printf (" %.2f cycles per structural character.\n " ,
0 commit comments