@@ -134,21 +134,6 @@ struct option_struct {
134134 }
135135};
136136
137- double actual (const benchmarker& feature) {
138- return feature.stage1 .best .elapsed_ns () / feature.stats ->blocks ;
139- }
140- double diff (const benchmarker& feature, const benchmarker& struct7) {
141- if (feature.stats ->blocks == struct7.stats ->blocks ) {
142- return (feature.stage1 .best .elapsed_ns () - struct7.stage1 .best .elapsed_ns ()) / struct7.stats ->blocks ;
143- } else {
144- return (feature.stage1 .best .elapsed_ns () / feature.stats ->blocks ) - (struct7.stage1 .best .elapsed_ns () / struct7.stats ->blocks );
145- }
146- }
147- double diff_miss (const benchmarker& feature, const benchmarker& struct7) {
148- // There are roughly 2650 branch mispredicts, so we have to scale it so it represents a per block amount
149- return diff (feature, struct7) * 10000.0 / 2650.0 ;
150- }
151-
152137struct feature_benchmarker {
153138 benchmarker utf8;
154139 benchmarker utf8_miss;
@@ -192,87 +177,182 @@ struct feature_benchmarker {
192177 struct23_miss.run_iterations (iterations, stage1_only);
193178 }
194179
195- void print () {
196- printf (" base (ns/block)" );
197- printf (" ,struct 1-7" );
198- printf (" ,struct 1-7 miss" );
199- printf (" ,utf-8" );
200- printf (" ,utf-8 miss" );
201- printf (" ,struct 8-15" );
202- printf (" ,struct 8-15 miss" );
203- printf (" ,struct 16+" );
204- printf (" ,struct 16+ miss" );
205- printf (" \n " );
206-
207- printf (" %g" , actual (empty));
208- printf (" ,%+g" , diff (struct7, empty));
209- printf (" ,%+g" , diff (struct7_miss, struct7));
210- printf (" ,%+g" , diff (utf8, struct7));
211- printf (" ,%+g" , diff (utf8_miss, utf8));
212- printf (" ,%+g" , diff (struct15, struct7));
213- printf (" ,%+g" , diff (struct15_miss, struct15));
214- printf (" ,%+g" , diff (struct23, struct15));
215- printf (" ,%+g" , diff (struct23_miss, struct23));
216- printf (" \n " );
180+ double cost_per_block (const benchmarker& feature, size_t feature_blocks, const benchmarker& base) const {
181+ return (feature.stage1 .best .elapsed_ns () - base.stage1 .best .elapsed_ns ()) / feature_blocks;
217182 }
218183
219- double cost_per_block (benchmarker& feature, size_t feature_blocks, benchmarker& base) {
220- return (feature.stage1 .best .elapsed_ns () - base.stage1 .best .elapsed_ns ()) / feature_blocks;
184+ // Whether we're recording cache miss and branch miss events
185+ bool has_events () const {
186+ return empty.collector .has_events ();
221187 }
222188
223189 // Base cost of any block (including empty ones)
224- double base_cost () {
190+ double base_cost () const {
225191 return (empty.stage1 .best .elapsed_ns () / empty.stats ->blocks );
226192 }
193+
227194 // Extra cost of a 1-7 structural block over an empty block
228- double struct1_7_cost () {
195+ double struct1_7_cost () const {
229196 return cost_per_block (struct7, struct7.stats ->blocks_with_1_structural , empty);
230197 }
231198 // Extra cost of an 1-7-structural miss
232- double struct1_7_miss_cost () {
199+ double struct1_7_miss_cost () const {
233200 return cost_per_block (struct7_miss, struct7_miss.stats ->blocks_with_1_structural , struct7);
234201 }
202+ // Rate of 1-7-structural misses per 8-structural flip
203+ double struct1_7_miss_rate () const {
204+ if (!has_events ()) { return 1 ; }
205+ return double (struct7_miss.stage1 .best .branch_misses () - struct7.stage1 .best .branch_misses ()) / struct7_miss.stats ->blocks_with_1_structural_flipped ;
206+ }
207+
235208 // Extra cost of an 8-15 structural block over a 1-7 structural block
236- double struct8_15_cost () {
209+ double struct8_15_cost () const {
237210 return cost_per_block (struct15, struct15.stats ->blocks_with_8_structurals , struct7);
238211 }
239212 // Extra cost of an 8-15-structural miss over a 1-7 miss
240- double struct8_15_miss_cost () {
213+ double struct8_15_miss_cost () const {
241214 return cost_per_block (struct15_miss, struct15_miss.stats ->blocks_with_8_structurals_flipped , struct15);
242215 }
216+ // Rate of 8-15-structural misses per 8-structural flip
217+ double struct8_15_miss_rate () const {
218+ if (!has_events ()) { return 1 ; }
219+ return double (struct15_miss.stage1 .best .branch_misses () - struct15.stage1 .best .branch_misses ()) / struct15_miss.stats ->blocks_with_8_structurals_flipped ;
220+ }
221+
243222 // Extra cost of a 16+-structural block over an 8-15 structural block (actual varies based on # of structurals!)
244- double struct16_cost () {
223+ double struct16_cost () const {
245224 return cost_per_block (struct23, struct23.stats ->blocks_with_16_structurals , struct15);
246225 }
247226 // Extra cost of a 16-structural miss over an 8-15 miss
248- double struct16_miss_cost () {
227+ double struct16_miss_cost () const {
249228 return cost_per_block (struct23_miss, struct23_miss.stats ->blocks_with_16_structurals_flipped , struct23);
250229 }
230+ // Rate of 16-structural misses per 16-structural flip
231+ double struct16_miss_rate () const {
232+ if (!has_events ()) { return 1 ; }
233+ return double (struct23_miss.stage1 .best .branch_misses () - struct23.stage1 .best .branch_misses ()) / struct23_miss.stats ->blocks_with_16_structurals_flipped ;
234+ }
235+
251236 // Extra cost of having UTF-8 in a block
252- double utf8_cost () {
237+ double utf8_cost () const {
253238 return cost_per_block (utf8, utf8.stats ->blocks_with_utf8 , struct7_full);
254239 }
255240 // Extra cost of a UTF-8 miss
256- double utf8_miss_cost () {
241+ double utf8_miss_cost () const {
257242 return cost_per_block (utf8_miss, utf8_miss.stats ->blocks_with_utf8_flipped , utf8);
258243 }
244+ // Rate of UTF-8 misses per UTF-8 flip
245+ double utf8_miss_rate () const {
246+ if (!has_events ()) { return 1 ; }
247+ return double (utf8_miss.stage1 .best .branch_misses () - utf8.stage1 .best .branch_misses ()) / utf8_miss.stats ->blocks_with_utf8_flipped ;
248+ }
249+
250+ double calc_expected_feature_cost (const benchmarker& file) const {
251+ // Expected base ns/block (empty)
252+ json_stats& stats = *file.stats ;
253+ double expected = base_cost () * stats.blocks ;
254+ expected += struct1_7_cost () * stats.blocks_with_1_structural ;
255+ expected += utf8_cost () * stats.blocks_with_utf8 ;
256+ expected += struct8_15_cost () * stats.blocks_with_8_structurals ;
257+ expected += struct16_cost () * stats.blocks_with_16_structurals ;
258+ return expected / stats.blocks ;
259+ }
259260
260- double calc_expected ( benchmarker& file) {
261+ double calc_expected_miss_cost ( const benchmarker& file) const {
261262 // Expected base ns/block (empty)
262263 json_stats& stats = *file.stats ;
263- double expected = base_cost () * stats.blocks ;
264- expected += struct1_7_cost () * stats.blocks_with_1_structural ;
265- expected += struct1_7_miss_cost () * stats.blocks_with_1_structural_flipped ;
266- expected += utf8_cost () * stats.blocks_with_utf8 ;
267- expected += utf8_miss_cost () * stats.blocks_with_utf8_flipped ;
268- expected += struct8_15_cost () * stats.blocks_with_8_structurals ;
269- expected += struct8_15_miss_cost () * stats.blocks_with_8_structurals_flipped ;
270- expected += struct16_cost () * stats.blocks_with_16_structurals ;
271- expected += struct16_miss_cost () * stats.blocks_with_16_structurals_flipped ;
264+ double expected = struct1_7_miss_cost () * stats.blocks_with_1_structural_flipped * struct1_7_miss_rate ();
265+ expected += utf8_miss_cost () * stats.blocks_with_utf8_flipped * utf8_miss_rate ();
266+ expected += struct8_15_miss_cost () * stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate ();
267+ expected += struct16_miss_cost () * stats.blocks_with_16_structurals_flipped * struct16_miss_rate ();
272268 return expected / stats.blocks ;
273269 }
270+
271+ double calc_expected_misses (const benchmarker& file) const {
272+ json_stats& stats = *file.stats ;
273+ double expected = stats.blocks_with_1_structural_flipped * struct1_7_miss_rate ();
274+ expected += stats.blocks_with_utf8_flipped * utf8_miss_rate ();
275+ expected += stats.blocks_with_8_structurals_flipped * struct8_15_miss_rate ();
276+ expected += stats.blocks_with_16_structurals_flipped * struct16_miss_rate ();
277+ return expected;
278+ }
279+
280+ double calc_expected (const benchmarker& file) const {
281+ return calc_expected_feature_cost (file) + calc_expected_miss_cost (file);
282+ }
283+
284+ void print () {
285+ printf (" \n " );
286+ printf (" Features in ns/block (64 bytes):\n " );
287+ printf (" \n " );
288+ printf (" | %-8s " , " Stage" );
289+ printf (" | %8s " , " Base" );
290+ printf (" | %8s " , " 7 Struct" );
291+ printf (" | %8s " , " UTF-8" );
292+ printf (" | %8s " , " 15 Str." );
293+ printf (" | %8s " , " 16+ Str." );
294+ printf (" | %15s " , " 7 Struct Miss" );
295+ printf (" | %15s " , " UTF-8 Miss" );
296+ printf (" | %15s " , " 15 Str. Miss" );
297+ printf (" | %15s " , " 16+ Str. Miss" );
298+ printf (" |\n " );
299+
300+ printf (" |%.10s" , " ---------------------------------------" );
301+ printf (" |%.10s" , " ---------------------------------------" );
302+ printf (" |%.10s" , " ---------------------------------------" );
303+ printf (" |%.10s" , " ---------------------------------------" );
304+ printf (" |%.10s" , " ---------------------------------------" );
305+ printf (" |%.10s" , " ---------------------------------------" );
306+ printf (" |%.17s" , " ---------------------------------------" );
307+ printf (" |%.17s" , " ---------------------------------------" );
308+ printf (" |%.17s" , " ---------------------------------------" );
309+ printf (" |%.17s" , " ---------------------------------------" );
310+ printf (" |\n " );
311+
312+ printf (" | %-8s " , " Stage 1" );
313+ printf (" | %8.3g " , base_cost ());
314+ printf (" | %8.3g " , struct1_7_cost ());
315+ printf (" | %8.3g " , utf8_cost ());
316+ printf (" | %8.3g " , struct8_15_cost ());
317+ printf (" | %8.3g " , struct16_cost ());
318+ if (has_events ()) {
319+ printf (" | %8.3g (%3d%%) " , struct1_7_miss_cost (), int (struct1_7_miss_rate ()*100 ));
320+ printf (" | %8.3g (%3d%%) " , utf8_miss_cost (), int (utf8_miss_rate ()*100 ));
321+ printf (" | %8.3g (%3d%%) " , struct8_15_miss_cost (), int (struct8_15_miss_rate ()*100 ));
322+ printf (" | %8.3g (%3d%%) " , struct16_miss_cost (), int (struct16_miss_rate ()*100 ));
323+ } else {
324+ printf (" | %8.3g " , struct1_7_miss_cost ());
325+ printf (" | %8.3g " , utf8_miss_cost ());
326+ printf (" | %8.3g " , struct8_15_miss_cost ());
327+ printf (" | %8.3g " , struct16_miss_cost ());
328+ }
329+ printf (" |\n " );
330+ }
274331};
275332
333+ void print_file_effectiveness (const char * filename, const benchmarker& results, const feature_benchmarker& features) {
334+ double actual = results.stage1 .best .elapsed_ns () / results.stats ->blocks ;
335+ double calc = features.calc_expected (results);
336+ uint64_t actual_misses = results.stage1 .best .branch_misses ();
337+ uint64_t calc_misses = uint64_t (features.calc_expected_misses (results));
338+ double calc_miss_cost = features.calc_expected_miss_cost (results);
339+ printf (" | %-15s " , filename);
340+ printf (" | %8.3g " , features.calc_expected_feature_cost (results));
341+ printf (" | %8.3g " , calc_miss_cost);
342+ printf (" | %8.3g " , calc);
343+ printf (" | %8.3g " , actual);
344+ printf (" | %+8.3g " , actual - calc);
345+ printf (" | %13lu " , calc_misses);
346+ if (features.has_events ()) {
347+ printf (" | %13lu " , actual_misses);
348+ printf (" | %+13ld " , int64_t (actual_misses - calc_misses));
349+ double miss_adjustment = calc_miss_cost * (double (int64_t (actual_misses - calc_misses)) / calc_misses);
350+ printf (" | %8.3g " , calc_miss_cost + miss_adjustment);
351+ printf (" | %+8.3g " , actual - (calc + miss_adjustment));
352+ }
353+ printf (" |\n " );
354+ }
355+
276356int main (int argc, char *argv[]) {
277357 // Read options
278358 exe_name = argv[0 ];
@@ -317,10 +397,42 @@ int main(int argc, char *argv[]) {
317397
318398 features.print ();
319399
320- // Gauge effectiveness
321- printf (" gsoc-2018.json expected/actual: %g/%g\n " , features.calc_expected (gsoc_2018), actual (gsoc_2018));
322- printf (" twitter.json expected/actual: %g/%g\n " , features.calc_expected (twitter), actual (twitter));
323- printf (" random.json expected/actual: %g/%g\n " , features.calc_expected (random), actual (random));
400+ // Gauge effectiveness
401+ printf (" \n " );
402+ printf (" Estimated vs. Actual ns/block for real files:\n " );
403+ printf (" \n " );
404+ printf (" | %-15s " , " File" );
405+ printf (" | %11s " , " Est. (Base)" );
406+ printf (" | %11s " , " Est. (Miss)" );
407+ printf (" | %8s " , " Est." );
408+ printf (" | %8s " , " Actual" );
409+ printf (" | %8s " , " Diff" );
410+ printf (" | %13s " , " Est. Misses" );
411+ if (features.has_events ()) {
412+ printf (" | %13s " , " Actual Misses" );
413+ printf (" | %13s " , " Diff (Misses)" );
414+ printf (" | %13s " , " Adjusted Miss" );
415+ printf (" | %13s " , " Adjusted Diff" );
416+ }
417+ printf (" |\n " );
418+ printf (" |%.17s" , " ---------------------------------------" );
419+ printf (" |%.13s" , " ---------------------------------------" );
420+ printf (" |%.13s" , " ---------------------------------------" );
421+ printf (" |%.10s" , " ---------------------------------------" );
422+ printf (" |%.10s" , " ---------------------------------------" );
423+ printf (" |%.10s" , " ---------------------------------------" );
424+ printf (" |%.15s" , " ---------------------------------------" );
425+ if (features.has_events ()) {
426+ printf (" |%.15s" , " ---------------------------------------" );
427+ printf (" |%.15s" , " ---------------------------------------" );
428+ printf (" |%.15s" , " ---------------------------------------" );
429+ printf (" |%.15s" , " ---------------------------------------" );
430+ }
431+ printf (" |\n " );
432+
433+ print_file_effectiveness (" gsoc-2018.json" , gsoc_2018, features);
434+ print_file_effectiveness (" twitter.json" , twitter, features);
435+ print_file_effectiveness (" random.json" , random, features);
324436
325437 return EXIT_SUCCESS;
326438}
0 commit comments