benchmark: ignore significance when using --runs 1

AndreasMadsen · AndreasMadsen · commit d3834a1fa329 · 2016-09-16T20:58:27.000+02:00
Because the standard deviation can't be calculated when there is only one observation, the R scripts raise an error. However, it may still be useful to run them for non-statistical purposes. This changes the behaviour such that when there is only one observation, the values that depend on the standard deviation become Not Applicable (NA). Fixes: nodejs#8288 PR-URL: nodejs#8299 Reviewed-By: Anna Henningsen <anna@addaleax.net>
diff --git a/benchmark/compare.R b/benchmark/compare.R
@@ -33,30 +33,39 @@ if (!is.null(plot.filename)) {
 
 # Print a table with results
 statistics = ddply(dat, "name", function(subdat) {
-  # Perform a statistics test to see of there actually is a difference in
-  # performace.
-  w = t.test(rate ~ binary, data=subdat);
+  old.rate = subset(subdat, binary == "old")$rate;
+  new.rate = subset(subdat, binary == "new")$rate;
 
   # Calculate improvement for the "new" binary compared with the "old" binary
-  new_mu = mean(subset(subdat, binary == "new")$rate);
-  old_mu = mean(subset(subdat, binary == "old")$rate);
-  improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));
+  old.mu = mean(old.rate);
+  new.mu = mean(new.rate);
+  improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));
 
-  # Add user friendly stars to the table. There should be at least one star
-  # before you can say that there is an improvement.
-  significant = '';
-  if (w$p.value < 0.001) {
-    significant = '***';
-  } else if (w$p.value < 0.01) {
-    significant = '**';
-  } else if (w$p.value < 0.05) {
-    significant = '*';
+  p.value = NA;
+  significant = 'NA';
+  # Check if there is enough data to calculate the p-value
+  if (length(old.rate) > 1 && length(new.rate) > 1) {
+    # Perform a statistical test to see if there actually is a difference in
+    # performance.
+    w = t.test(rate ~ binary, data=subdat);
+    p.value = w$p.value;
+
+    # Add user friendly stars to the table. There should be at least one star
+    # before you can say that there is an improvement.
+    significant = '';
+    if (p.value < 0.001) {
+      significant = '***';
+    } else if (p.value < 0.01) {
+      significant = '**';
+    } else if (p.value < 0.05) {
+      significant = '*';
+    }
   }
 
   r = list(
     improvement = improvement,
     significant = significant,
-    p.value = w$p.value
+    p.value = p.value
   );
   return(data.frame(r));
 });
diff --git a/benchmark/scatter.R b/benchmark/scatter.R
@@ -51,13 +51,17 @@ if (length(aggregate) > 0) {
 stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
   rate = subdat$rate;
 
-  # calculate standard error of the mean
-  se = sqrt(var(rate)/length(rate));
+  # calculate confidence interval of the mean
+  ci = NA;
+  if (length(rate) > 1) {
+    se = sqrt(var(rate)/length(rate));
+    ci = se * qt(0.975, length(rate) - 1)
+  }
 
   # calculate mean and 95 % confidence interval
   r = list(
     rate = mean(rate),
-    confidence.interval = se * qt(0.975, length(rate) - 1)
+    confidence.interval = ci
   );
 
   return(data.frame(r));
@@ -66,11 +70,14 @@ stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
 print(stats, row.names=F);
 
 if (!is.null(plot.filename)) {
-  p = ggplot(stats, aes_string(x=x.axis.name, y='mean', colour=category.name));
+  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
   if (use.log2) {
     p = p + scale_x_continuous(trans='log2');
   }
-  p = p + geom_errorbar(aes(ymin=mean-confidence.interval, ymax=mean+confidence.interval), width=.1);
+  p = p + geom_errorbar(
+    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
+    width=.1, na.rm=TRUE
+  );
   p = p + geom_point();
   p = p + ylab("rate of operations (higher is better)");
   p = p + ggtitle(dat[1, 1]);