Skip to content

Commit d3834a1

Browse files
committed
benchmark: ignore significance when using --runs 1
Because the standard deviation can't be calculated when there is only one observation, the R scripts raise an error. However, it may still be useful to run them for non-statistical purposes. This changes the behaviour such that when there is only one observation, the values that depend on the standard deviation become Not Available (NA). Fixes: nodejs#8288 PR-URL: nodejs#8299 Reviewed-By: Anna Henningsen <anna@addaleax.net>
1 parent 6f9157f commit d3834a1

File tree

2 files changed

+37
-21
lines changed

2 files changed

+37
-21
lines changed

benchmark/compare.R

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,30 +33,39 @@ if (!is.null(plot.filename)) {
3333

3434
# Print a table with results
3535
statistics = ddply(dat, "name", function(subdat) {
36-
# Perform a statistics test to see of there actually is a difference in
37-
# performace.
38-
w = t.test(rate ~ binary, data=subdat);
36+
old.rate = subset(subdat, binary == "old")$rate;
37+
new.rate = subset(subdat, binary == "new")$rate;
3938

4039
# Calculate improvement for the "new" binary compared with the "old" binary
41-
new_mu = mean(subset(subdat, binary == "new")$rate);
42-
old_mu = mean(subset(subdat, binary == "old")$rate);
43-
improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));
40+
old.mu = mean(old.rate);
41+
new.mu = mean(new.rate);
42+
improvement = sprintf("%.2f %%", ((new.mu - old.mu) / old.mu * 100));
4443

45-
# Add user friendly stars to the table. There should be at least one star
46-
# before you can say that there is an improvement.
47-
significant = '';
48-
if (w$p.value < 0.001) {
49-
significant = '***';
50-
} else if (w$p.value < 0.01) {
51-
significant = '**';
52-
} else if (w$p.value < 0.05) {
53-
significant = '*';
44+
p.value = NA;
45+
significant = 'NA';
46+
# Check if there is enough data to calculate the p-value
47+
if (length(old.rate) > 1 && length(new.rate) > 1) {
48+
# Perform a statistics test to see if there actually is a difference in
49+
# performance.
50+
w = t.test(rate ~ binary, data=subdat);
51+
p.value = w$p.value;
52+
53+
# Add user friendly stars to the table. There should be at least one star
54+
# before you can say that there is an improvement.
55+
significant = '';
56+
if (p.value < 0.001) {
57+
significant = '***';
58+
} else if (p.value < 0.01) {
59+
significant = '**';
60+
} else if (p.value < 0.05) {
61+
significant = '*';
62+
}
5463
}
5564

5665
r = list(
5766
improvement = improvement,
5867
significant = significant,
59-
p.value = w$p.value
68+
p.value = p.value
6069
);
6170
return(data.frame(r));
6271
});

benchmark/scatter.R

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,17 @@ if (length(aggregate) > 0) {
5151
stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
5252
rate = subdat$rate;
5353

54-
# calculate standard error of the mean
55-
se = sqrt(var(rate)/length(rate));
54+
# calculate confidence interval of the mean
55+
ci = NA;
56+
if (length(rate) > 1) {
57+
se = sqrt(var(rate)/length(rate));
58+
ci = se * qt(0.975, length(rate) - 1)
59+
}
5660

5761
# calculate mean and 95 % confidence interval
5862
r = list(
5963
rate = mean(rate),
60-
confidence.interval = se * qt(0.975, length(rate) - 1)
64+
confidence.interval = ci
6165
);
6266

6367
return(data.frame(r));
@@ -66,11 +70,14 @@ stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
6670
print(stats, row.names=F);
6771

6872
if (!is.null(plot.filename)) {
69-
p = ggplot(stats, aes_string(x=x.axis.name, y='mean', colour=category.name));
73+
p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
7074
if (use.log2) {
7175
p = p + scale_x_continuous(trans='log2');
7276
}
73-
p = p + geom_errorbar(aes(ymin=mean-confidence.interval, ymax=mean+confidence.interval), width=.1);
77+
p = p + geom_errorbar(
78+
aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
79+
width=.1, na.rm=TRUE
80+
);
7481
p = p + geom_point();
7582
p = p + ylab("rate of operations (higher is better)");
7683
p = p + ggtitle(dat[1, 1]);

0 commit comments

Comments
 (0)