Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add an optional cohort block to science experiments
Many experiments operate on data with a very long tail, and the most
frequent part of the distribution can wash out notable results in
sub-groups.  For example, experiment results derived from the data of
very large customers often look quite different from the much more
common results derived from small data sets.  Even the use of percentile metrics
can't overcome these effects since often the relevant percentiles are
very high (above 99-percentile).

This adds an optional block to Scientist::Experiment which should return a
"cohort" when called.  The block is passed the result of the experiment
so it can determine the cohort from the context data, whether the result
is a mismatch, or any of the observation data.

The determined cohort value is available as `Scientist::Result#cohort`
and is intended to be used by the user-defined publication mechanism.
  • Loading branch information
Carl Brasic committed Jan 3, 2022
commit 1174575fdcc3a93cec1668dbeffa170c67199dcf
9 changes: 8 additions & 1 deletion lib/scientist/experiment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,13 @@ def use(&block)
try "control", &block
end

# Register the callable used to work out which cohort a result of this
# experiment belongs to. The block receives the `Scientist::Result` as
# its single argument; its return value becomes the result's cohort.
def cohort(&determine_cohort)
  @_scientist_determine_cohort = determine_cohort
end

# Whether or not to raise a mismatch error when a mismatch occurs.
def raise_on_mismatches?
if raise_on_mismatches.nil?
Expand All @@ -316,7 +323,7 @@ def generate_result(name)
end

control = observations.detect { |o| o.name == name }
Scientist::Result.new(self, observations, control)
Scientist::Result.new(self, observations, control, @_scientist_determine_cohort)
end

private
Expand Down
22 changes: 18 additions & 4 deletions lib/scientist/result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,33 @@ class Scientist::Result
# An Array of Observations in execution order.
attr_reader :observations

# If the experiment was defined with a cohort block, the cohort this
# result has been determined to belong to.
attr_reader :cohort

# Internal: Create a new result.
#
# experiment - the Experiment this result is for
# observations: - an Array of Observations, in execution order
# control: - the control Observation
# experiment - the Experiment this result is for
# observations: - an Array of Observations, in execution order
# control: - the control Observation
# determine_cohort - An optional callable that is passed the Result to
# determine its cohort
#
def initialize(experiment, observations = [], control = nil)
def initialize(experiment, observations = [], control = nil, determine_cohort = nil)
  @experiment   = experiment
  @observations = observations
  @control      = control
  # Every observation other than the control is treated as a candidate.
  @candidates   = observations - [control]
  evaluate_candidates

  # The cohort callable is optional. When given, it is handed this result
  # (not yet frozen); any StandardError it raises is reported through the
  # experiment's `raised` hook under the :cohort operation, and @cohort is
  # left unassigned.
  if determine_cohort
    begin
      @cohort = determine_cohort.call(self)
    rescue => error
      experiment.raised(:cohort, error)
    end
  end

  # Freeze last, so the cohort assignment above is still permitted.
  freeze
end

Expand Down
40 changes: 40 additions & 0 deletions test/scientist/experiment_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,46 @@ def @ex.enabled?
assert_equal "kaboom", exception.message
end

# Specs for the optional cohort block on an experiment.
describe "cohorts" do
  # Smoke test: configuring a cohort block must not raise.
  it "accepts a cohort config block" do
    @ex.cohort { "1" }
  end

  # The block sees the full result, so the cohort can combine context data
  # with observed values (here: the order of magnitude of the control value).
  it "assigns a cohort to the result using the provided block" do
    @ex.context(foo: "bar")
    @ex.cohort do |result|
      magnitude = Math.log10(result.control.value).round
      "foo-#{result.context[:foo]}-#{magnitude}"
    end
    @ex.use { 5670 }
    @ex.try { 5670 }

    @ex.run
    assert_equal "foo-bar-4", @ex.published_result.cohort
  end

  it "assigns no cohort if no cohort block passed" do
    @ex.use { 5670 }
    @ex.try { 5670 }

    @ex.run
    assert_nil @ex.published_result.cohort
  end

  # A failing cohort block must not prevent publication; the error is
  # expected to be recorded under the :cohort operation instead.
  it "rescues errors raised in the cohort determination block" do
    @ex.use { 5670 }
    @ex.try { 5670 }
    @ex.cohort { |_result| raise "intentional" }

    @ex.run

    published = @ex.published_result
    refute_nil published
    assert_nil published.cohort

    assert_equal 1, @ex.exceptions.size
    operation, error = @ex.exceptions[0]
    assert_equal :cohort, operation
    assert_equal "intentional", error.message
  end
end

describe "#raise_with" do
it "raises custom error if provided" do
CustomError = Class.new(Scientist::Experiment::MismatchError)
Expand Down
11 changes: 11 additions & 0 deletions test/scientist/result_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,17 @@
assert_equal @experiment.name, result.experiment_name
end

# Result accepts a fourth, optional argument: a callable that computes the
# cohort. Without it the cohort is nil; with it the callable's return value
# is exposed through Result#cohort.
it "takes an optional callable to determine cohort" do
  control   = Scientist::Observation.new("a", @experiment) { 1 }
  candidate = Scientist::Observation.new("b", @experiment) { 1 }
  observations = [control, candidate]

  without_cohort = Scientist::Result.new(@experiment, observations, control)
  assert_nil without_cohort.cohort

  determine_cohort = ->(_result) { "cohort-1" }
  with_cohort = Scientist::Result.new(@experiment, observations, control, determine_cohort)
  assert_equal "cohort-1", with_cohort.cohort
end

it "has the context from an experiment" do
@experiment.context :foo => :bar
a = Scientist::Observation.new("a", @experiment) { 1 }
Expand Down