From 1174575fdcc3a93cec1668dbeffa170c67199dcf Mon Sep 17 00:00:00 2001 From: Carl Brasic Date: Thu, 23 Dec 2021 11:19:58 -0600 Subject: [PATCH] Add an optional `cohort` block to science experiments Many experiments operate on data with a very long tail, and the most frequent part of the distribution can wash out notable results in sub-groups. For example, experiment results derived from the data of very large customers often look quite different from the much more common results from small data. Even the use of percentile metrics can't overcome these effects since often the relevant percentiles are very high (above the 99th percentile). This adds an optional block to Scientist::Experiment which should return a "cohort" when called. The block is passed the result of the experiment so it can determine the cohort from the context data, whether the result is a mismatch, or any of the observation data. The determined cohort value is available as `Scientist::Result#cohort` and is intended to be used by the user-defined publication mechanism. --- lib/scientist/experiment.rb | 9 ++++++- lib/scientist/result.rb | 22 +++++++++++++---- test/scientist/experiment_test.rb | 40 +++++++++++++++++++++++++++++++ test/scientist/result_test.rb | 11 +++++++++ 4 files changed, 77 insertions(+), 5 deletions(-) diff --git a/lib/scientist/experiment.rb b/lib/scientist/experiment.rb index f48b339..5720ac4 100644 --- a/lib/scientist/experiment.rb +++ b/lib/scientist/experiment.rb @@ -290,6 +290,13 @@ def use(&block) try "control", &block end + # Define a block which will determine the cohort of this experiment + # when called. The block will be passed a `Scientist::Result` as its + # only argument and the cohort will be set on the result. + def cohort(&block) + @_scientist_determine_cohort = block + end + # Whether or not to raise a mismatch error when a mismatch occurs. def raise_on_mismatches? if raise_on_mismatches.nil? 
@@ -316,7 +323,7 @@ def generate_result(name) end control = observations.detect { |o| o.name == name } - Scientist::Result.new(self, observations, control) + Scientist::Result.new(self, observations, control, @_scientist_determine_cohort) end private diff --git a/lib/scientist/result.rb b/lib/scientist/result.rb index 76a4d21..79dc43b 100644 --- a/lib/scientist/result.rb +++ b/lib/scientist/result.rb @@ -19,19 +19,33 @@ class Scientist::Result # An Array of Observations in execution order. attr_reader :observations + # If the experiment was defined with a cohort block, the cohort this + # result has been determined to belong to. + attr_reader :cohort + # Internal: Create a new result. # - # experiment - the Experiment this result is for - # observations: - an Array of Observations, in execution order - # control: - the control Observation + # experiment - the Experiment this result is for + # observations: - an Array of Observations, in execution order + # control: - the control Observation + # determine_cohort - An optional callable that is passed the Result to + # determine its cohort # - def initialize(experiment, observations = [], control = nil) + def initialize(experiment, observations = [], control = nil, determine_cohort = nil) @experiment = experiment @observations = observations @control = control @candidates = observations - [control] evaluate_candidates + if determine_cohort + begin + @cohort = determine_cohort.call(self) + rescue StandardError => e + experiment.raised :cohort, e + end + end + freeze end diff --git a/test/scientist/experiment_test.rb b/test/scientist/experiment_test.rb index 12462f6..87240e8 100644 --- a/test/scientist/experiment_test.rb +++ b/test/scientist/experiment_test.rb @@ -302,6 +302,46 @@ def @ex.enabled? 
assert_equal "kaboom", exception.message end + describe "cohorts" do + it "accepts a cohort config block" do + @ex.cohort { "1" } + end + + it "assigns a cohort to the result using the provided block" do + @ex.context(foo: "bar") + @ex.cohort { |res| "foo-#{res.context[:foo]}-#{Math.log10(res.control.value).round}" } + @ex.use { 5670 } + @ex.try { 5670 } + + @ex.run + assert_equal "foo-bar-4", @ex.published_result.cohort + end + + it "assigns no cohort if no cohort block passed" do + @ex.use { 5670 } + @ex.try { 5670 } + + @ex.run + assert_nil @ex.published_result.cohort + end + + it "rescues errors raised in the cohort determination block" do + @ex.use { 5670 } + @ex.try { 5670 } + @ex.cohort { |res| raise "intentional" } + + @ex.run + + refute_nil @ex.published_result + assert_nil @ex.published_result.cohort + + assert_equal 1, @ex.exceptions.size + code, exception = @ex.exceptions[0] + assert_equal :cohort, code + assert_equal "intentional", exception.message + end + end + describe "#raise_with" do it "raises custom error if provided" do CustomError = Class.new(Scientist::Experiment::MismatchError) diff --git a/test/scientist/result_test.rb b/test/scientist/result_test.rb index c9bc41e..474a851 100644 --- a/test/scientist/result_test.rb +++ b/test/scientist/result_test.rb @@ -98,6 +98,17 @@ assert_equal @experiment.name, result.experiment_name end + it "takes an optional callable to determine cohort" do + a = Scientist::Observation.new("a", @experiment) { 1 } + b = Scientist::Observation.new("b", @experiment) { 1 } + + result = Scientist::Result.new @experiment, [a, b], a + assert_nil result.cohort + + result = Scientist::Result.new @experiment, [a, b], a, ->(res) { "cohort-1" } + assert_equal "cohort-1", result.cohort + end + it "has the context from an experiment" do @experiment.context :foo => :bar a = Scientist::Observation.new("a", @experiment) { 1 }