Skip to content

Commit 75fc97d

Browse files
authored
DPL: add initial set of helpers for analysis (#1746)
This considerably reduces the verbosity of the analysis code.
1 parent 68a24b4 commit 75fc97d

4 files changed

Lines changed: 152 additions & 58 deletions

File tree

Framework/Core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ endif()
2121

2222
set(SRCS
2323
src/AODReaderHelpers.cxx
24+
src/AnalysisHelpers.cxx
2425
src/BoostOptionsRetriever.cxx
2526
src/ConfigParamsHelper.cxx
2627
src/CompletionPolicy.cxx
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright CERN and copyright holders of ALICE O2. This software is
2+
// distributed under the terms of the GNU General Public License v3 (GPL
3+
// Version 3), copied verbatim in the file "COPYING".
4+
//
5+
// See http://alice-o2.web.cern.ch/license for full licensing information.
6+
//
7+
// In applying this license CERN does not waive the privileges and immunities
8+
// granted to it by virtue of its status as an Intergovernmental Organization
9+
// or submit itself to any jurisdiction.
10+
#ifndef o2_framework_AnalysisHelpers_H_DEFINED
#define o2_framework_AnalysisHelpers_H_DEFINED

#include <ROOT/RDataFrame.hxx>

#include <memory>
#include <string>

// NOTE(review): a using-directive at header scope leaks ROOT::RDF into every
// translation unit including this file. It is kept only so that includers
// which may already rely on it keep compiling; prefer qualified names.
using namespace ROOT::RDF;

namespace o2
{
namespace framework
{
// Only used through std::unique_ptr<> in the declarations below, so a forward
// declaration is enough here.
class TableConsumer;
}

namespace analysis
{

/// Do a single loop on all the entries of the @a input table.
///
/// @param input handle to the table to loop on. It is dereferenced
///        unconditionally by the implementation.
/// @return an RDataFrame reading from the arrow table held by @a input.
ROOT::RDataFrame doSingleLoopOn(std::unique_ptr<framework::TableConsumer>& input);

/// Do a double loop on all the entries of the @a input table which have the
/// same value for the @a grouping column. The entries for the outer index are
/// prefixed with `<name>_` while the entries for the inner loop are prefixed
/// with `<name>bar_`.
///
/// @param input handle to the table to combine with itself. It is
///        dereferenced unconditionally by the implementation.
/// @param name prefix used to build the `<name>_` and `<name>bar_` column
///        names.
/// @param grouping name of the column used to group the entries.
/// @return an RDataFrame iterating on the combined entries.
ROOT::RDataFrame doSelfCombinationsWith(std::unique_ptr<framework::TableConsumer>& input,
                                        std::string name = "p",
                                        std::string grouping = "eventID");

} // namespace analysis
} // namespace o2

#endif // o2_framework_AnalysisHelpers_H_DEFINED
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Copyright CERN and copyright holders of ALICE O2. This software is
2+
// distributed under the terms of the GNU General Public License v3 (GPL
3+
// Version 3), copied verbatim in the file "COPYING".
4+
//
5+
// See http://alice-o2.web.cern.ch/license for full licensing information.
6+
//
7+
// In applying this license CERN does not waive the privileges and immunities
8+
// granted to it by virtue of its status as an Intergovernmental Organization
9+
// or submit itself to any jurisdiction.
10+
#include "Framework/AnalysisHelpers.h"
11+
#include "Framework/RCombinedDS.h"
12+
#include "Framework/TableBuilder.h"
13+
#include "Framework/TableConsumer.h"
14+
15+
#include <ROOT/RDataFrame.hxx>
16+
#include <ROOT/RArrowDS.hxx>
17+
18+
using namespace ROOT::RDF;
19+
20+
namespace o2
21+
{
22+
namespace analysis
23+
{
24+
25+
/// Build an RDataFrame on top of the arrow table held by @a input.
///
/// The table is wrapped in an RArrowDS which is then handed over to the
/// returned RDataFrame. @a input is dereferenced without any check.
ROOT::RDataFrame doSingleLoopOn(std::unique_ptr<framework::TableConsumer>& input)
{
  // An empty column list is passed to RArrowDS
  // (presumably meaning "all the columns" -- TODO confirm).
  const std::vector<std::string> noColumns;
  auto source = std::make_unique<RArrowDS>(input->asArrowTable(), noColumns);
  // The dataframe takes ownership of the data source.
  return ROOT::RDataFrame(std::move(source));
}
31+
32+
/// Build an RDataFrame which loops on pairs of entries of the @a input table.
///
/// The arrow table held by @a input is wrapped in two RArrowDS instances which
/// are joined by an RCombinedDS, using a block join index on the @a grouping
/// column with the BlockCombinationRule::StrictlyUpper rule. Columns coming
/// from the first source are prefixed with `<name>_`, those coming from the
/// second one with `<name>bar_`. @a input is dereferenced without any check.
ROOT::RDataFrame doSelfCombinationsWith(std::unique_ptr<framework::TableConsumer>& input, std::string name, std::string grouping)
{
  auto table = input->asArrowTable();
  using Index = RCombinedDSBlockJoinIndex<int>;
  // Both sides of the combination read from the very same table.
  auto left = std::make_unique<RArrowDS>(table, std::vector<std::string>{});
  auto right = std::make_unique<RArrowDS>(table, std::vector<std::string>{});
  // NOTE(review): the `true` argument presumably marks this as a self join;
  // confirm against RCombinedDSBlockJoinIndex.
  auto index = std::make_unique<Index>(grouping, true, BlockCombinationRule::StrictlyUpper);
  auto combined = std::make_unique<RCombinedDS>(std::move(left), std::move(right), std::move(index), name + "_", name + "bar_");

  ROOT::RDataFrame rdf(std::move(combined));
  return rdf;
}
43+
44+
} // namespace analysis
45+
} // namespace o2

Framework/TestWorkflows/src/o2D0Analysis.cxx

Lines changed: 62 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -8,85 +8,89 @@
88
// granted to it by virtue of its status as an Intergovernmental Organization
99
// or submit itself to any jurisdiction.
1010
#include "Framework/runDataProcessing.h"
11-
#include "Framework/RCombinedDS.h"
12-
#include "Framework/TableBuilder.h"
11+
#include "Framework/AnalysisHelpers.h"
1312

1413
#include <ROOT/RDataFrame.hxx>
15-
#include <ROOT/RArrowDS.hxx>
1614

15+
using namespace ROOT::RDF;
1716
using namespace o2::framework;
1817

1918
// A dummy workflow which creates a few of the tables proposed by Ruben,
2019
// using ARROW
2120
WorkflowSpec defineDataProcessing(ConfigContext const& specs)
2221
{
22+
// Workflow definition. A workflow can be one or more DataProcessors
23+
// each implementing (part of) an analysis. Each DataProcessor has
24+
// (at least) a name, some Inputs, some Outputs and they get arranged
25+
// together accordingly.
2326
WorkflowSpec workflow{
24-
/// Minimal analysis example
27+
// Multiple DataProcessor specs
28+
// can be provided per workflow
2529
DataProcessorSpec{
30+
// The name of my analysis
2631
"d0-analysis",
27-
{
32+
Inputs{
2833
// Dangling inputs of type AOD will be automatically picked up
2934
// by DPL and an extra reader device will be instantiated to
30-
// read them.
31-
InputSpec{ "DZeroFlagged", "AOD", "DZEROFLAGGED" },
35+
// read them. In this particular case the signature
36+
// AOD/DZEROFLAGGED is associated to Gianmichele's
37+
// D0 candidates schema. The first string is just a label
38+
// so that the algorithm can in principle be reused for different
39+
// kinds of candidates.
40+
InputSpec{ "candidates", "AOD", "DZEROFLAGGED" },
3241
},
33-
{},
42+
// No outputs for the time being.
43+
Outputs{},
3444
AlgorithmSpec{
35-
[](InitContext& setup) {
36-
return [](ProcessingContext& ctx) {
37-
auto s = ctx.inputs().get<TableConsumer>("DZeroFlagged");
38-
/// From the handle, we construct the actual arrow table
39-
/// which is then used as a source for the RDataFrame.
40-
/// This is probably easy to change to a:
41-
///
42-
/// auto rdf = ctx.inputs().get<RDataSource>("xz");
43-
auto table = s->asArrowTable();
44-
using namespace ROOT::RDF;
45+
// This is the actual per "message" loop, where a message could
46+
// be the contents of a file or part of it.
47+
// FIXME: Too much boilerplate.
48+
adaptStateless([](InputRecord& inputs) {
49+
auto input = inputs.get<TableConsumer>("candidates");
4550

46-
TFile f("result.root", "RECREATE");
51+
// This does a single loop on all the candidates in the input message
52+
// using a simple mask on the cand_type_ML column and does
53+
// a simple 1D histogram of the filtered entries.
54+
auto candidates = o2::analysis::doSingleLoopOn(input);
4755

48-
auto flatD0 = std::make_unique<RArrowDS>(table, std::vector<std::string>{});
49-
ROOT::RDataFrame rdf1(std::move(flatD0));
50-
/// A single loop to do an invariant mass plot, where we use the preselected
51-
/// candidates
52-
auto candFilter = [](int x) -> bool { return x & 0x1; };
53-
auto h1 = rdf1.Filter(candFilter, { "cand_type_ML" }).Histo1D("inv_mass_ML");
54-
h1->SetName("InvariantMass");
55-
h1->Write();
56+
auto h1 = candidates.Filter("(bool)(cand_type_ML & 0x1)").Histo1D("inv_mass_ML");
5657

57-
/// Double loops on all supported loop types.
58-
using Index = RCombinedDSBlockJoinIndex<int>;
59-
auto types = {
60-
BlockCombinationRule::Anti,
61-
BlockCombinationRule::Full,
62-
BlockCombinationRule::Diagonal,
63-
BlockCombinationRule::StrictlyUpper,
64-
BlockCombinationRule::Upper,
65-
};
66-
// A few helpers
67-
auto bothCandFilter = [](int x, int y) -> bool { return x & 0x1 && y & 0x1; };
68-
auto delta = [](float x, float y) { return x - y; };
58+
// A lambda function subtracting two quantities. This defines
59+
// a function "delta" which can be invoked with
60+
//
61+
// delta(1,2)
62+
//
63+
// and will return 1 - 2.
64+
auto delta = [](float x, float y) { return x - y; };
6965

70-
for (auto combinationType : types) {
71-
auto d0 = std::make_unique<RArrowDS>(table, std::vector<std::string>{});
72-
auto d0bar = std::make_unique<RArrowDS>(table, std::vector<std::string>{});
73-
auto d0d0bar = std::make_unique<RCombinedDS>(std::move(d0), std::move(d0bar), std::move(std::make_unique<Index>("cand_evtID_ML", true, combinationType)), "d0_", "d0bar_");
66+
// This does all the combinations for all the candidates which have
67+
// the same value for cand_evtID_ML (the Event ID).
68+
// d0_ is the prefix assigned to the outer variable of the double loop.
69+
// d0bar_ is the prefix assigned to the inner variable of the double loop.
70+
//
71+
// The lines below will:
72+
// * Filter the combinations according to some mask
73+
// * Define a column delta_phi with the difference in phi between d0 and d0bar phi
74+
// * Define a column delta_eta with the difference in eta between d0 and d0bar
75+
// * Do two histograms with delta_phi, delta_eta
76+
auto combinations = o2::analysis::doSelfCombinationsWith(input, "d0", "cand_evtID_ML");
77+
auto deltas = combinations.Filter("d0_cand_type_ML & 0x1 && d0bar_cand_type_ML & 0x1")
78+
.Define("delta_phi", delta, { "d0_phi_cand_ML", "d0bar_phi_cand_ML" })
79+
.Define("delta_eta", delta, { "d0_eta_cand_ML", "d0bar_eta_cand_ML" });
80+
auto h2 = deltas.Histo1D("delta_phi");
81+
auto h3 = deltas.Histo1D("delta_eta");
7482

75-
ROOT::RDataFrame rdf2(std::move(d0d0bar));
76-
auto combinatorics = rdf2.Filter(bothCandFilter, { "d0_cand_type_ML", "d0bar_cand_type_ML" })
77-
.Define("delta_phi", delta, { "d0_phi_cand_ML", "d0bar_phi_cand_ML" })
78-
.Define("delta_eta", delta, { "d0_eta_cand_ML", "d0bar_eta_cand_ML" });
79-
auto h2 = combinatorics.Histo1D("delta_phi");
80-
auto h3 = combinatorics.Histo1D("delta_eta");
81-
82-
std::string rule = RCombinedDSIndexHelpers::combinationRuleAsString(combinationType);
83-
h2->SetName(("DeltaPhi/" + rule).c_str());
84-
h2->Write();
85-
h3->SetName(("DeltaEta/" + rule).c_str());
86-
h3->Write();
87-
}
88-
};
89-
} } }
83+
// FIXME: For the moment we hardcode saving the histograms.
84+
// In reality it should send the results as outputs to a downstream merger
85+
// process which merges them as wished.
86+
TFile f("result.root", "RECREATE");
87+
h1->SetName("InvariantMass");
88+
h1->Write();
89+
h2->SetName("DeltaPhi");
90+
h2->Write();
91+
h3->SetName("DeltaEta");
92+
h3->Write();
93+
}) } }
9094
};
9195
return workflow;
9296
}

0 commit comments

Comments
 (0)