Skip to content

Commit b33c799

Browse files
authored
Adapt DataInputDirector to the new AO2D file format, which includes a subdirectory TF_%d per time frame (#4573)
* This allows to save table data of several Time Frames (TF) in one file. Each TF is saved in a sub folder TF_%d. Each subfolder contains the same trees.
1 parent ca788e1 commit b33c799

4 files changed

Lines changed: 109 additions & 64 deletions

File tree

Framework/Core/include/Framework/DataInputDirector.h

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ namespace framework
2424
{
2525
using namespace rapidjson;
2626

27+
struct FileNameHolder {
28+
std::string fileName;
29+
int numberOfTimeFrames = 0;
30+
std::vector<std::string> listOfTimeFrameKeys;
31+
};
32+
FileNameHolder* makeFileNameHolder(std::string fileName);
33+
2734
struct DataInputDescriptor {
2835
/// Holds information concerning the reading of an aod table.
2936
/// The information includes the table specification, treename,
@@ -44,31 +51,34 @@ struct DataInputDescriptor {
4451
void setFilenamesRegex(std::string fn) { mFilenameRegex = fn; }
4552
void setFilenamesRegex(std::string* fnptr) { mFilenameRegexPtr = fnptr; }
4653

47-
void setDefaultInputfiles(std::vector<std::string>* difnptr) { mdefaultFilenamesPtr = difnptr; }
54+
void setDefaultInputfiles(std::vector<FileNameHolder*>* difnptr) { mdefaultFilenamesPtr = difnptr; }
4855

49-
void addFilename(std::string fn);
56+
void addFileNameHolder(FileNameHolder* fn);
5057
int fillInputfiles();
5158

5259
// getters
5360
std::string getInputfilesFilename();
5461
std::string getFilenamesRegexString();
5562
std::regex getFilenamesRegex();
5663
int getNumberInputfiles() { return mfilenames.size(); }
64+
int getNumberTimeFrames() { return mtotalNumberTimeFrames; }
65+
66+
std::tuple<TFile*, std::string> getFileFolder(int counter);
5767

58-
TFile* getInputFile(int counter);
5968
void closeInputFile();
60-
std::string getInputFilename(int counter);
6169
bool isAlienSupportOn() { return mAlienSupport; }
6270

6371
private:
6472
std::string minputfilesFile = "";
6573
std::string* minputfilesFilePtr = nullptr;
6674
std::string mFilenameRegex = "";
6775
std::string* mFilenameRegexPtr = nullptr;
68-
std::vector<std::string> mfilenames;
69-
std::vector<std::string>* mdefaultFilenamesPtr = nullptr;
76+
std::vector<FileNameHolder*> mfilenames;
77+
std::vector<FileNameHolder*>* mdefaultFilenamesPtr = nullptr;
7078
TFile* mcurrentFile = nullptr;
7179
bool mAlienSupport = false;
80+
81+
int mtotalNumberTimeFrames = 0;
7282
};
7383

7484
struct DataInputDirector {
@@ -94,7 +104,7 @@ struct DataInputDirector {
94104
// getters
95105
DataInputDescriptor* getDataInputDescriptor(header::DataHeader dh);
96106
std::unique_ptr<TTreeReader> getTreeReader(header::DataHeader dh, int counter, std::string treeName);
97-
std::string getInputFilename(header::DataHeader dh, int counter);
107+
std::tuple<TFile*, std::string> getFileFolder(header::DataHeader dh, int counter);
98108
TTree* getDataTree(header::DataHeader dh, int counter);
99109
int getNumberInputDescriptors() { return mdataInputDescriptors.size(); }
100110

@@ -104,7 +114,7 @@ struct DataInputDirector {
104114
std::string mFilenameRegex;
105115
std::string* const mFilenameRegexPtr = &mFilenameRegex;
106116
DataInputDescriptor* mdefaultDataInputDescriptor = nullptr;
107-
std::vector<std::string> mdefaultInputFiles;
117+
std::vector<FileNameHolder*> mdefaultInputFiles;
108118
std::vector<DataInputDescriptor*> mdataInputDescriptors;
109119

110120
bool mDebugMode = false;

Framework/Core/src/DataInputDirector.cxx

Lines changed: 84 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,48 @@ namespace framework
2424
{
2525
using namespace rapidjson;
2626

27+
FileNameHolder* makeFileNameHolder(std::string fileName)
28+
{
29+
auto fileNameHolder = new FileNameHolder();
30+
fileNameHolder->fileName = fileName;
31+
32+
TFile file = TFile(fileName.c_str(), "R");
33+
if (!file.IsOpen()) {
34+
LOGP(ERROR, "\"{}\" can not be opened.", fileName);
35+
return fileNameHolder;
36+
}
37+
38+
// find TimeFrame folders
39+
std::regex TFRegex = std::regex("TF_[0-9]+");
40+
TList* keyList = file.GetListOfKeys();
41+
for (auto key : *keyList) {
42+
if (std::regex_match(((TObjString*)key)->GetString().Data(), TFRegex)) {
43+
fileNameHolder->listOfTimeFrameKeys.emplace_back(std::string(((TObjString*)key)->GetString().Data()));
44+
}
45+
}
46+
fileNameHolder->numberOfTimeFrames = fileNameHolder->listOfTimeFrameKeys.size();
47+
48+
return fileNameHolder;
49+
}
50+
2751
DataInputDescriptor::DataInputDescriptor(bool alienSupport)
2852
{
2953
mAlienSupport = alienSupport;
3054
}
3155

56+
void DataInputDescriptor::printOut()
57+
{
58+
LOGP(INFO, "DataInputDescriptor");
59+
LOGP(INFO, " Table name : {}", tablename);
60+
LOGP(INFO, " Tree name : {}", treename);
61+
LOGP(INFO, " Input files file : {}", getInputfilesFilename());
62+
LOGP(INFO, " File name regex : {}", getFilenamesRegexString());
63+
LOGP(INFO, " Input files : {}", mfilenames.size());
64+
for (auto fn : mfilenames)
65+
LOGP(INFO, " {} {}", fn->fileName, fn->numberOfTimeFrames);
66+
LOGP(INFO, " Total number of TF: {}", getNumberTimeFrames());
67+
}
68+
3269
std::string DataInputDescriptor::getInputfilesFilename()
3370
{
3471
return (minputfilesFile.empty() && minputfilesFilePtr) ? (std::string)*minputfilesFilePtr : minputfilesFile;
@@ -44,35 +81,52 @@ std::regex DataInputDescriptor::getFilenamesRegex()
4481
return std::regex(getFilenamesRegexString());
4582
}
4683

47-
void DataInputDescriptor::addFilename(std::string fn)
84+
void DataInputDescriptor::addFileNameHolder(FileNameHolder* fn)
4885
{
49-
if (!mAlienSupport && fn.rfind("alien://", 0) == 0) {
86+
if (!mAlienSupport && fn->fileName.rfind("alien://", 0) == 0) {
5087
LOG(debug) << "AliEn file requested. Enabling support.";
5188
TGrid::Connect("alien://");
5289
mAlienSupport = true;
5390
}
91+
92+
mtotalNumberTimeFrames += fn->numberOfTimeFrames;
5493
mfilenames.emplace_back(fn);
5594
}
5695

57-
TFile* DataInputDescriptor::getInputFile(int counter)
96+
std::tuple<TFile*, std::string> DataInputDescriptor::getFileFolder(int counter)
5897
{
59-
if (counter < getNumberInputfiles()) {
98+
std::string filename("");
99+
std::string directoryName("");
100+
101+
int cnt = mfilenames[0]->numberOfTimeFrames;
102+
if (counter >= 0 && counter < getNumberTimeFrames()) {
103+
for (int ii = 0; ii < getNumberInputfiles(); ii++) {
104+
if (counter < cnt) {
105+
filename = mfilenames[ii]->fileName;
106+
cnt -= mfilenames[ii]->numberOfTimeFrames;
107+
directoryName = (mfilenames[ii]->listOfTimeFrameKeys)[counter - cnt];
108+
break;
109+
} else {
110+
cnt += mfilenames[ii + 1]->numberOfTimeFrames;
111+
}
112+
}
113+
60114
if (mcurrentFile) {
61-
if (mcurrentFile->GetName() != mfilenames[counter]) {
115+
if (mcurrentFile->GetName() != filename) {
62116
closeInputFile();
63-
mcurrentFile = TFile::Open(mfilenames[counter].c_str());
117+
mcurrentFile = TFile::Open(filename.c_str());
64118
}
65119
} else {
66-
mcurrentFile = TFile::Open(mfilenames[counter].c_str());
120+
mcurrentFile = TFile::Open(filename.c_str());
67121
}
68122
if (!mcurrentFile) {
69-
throw std::runtime_error(fmt::format("Couldn't open file \"{}\"!", mfilenames[counter]));
123+
throw std::runtime_error(fmt::format("Couldn't open file \"{}\"!", filename));
70124
}
71125
} else {
72126
closeInputFile();
73127
}
74128

75-
return mcurrentFile;
129+
return std::make_tuple(mcurrentFile, directoryName);
76130
}
77131

78132
void DataInputDescriptor::closeInputFile()
@@ -98,7 +152,7 @@ int DataInputDescriptor::fillInputfiles()
98152
while (std::getline(filelist, fileName)) {
99153
if (getFilenamesRegexString().empty() ||
100154
std::regex_match(fileName, getFilenamesRegex())) {
101-
addFilename(fileName);
155+
addFileNameHolder(makeFileNameHolder(fileName));
102156
}
103157
}
104158
} catch (...) {
@@ -108,10 +162,10 @@ int DataInputDescriptor::fillInputfiles()
108162
} else {
109163
// 3. getFilenamesRegex() @ mdefaultFilenamesPtr
110164
if (mdefaultFilenamesPtr) {
111-
for (auto fileName : *mdefaultFilenamesPtr) {
165+
for (auto fileNameHolder : *mdefaultFilenamesPtr) {
112166
if (getFilenamesRegexString().empty() ||
113-
std::regex_match(fileName, getFilenamesRegex())) {
114-
addFilename(fileName);
167+
std::regex_match(fileNameHolder->fileName, getFilenamesRegex())) {
168+
addFileNameHolder(fileNameHolder);
115169
}
116170
}
117171
}
@@ -120,29 +174,6 @@ int DataInputDescriptor::fillInputfiles()
120174
return getNumberInputfiles();
121175
}
122176

123-
std::string DataInputDescriptor::getInputFilename(int counter)
124-
{
125-
std::string filename("");
126-
if (counter >= 0 && counter < getNumberInputfiles()) {
127-
filename = mfilenames[counter];
128-
}
129-
130-
return filename;
131-
}
132-
133-
void DataInputDescriptor::printOut()
134-
{
135-
LOGP(INFO, "DataInputDescriptor");
136-
LOGP(INFO, " Table name : {}", tablename);
137-
LOGP(INFO, " Tree name : {}", treename);
138-
LOGP(INFO, " Input files file : {}", getInputfilesFilename());
139-
LOGP(INFO, " File name regex : {}", getFilenamesRegexString());
140-
LOGP(INFO, " Input files : {}", mfilenames.size());
141-
for (auto fn : mfilenames) {
142-
LOGP(INFO, " {}", fn);
143-
}
144-
}
145-
146177
DataInputDirector::DataInputDirector()
147178
{
148179
createDefaultDataInputDescriptor();
@@ -154,7 +185,7 @@ DataInputDirector::DataInputDirector(std::string inputFile)
154185
inputFile.erase(0, 1);
155186
setInputfilesFile(inputFile);
156187
} else {
157-
mdefaultInputFiles.emplace_back(inputFile);
188+
mdefaultInputFiles.emplace_back(makeFileNameHolder(inputFile));
158189
}
159190

160191
createDefaultDataInputDescriptor();
@@ -163,7 +194,7 @@ DataInputDirector::DataInputDirector(std::string inputFile)
163194
DataInputDirector::DataInputDirector(std::vector<std::string> inputFiles)
164195
{
165196
for (auto inputFile : inputFiles) {
166-
mdefaultInputFiles.emplace_back(inputFile);
197+
mdefaultInputFiles.emplace_back(makeFileNameHolder(inputFile));
167198
}
168199

169200
createDefaultDataInputDescriptor();
@@ -278,13 +309,13 @@ bool DataInputDirector::readJsonDocument(Document* jsonDoc)
278309
setInputfilesFile(fileName);
279310
} else {
280311
setInputfilesFile("");
281-
mdefaultInputFiles.emplace_back(fileName);
312+
mdefaultInputFiles.emplace_back(makeFileNameHolder(fileName));
282313
}
283314
} else if (didirItem[itemName].IsArray()) {
284315
setInputfilesFile("");
285316
auto fns = didirItem[itemName].GetArray();
286317
for (auto& fn : fns) {
287-
mdefaultInputFiles.emplace_back(fn.GetString());
318+
mdefaultInputFiles.emplace_back(makeFileNameHolder(fn.GetString()));
288319
}
289320
} else {
290321
LOGP(ERROR, "Check the JSON document! Item \"{}\" must be a string or an array!", itemName);
@@ -361,15 +392,15 @@ bool DataInputDirector::readJsonDocument(Document* jsonDoc)
361392
} else {
362393
if (didesc->getFilenamesRegexString().empty() ||
363394
std::regex_match(fileName, didesc->getFilenamesRegex())) {
364-
didesc->addFilename(fileName);
395+
didesc->addFileNameHolder(makeFileNameHolder(fileName));
365396
}
366397
}
367398
} else if (didescItem[itemName].IsArray()) {
368399
auto fns = didescItem[itemName].GetArray();
369400
for (auto& fn : fns) {
370401
if (didesc->getFilenamesRegexString().empty() ||
371402
std::regex_match(fn.GetString(), didesc->getFilenamesRegex())) {
372-
didesc->addFilename(fn.GetString());
403+
didesc->addFileNameHolder(makeFileNameHolder(fn.GetString()));
373404
}
374405
}
375406
} else {
@@ -434,8 +465,9 @@ std::unique_ptr<TTreeReader> DataInputDirector::getTreeReader(header::DataHeader
434465
didesc = mdefaultDataInputDescriptor;
435466
}
436467

437-
auto file = didesc->getInputFile(counter);
468+
auto [file, directory] = didesc->getFileFolder(counter);
438469
if (file) {
470+
treename = directory + "/" + treename;
439471
reader = std::make_unique<TTreeReader>(treename.c_str(), file);
440472
if (!reader) {
441473
throw std::runtime_error(fmt::format(R"(Couldn't create TTreeReader for tree "{}" in file "{}")", treename, file->GetName()));
@@ -445,16 +477,16 @@ std::unique_ptr<TTreeReader> DataInputDirector::getTreeReader(header::DataHeader
445477
return reader;
446478
}
447479

448-
std::string DataInputDirector::getInputFilename(header::DataHeader dh, int counter)
480+
std::tuple<TFile*, std::string> DataInputDirector::getFileFolder(header::DataHeader dh, int counter)
449481
{
450482
auto didesc = getDataInputDescriptor(dh);
451483
// if NOT match then use defaultDataInputDescriptor
452484
if (!didesc) {
453485
didesc = mdefaultDataInputDescriptor;
454486
}
455-
auto filename = didesc->getInputFilename(counter);
487+
auto [file, directory] = didesc->getFileFolder(counter);
456488

457-
return filename;
489+
return std::make_tuple(file, directory);
458490
}
459491

460492
TTree* DataInputDirector::getDataTree(header::DataHeader dh, int counter)
@@ -474,8 +506,9 @@ TTree* DataInputDirector::getDataTree(header::DataHeader dh, int counter)
474506
treename = aod::datamodel::getTreeName(dh);
475507
}
476508

477-
auto file = didesc->getInputFile(counter);
509+
auto [file, directory] = didesc->getFileFolder(counter);
478510
if (file) {
511+
treename = directory + "/" + treename;
479512
tree = (TTree*)file->Get(treename.c_str());
480513
if (!tree) {
481514
throw std::runtime_error(fmt::format(R"(Couldn't get TTree "{}" from "{}")", treename, file->GetName()));
@@ -506,9 +539,9 @@ bool DataInputDirector::isValid()
506539

507540
bool DataInputDirector::atEnd(int counter)
508541
{
509-
bool status = mdefaultDataInputDescriptor->getNumberInputfiles() <= counter;
542+
bool status = mdefaultDataInputDescriptor->getNumberTimeFrames() <= counter;
510543
for (auto didesc : mdataInputDescriptors) {
511-
status &= (didesc->getNumberInputfiles() <= counter);
544+
status &= (didesc->getNumberTimeFrames() <= counter);
512545
}
513546

514547
return status;
@@ -520,9 +553,8 @@ void DataInputDirector::printOut()
520553
LOGP(INFO, " Default input files file : {}", minputfilesFile);
521554
LOGP(INFO, " Default file name regex : {}", mFilenameRegex);
522555
LOGP(INFO, " Default file names : {}", mdefaultInputFiles.size());
523-
for (auto const& fn : mdefaultInputFiles) {
524-
LOGP(INFO, " {}", fn);
525-
}
556+
for (auto const& fn : mdefaultInputFiles)
557+
LOGP(INFO, " {} {}", fn->fileName, fn->numberOfTimeFrames);
526558
LOGP(INFO, " Default DataInputDescriptor:");
527559
mdefaultDataInputDescriptor->printOut();
528560
LOGP(INFO, " DataInputDescriptors : {}", getNumberInputDescriptors());

Framework/Core/src/WorkflowHelpers.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext
385385
for (auto ii = 0u; ii < OutputsInputs.size(); ii++) {
386386
if ((outputTypes[ii] & 2) == 2) {
387387

388-
// temporarily also request to be dangling
388+
// is this dangling ?
389389
if ((outputTypes[ii] & 1) == 1) {
390390
outputsInputsAOD.emplace_back(OutputsInputs[ii]);
391391
isdangling.emplace_back((outputTypes[ii] & 1) == 1);

Framework/Core/test/test_DataInputDirector.cxx

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,15 @@ BOOST_AUTO_TEST_CASE(TestDatainputDirector)
5050

5151
DataInputDirector didir1;
5252
BOOST_CHECK(didir1.readJson(jsonFile));
53-
//didir1.printOut(); printf("\n\n");
53+
didir1.printOut(); printf("\n\n");
5454

5555
BOOST_CHECK_EQUAL(didir1.getNumberInputDescriptors(), 2);
5656

5757
auto dh = DataHeader(DataDescription{"DUE"},
5858
DataOrigin{"AOD"},
5959
DataHeader::SubSpecificationType{0});
60-
BOOST_CHECK_EQUAL(didir1.getInputFilename(dh, 1), "Bresults_1.root");
60+
auto [file1, directory1] = didir1.getFileFolder(dh, 1);
61+
//BOOST_CHECK_EQUAL(file1->GetName(), "Bresults_1.root");
6162

6263
auto didesc = didir1.getDataInputDescriptor(dh);
6364
BOOST_CHECK(didesc);
@@ -93,9 +94,11 @@ BOOST_AUTO_TEST_CASE(TestDatainputDirector)
9394
"Bresults_1.root",
9495
"Bresults_2.root"};
9596
DataInputDirector didir2(inputFiles);
97+
didir2.printOut(); printf("\n\n");
9698
BOOST_CHECK(didir2.readJson(jsonFile));
9799

98-
BOOST_CHECK_EQUAL(didir2.getInputFilename(dh, 1), "Bresults_1.root");
100+
auto [file2, directory2] = didir2.getFileFolder(dh, 1);
101+
//BOOST_CHECK_EQUAL(file2->GetName(), "Bresults_1.root");
99102

100103
didesc = didir2.getDataInputDescriptor(dh);
101104
BOOST_CHECK(didesc);

0 commit comments

Comments
 (0)