diff --git a/Framework/AODMerger/CMakeLists.txt b/Framework/AODMerger/CMakeLists.txt new file mode 100644 index 0000000000000..6d344237928b3 --- /dev/null +++ b/Framework/AODMerger/CMakeLists.txt @@ -0,0 +1,20 @@ +# Copyright 2019-2020 CERN and copyright holders of ALICE O2. +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +# All rights not expressly granted are reserved. +# +# This software is distributed under the terms of the GNU General Public +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". +# +# In applying this license CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization +# or submit itself to any jurisdiction. + +o2_add_executable(merger + COMPONENT_NAME aod + SOURCES src/aodMerger.cxx + PUBLIC_LINK_LIBRARIES ROOT::Core ROOT::Net) + +o2_add_executable(thinner + COMPONENT_NAME aod + SOURCES src/aodThinner.cxx + PUBLIC_LINK_LIBRARIES ROOT::Core ROOT::Net) diff --git a/Framework/AODMerger/src/aodMerger.cxx b/Framework/AODMerger/src/aodMerger.cxx new file mode 100644 index 0000000000000..8edaefb5efb2c --- /dev/null +++ b/Framework/AODMerger/src/aodMerger.cxx @@ -0,0 +1,460 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include +#include +#include +#include + +#include "TSystem.h" +#include "TFile.h" +#include "TTree.h" +#include "TList.h" +#include "TKey.h" +#include "TDirectory.h" +#include "TObjString.h" +#include +#include +#include + +#include "aodMerger.h" + +// AOD merger with correct index rewriting +// No need to know the datamodel because the branch names follow a canonical standard (identified by fIndex) +int main(int argc, char* argv[]) +{ + std::string inputCollection("input.txt"); + std::string outputFileName("AO2D.root"); + long maxDirSize = 100000000; + bool skipNonExistingFiles = false; + bool skipParentFilesList = false; + int verbosity = 2; + int exitCode = 0; // 0: success, >0: failure + + int option_index = 0; + static struct option long_options[] = { + {"input", required_argument, nullptr, 0}, + {"output", required_argument, nullptr, 1}, + {"max-size", required_argument, nullptr, 2}, + {"skip-non-existing-files", no_argument, nullptr, 3}, + {"skip-parent-files-list", no_argument, nullptr, 4}, + {"verbosity", required_argument, nullptr, 5}, + {"help", no_argument, nullptr, 6}, + {nullptr, 0, nullptr, 0}}; + + while (true) { + int c = getopt_long(argc, argv, "", long_options, &option_index); + if (c == -1) { + break; + } else if (c == 0) { + inputCollection = optarg; + } else if (c == 1) { + outputFileName = optarg; + } else if (c == 2) { + maxDirSize = atol(optarg); + } else if (c == 3) { + skipNonExistingFiles = true; + } else if (c == 4) { + skipParentFilesList = true; + } else if (c == 5) { + verbosity = atoi(optarg); + } else if (c == 6) { + printf("AO2D merging tool. Options: \n"); + printf(" --input Contains path to files to be merged. Default: %s\n", inputCollection.c_str()); + printf(" --output Target output ROOT file. Default: %s\n", outputFileName.c_str()); + printf(" --max-size Target directory size. Default: %ld. Set to 0 if file is not self-contained.\n", maxDirSize); + printf(" --skip-non-existing-files Flag to allow skipping of non-existing files in the input list.\n"); + printf(" --skip-parent-files-list Flag to allow skipping the merging of the parent files list.\n"); + printf(" --verbosity Verbosity of output (default: %d).\n", verbosity); + return -1; + } else { + return -2; + } + } + + printf("AOD merger started with:\n"); + printf(" Input file: %s\n", inputCollection.c_str()); + printf(" Output file name: %s\n", outputFileName.c_str()); + printf(" Maximal folder size (uncompressed): %ld\n", maxDirSize); + if (skipNonExistingFiles) { + printf(" WARNING: Skipping non-existing files.\n"); + } + + std::map trees; + std::map sizeCompressed; + std::map sizeUncompressed; + std::map offsets; + std::map unassignedIndexOffset; + + auto outputFile = TFile::Open(outputFileName.c_str(), "RECREATE", "", 501); + TDirectory* outputDir = nullptr; + long currentDirSize = 0; + + std::ifstream in; + in.open(inputCollection); + TString line; + bool connectedToAliEn = false; + TMap* metaData = nullptr; + TMap* parentFiles = nullptr; + int totalMergedDFs = 0; + int mergedDFs = 0; + while (in.good() && exitCode == 0) { + in >> line; + + if (line.Length() == 0) { + continue; + } + + if (line.BeginsWith("alien:") && !connectedToAliEn) { + printf("Connecting to AliEn..."); + TGrid::Connect("alien:"); + connectedToAliEn = true; // Only try once + } + + printf("Processing input file: %s\n", line.Data()); + + auto inputFile = TFile::Open(line); + if (!inputFile) { + printf("Error: Could not open input file %s.\n", line.Data()); + if (skipNonExistingFiles) { + continue; + } else { + printf("Aborting merge!\n"); + exitCode = 1; + break; + } + } + + TList* keyList = inputFile->GetListOfKeys(); + keyList->Sort(); + + for (auto key1 : *keyList) { + if (((TObjString*)key1)->GetString().EqualTo("metaData")) { + auto metaDataCurrentFile = (TMap*)inputFile->Get("metaData"); + if (metaData == nullptr) { + metaData = metaDataCurrentFile; + outputFile->cd(); + metaData->Write("metaData", TObject::kSingleKey); + } else { + for (auto metaDataPair : *metaData) { + auto metaDataKey = ((TPair*)metaDataPair)->Key(); + if (metaDataCurrentFile->Contains(((TObjString*)metaDataKey)->GetString())) { + auto value = (TObjString*)metaData->GetValue(((TObjString*)metaDataKey)->GetString()); + auto valueCurrentFile = (TObjString*)metaDataCurrentFile->GetValue(((TObjString*)metaDataKey)->GetString()); + if (!value->GetString().EqualTo(valueCurrentFile->GetString())) { + printf("WARNING: Metadata differs between input files. Key %s : %s vs. %s\n", ((TObjString*)metaDataKey)->GetString().Data(), + value->GetString().Data(), valueCurrentFile->GetString().Data()); + } + } else { + printf("WARNING: Metadata differs between input files. Key %s is not present in current file\n", ((TObjString*)metaDataKey)->GetString().Data()); + } + } + } + } + + if (((TObjString*)key1)->GetString().EqualTo("parentFiles") && !skipParentFilesList) { + auto parentFilesCurrentFile = (TMap*)inputFile->Get("parentFiles"); + if (parentFiles == nullptr) { + parentFiles = new TMap; + } + for (auto pair : *parentFilesCurrentFile) { + parentFiles->Add(((TPair*)pair)->Key(), ((TPair*)pair)->Value()); + } + delete parentFilesCurrentFile; + } + + if (!((TObjString*)key1)->GetString().BeginsWith("DF_")) { + continue; + } + + auto dfName = ((TObjString*)key1)->GetString().Data(); + + if (verbosity > 0) { + printf(" Processing folder %s\n", dfName); + } + ++mergedDFs; + ++totalMergedDFs; + auto folder = (TDirectoryFile*)inputFile->Get(dfName); + auto treeList = folder->GetListOfKeys(); + + treeList->Sort(); + + // purging keys from duplicates + for (auto i = 0; i < treeList->GetEntries(); ++i) { + TKey* ki = (TKey*)treeList->At(i); + for (int j = i + 1; j < treeList->GetEntries(); ++j) { + TKey* kj = (TKey*)treeList->At(j); + if (std::strcmp(ki->GetName(), kj->GetName()) == 0 && std::strcmp(ki->GetTitle(), kj->GetTitle()) == 0) { + if (ki->GetCycle() < kj->GetCycle()) { + printf(" *** FATAL *** we had ordered the keys, first cycle should be higher, please check"); + exitCode = 5; + } else { + // key is a duplicate, let's remove it + treeList->Remove(kj); + j--; + } + } else { + // we changed key, since they are sorted, we won't have the same anymore + break; + } + } + } + + std::list foundTrees; + + for (auto key2 : *treeList) { + auto treeName = ((TObjString*)key2)->GetString().Data(); + bool found = (std::find(foundTrees.begin(), foundTrees.end(), treeName) != foundTrees.end()); + if (found == true) { + printf(" ***WARNING*** Tree %s was already merged (even if we purged duplicated trees before, so this should not happen), skipping\n", treeName); + continue; + } + foundTrees.push_back(treeName); + + auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName)); + bool fastCopy = (inputTree->GetTotBytes() > 10000000); // Only do this for large enough trees to avoid that baskets are too small + if (verbosity > 1) { + printf(" Processing tree %s with %lld entries with total size %lld (fast copy: %d)\n", treeName, inputTree->GetEntries(), inputTree->GetTotBytes(), fastCopy); + } + + bool alreadyCopied = false; + if (trees.count(treeName) == 0) { + if (mergedDFs > 1) { + printf(" *** FATAL ***: The tree %s was not in the previous dataframe(s)\n", treeName); + exitCode = 3; + } + + // Connect trees but do not copy entries (using the clone function) unless fast copy is on + // NOTE Basket size etc. are copied in CloneTree() + if (!outputDir) { + outputDir = outputFile->mkdir(dfName); + currentDirSize = 0; + if (verbosity > 0) { + printf("Writing to output folder %s\n", dfName); + } + } + outputDir->cd(); + auto outputTree = inputTree->CloneTree(-1, (fastCopy) ? "fast" : ""); + currentDirSize += inputTree->GetTotBytes(); // NOTE outputTree->GetTotBytes() is 0, so we use the inputTree here + alreadyCopied = true; + outputTree->SetAutoFlush(0); + trees[treeName] = outputTree; + } else { + // adjust addresses tree + trees[treeName]->CopyAddresses(inputTree); + } + + auto outputTree = trees[treeName]; + // register index and connect VLA columns + std::vector> indexList; + std::vector vlaPointers; + std::vector indexPointers; + TObjArray* branches = inputTree->GetListOfBranches(); + for (int i = 0; i < branches->GetEntriesFast(); ++i) { + TBranch* br = (TBranch*)branches->UncheckedAt(i); + TString branchName(br->GetName()); + + // detect VLA + if (((TLeaf*)br->GetListOfLeaves()->First())->GetLeafCount() != nullptr) { + int maximum = ((TLeaf*)br->GetListOfLeaves()->First())->GetLeafCount()->GetMaximum(); + + // get type + static TClass* cls; + EDataType type; + br->GetExpectedType(cls, type); + auto typeSize = TDataType::GetDataType(type)->Size(); + + char* buffer = new char[maximum * typeSize]; + memset(buffer, 0, maximum * typeSize); + vlaPointers.push_back(buffer); + if (verbosity > 2) { + printf(" Allocated VLA buffer of length %d with %d bytes each for branch name %s\n", maximum, typeSize, br->GetName()); + } + inputTree->SetBranchAddress(br->GetName(), buffer); + outputTree->SetBranchAddress(br->GetName(), buffer); + + if (branchName.BeginsWith("fIndexArray")) { + for (int i = 0; i < maximum; i++) { + indexList.push_back({reinterpret_cast(buffer + i * typeSize), offsets[getTableName(branchName, treeName)]}); + } + } + } else if (branchName.BeginsWith("fIndexSlice")) { + int* buffer = new int[2]; + memset(buffer, 0, 2 * sizeof(buffer[0])); + vlaPointers.push_back(reinterpret_cast(buffer)); + + inputTree->SetBranchAddress(br->GetName(), buffer); + outputTree->SetBranchAddress(br->GetName(), buffer); + + indexList.push_back({buffer, offsets[getTableName(branchName, treeName)]}); + indexList.push_back({buffer + 1, offsets[getTableName(branchName, treeName)]}); + } else if (branchName.BeginsWith("fIndex") && !branchName.EndsWith("_size")) { + int* buffer = new int; + *buffer = 0; + indexPointers.push_back(buffer); + + inputTree->SetBranchAddress(br->GetName(), buffer); + outputTree->SetBranchAddress(br->GetName(), buffer); + + indexList.push_back({buffer, offsets[getTableName(branchName, treeName)]}); + } + } + + if (indexList.size() > 0) { + auto entries = inputTree->GetEntries(); + int minIndexOffset = unassignedIndexOffset[treeName]; + auto newMinIndexOffset = minIndexOffset; + for (int i = 0; i < entries; i++) { + for (auto& index : indexList) { + *(index.first) = 0; // Any positive number will do, in any case it will not be filled in the output. Otherwise the previous entry is used and manipulated in the following. + } + inputTree->GetEntry(i); + // shift index columns by offset + for (const auto& idx : indexList) { + // if negative, the index is unassigned. In this case, the different unassigned blocks have to get unique negative IDs + if (*(idx.first) < 0) { + *(idx.first) += minIndexOffset; + newMinIndexOffset = std::min(newMinIndexOffset, *(idx.first)); + } else { + *(idx.first) += idx.second; + } + } + if (!alreadyCopied) { + int nbytes = outputTree->Fill(); + if (nbytes > 0) { + currentDirSize += nbytes; + } + } + } + unassignedIndexOffset[treeName] = newMinIndexOffset; + } else if (!alreadyCopied) { + auto nbytes = outputTree->CopyEntries(inputTree, -1, (fastCopy) ? "fast" : ""); + if (nbytes > 0) { + currentDirSize += nbytes; + } + } + + delete inputTree; + + for (auto& buffer : indexPointers) { + delete buffer; + } + for (auto& buffer : vlaPointers) { + delete[] buffer; + } + } + if (exitCode > 0) { + break; + } + + // check if all trees were present + if (mergedDFs > 1) { + for (auto const& tree : trees) { + bool found = (std::find(foundTrees.begin(), foundTrees.end(), tree.first) != foundTrees.end()); + if (found == false) { + printf(" *** FATAL ***: The tree %s was not in the current dataframe\n", tree.first.c_str()); + exitCode = 4; + } + } + } + + // set to -1 to identify not found tables + for (auto& offset : offsets) { + offset.second = -1; + } + + // update offsets + for (auto const& tree : trees) { + offsets[removeVersionSuffix(tree.first.c_str())] = tree.second->GetEntries(); + } + + // check for not found tables + for (auto& offset : offsets) { + if (offset.second < 0) { + if (maxDirSize > 0) { + // if maxDirSize is 0 then we do not merge DFs and this error is not an error actually (e.g. for not self-contained derived data) + printf("ERROR: Index on %s but no tree found\n", offset.first.c_str()); + } + offset.second = 0; + } + } + + if (currentDirSize > maxDirSize) { + if (verbosity > 0) { + printf("Maximum size reached: %ld. Closing folder %s.\n", currentDirSize, dfName); + } + for (auto const& tree : trees) { + // printf("Writing %s\n", tree.first.c_str()); + outputDir->cd(); + tree.second->Write(); + + // stats + sizeCompressed[tree.first] += tree.second->GetZipBytes(); + sizeUncompressed[tree.first] += tree.second->GetTotBytes(); + + delete tree.second; + } + outputDir = nullptr; + trees.clear(); + offsets.clear(); + mergedDFs = 0; + } + } + inputFile->Close(); + } + + if (parentFiles) { + outputFile->cd(); + parentFiles->Write("parentFiles", TObject::kSingleKey); + } + + for (auto const& tree : trees) { + outputDir->cd(); + tree.second->Write(); + + // stats + sizeCompressed[tree.first] += tree.second->GetZipBytes(); + sizeUncompressed[tree.first] += tree.second->GetTotBytes(); + + delete tree.second; + } + + outputFile->Write(); + outputFile->Close(); + + if (totalMergedDFs == 0) { + printf("ERROR: Did not merge a single DF. This does not seem right.\n"); + exitCode = 2; + } + + // in case of failure, remove the incomplete file + if (exitCode != 0) { + printf("Removing incomplete output file %s.\n", outputFile->GetName()); + gSystem->Unlink(outputFile->GetName()); + } else { + printf("AOD merger finished. Size overview follows:\n"); + + uint64_t totalCompressed = 0; + uint64_t totalUncompressed = 0; + for (auto const& tree : sizeCompressed) { + totalCompressed += tree.second; + totalUncompressed += sizeUncompressed[tree.first]; + } + if (totalCompressed > 0 && totalUncompressed > 0) { + for (auto const& tree : sizeCompressed) { + printf(" Tree %20s | Compressed: %12llu (%2.0f%%) | Uncompressed: %12llu (%2.0f%%)\n", tree.first.c_str(), tree.second, 100.0 * tree.second / totalCompressed, sizeUncompressed[tree.first], 100.0 * sizeUncompressed[tree.first] / totalUncompressed); + } + } + } + printf("\n"); + + return exitCode; +} diff --git a/Framework/AODMerger/src/aodMerger.h b/Framework/AODMerger/src/aodMerger.h new file mode 100644 index 0000000000000..e565272264b7b --- /dev/null +++ b/Framework/AODMerger/src/aodMerger.h @@ -0,0 +1,50 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include + +const char* removeVersionSuffix(const char* treeName) +{ + // remove version suffix, e.g. O2v0_001 becomes O2v0 + static TString tmp; + tmp = treeName; + if (tmp.First("_") >= 0) { + tmp.Remove(tmp.First("_")); + } + return tmp; +} + +const char* getTableName(const char* branchName, const char* treeName) +{ + // Syntax for branchName: + // fIndex[_] + // fIndexArray
[_] + // fIndexSlice
[_] + // if
is empty it is a self index and treeName is used as table name + static TString tableName; + tableName = branchName; + if (tableName.BeginsWith("fIndexArray") || tableName.BeginsWith("fIndexSlice")) { + tableName.Remove(0, 11); + } else { + tableName.Remove(0, 6); + } + if (tableName.First("_") >= 0) { + tableName.Remove(tableName.First("_")); + } + if (tableName.Length() == 0) { + return removeVersionSuffix(treeName); + } + tableName.Remove(tableName.Length() - 1); // remove s + tableName.ToLower(); + tableName = "O2" + tableName; + // printf("%s --> %s\n", branchName, tableName.Data()); + return tableName; +} diff --git a/Framework/AODMerger/src/aodThinner.cxx b/Framework/AODMerger/src/aodThinner.cxx new file mode 100644 index 0000000000000..6ed8463cae3e3 --- /dev/null +++ b/Framework/AODMerger/src/aodThinner.cxx @@ -0,0 +1,343 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include +#include +#include +#include + +#include "TSystem.h" +#include "TFile.h" +#include "TTree.h" +#include "TList.h" +#include "TKey.h" +#include "TDirectory.h" +#include "TObjString.h" +#include +#include +#include + +#include "aodMerger.h" + +// AOD reduction tool +// Designed for the 2022 pp data with specific selections: +// - Remove all TPC only tracks +// - Remove all V0s which refer to any removed track +// - Remove all cascade which refer to any removed V0 +// - Remove all ambiguous track entries which point to a track with collision +// - Adjust all indices +int main(int argc, char* argv[]) +{ + std::string inputFileName("AO2D.root"); + std::string outputFileName("AO2D_thinned.root"); + int exitCode = 0; // 0: success, >0: failure + + int option_index = 1; + static struct option long_options[] = { + {"input", required_argument, nullptr, 0}, + {"output", required_argument, nullptr, 1}, + {"help", no_argument, nullptr, 2}, + {nullptr, 0, nullptr, 0}}; + + while (true) { + int c = getopt_long(argc, argv, "", long_options, &option_index); + if (c == -1) { + break; + } else if (c == 0) { + inputFileName = optarg; + } else if (c == 1) { + outputFileName = optarg; + } else if (c == 2) { + printf("AO2D thinning tool. Options: \n"); + printf(" --input Contains input file path to the file to be thinned. Default: %s\n", inputFileName.c_str()); + printf(" --output Target output ROOT file. Default: %s\n", outputFileName.c_str()); + return -1; + } else { + return -2; + } + } + + printf("AOD reduction started with:\n"); + printf(" Input file: %s\n", inputFileName.c_str()); + printf(" Ouput file name: %s\n", outputFileName.c_str()); + + auto outputFile = TFile::Open(outputFileName.c_str(), "RECREATE", "", 501); + TDirectory* outputDir = nullptr; + + if (inputFileName.find("alien:") == 0) { + printf("Connecting to AliEn..."); + TGrid::Connect("alien:"); + } + + auto inputFile = TFile::Open(inputFileName.c_str()); + if (!inputFile) { + printf("Error: Could not open input file %s.\n", inputFileName.c_str()); + return 1; + } + + TList* keyList = inputFile->GetListOfKeys(); + keyList->Sort(); + + for (auto key1 : *keyList) { + if (((TObjString*)key1)->GetString().EqualTo("metaData")) { + auto metaData = (TMap*)inputFile->Get("metaData"); + outputFile->cd(); + metaData->Write("metaData", TObject::kSingleKey); + } + + if (((TObjString*)key1)->GetString().EqualTo("parentFiles")) { + auto parentFiles = (TMap*)inputFile->Get("parentFiles"); + outputFile->cd(); + parentFiles->Write("parentFiles", TObject::kSingleKey); + } + + if (!((TObjString*)key1)->GetString().BeginsWith("DF_")) { + continue; + } + + auto dfName = ((TObjString*)key1)->GetString().Data(); + + printf(" Processing folder %s\n", dfName); + auto folder = (TDirectoryFile*)inputFile->Get(dfName); + auto treeList = folder->GetListOfKeys(); + treeList->Sort(); + + // purging keys from duplicates + for (auto i = 0; i < treeList->GetEntries(); ++i) { + TKey* ki = (TKey*)treeList->At(i); + for (int j = i + 1; j < treeList->GetEntries(); ++j) { + TKey* kj = (TKey*)treeList->At(j); + if (std::strcmp(ki->GetName(), kj->GetName()) == 0 && std::strcmp(ki->GetTitle(), kj->GetTitle()) == 0) { + if (ki->GetCycle() < kj->GetCycle()) { + printf(" *** FATAL *** we had ordered the keys, first cycle should be higher, please check"); + exitCode = 5; + } else { + // key is a duplicate, let's remove it + treeList->Remove(kj); + j--; + } + } else { + // we changed key, since they are sorted, we won't have the same anymore + break; + } + } + } + + // Certain order needed in order to populate vectors of skipped entries + auto v0Entry = (TObject*)treeList->FindObject("O2v0_001"); + treeList->Remove(v0Entry); + treeList->AddFirst(v0Entry); + + // Prepare maps for track skimming + auto trackExtraTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, "O2trackextra")); // for example we can use this line to access the track table + if (trackExtraTree == nullptr) { + printf("O2trackextra table not found\n"); + exitCode = 6; + break; + } + auto track_iu = (TTree*)inputFile->Get(Form("%s/%s", dfName, "O2track_iu")); + if (track_iu == nullptr) { + printf("O2track_iu table not found\n"); + exitCode = 7; + break; + } + auto v0s = (TTree*)inputFile->Get(Form("%s/%s", dfName, "O2v0_001")); + if (v0s == nullptr) { + printf("O2v0_001 table not found\n"); + exitCode = 8; + break; + } + + std::vector acceptedTracks(trackExtraTree->GetEntries(), -1); + std::vector hasCollision(trackExtraTree->GetEntries(), false); + std::vector keepV0s(v0s->GetEntries(), -1); + + uint8_t tpcNClsFindable = 0; + uint8_t ITSClusterMap = 0; + uint8_t TRDPattern = 0; + float_t TOFChi2 = 0; + // char16_t fTPCNClsFindableMinusFound = 0; + trackExtraTree->SetBranchAddress("fTPCNClsFindable", &tpcNClsFindable); + trackExtraTree->SetBranchAddress("fITSClusterMap", &ITSClusterMap); + trackExtraTree->SetBranchAddress("fTRDPattern", &TRDPattern); + trackExtraTree->SetBranchAddress("fTOFChi2", &TOFChi2); + // trackExtraTree->SetBranchAddress("fTPCNClsFindableMinusFound", &fTPCNClsFindableMinusFound); + + int fIndexCollisions = 0; + track_iu->SetBranchAddress("fIndexCollisions", &fIndexCollisions); + + // loop over all tracks + auto entries = trackExtraTree->GetEntries(); + int counter = 0; + for (int i = 0; i < entries; i++) { + trackExtraTree->GetEntry(i); + track_iu->GetEntry(i); + + // Remove TPC only tracks + if (tpcNClsFindable > 0. && ITSClusterMap == 0 && TRDPattern == 0 && TOFChi2 < -1.) { + counter++; + } else { + acceptedTracks[i] = i - counter; + } + hasCollision[i] = (fIndexCollisions >= 0); + } + + for (auto key2 : *treeList) { + auto treeName = ((TObjString*)key2)->GetString().Data(); + + auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName)); + printf(" Processing tree %s with %lld entries with total size %lld\n", treeName, inputTree->GetEntries(), inputTree->GetTotBytes()); + + // Connect trees but do not copy entries (using the clone function) + // NOTE Basket size etc. are copied in CloneTree() + if (!outputDir) { + outputDir = outputFile->mkdir(dfName); + printf("Writing to output folder %s\n", dfName); + } + outputDir->cd(); + auto outputTree = inputTree->CloneTree(0); + outputTree->SetAutoFlush(0); + + std::vector indexList; + std::vector vlaPointers; + std::vector indexPointers; + TObjArray* branches = inputTree->GetListOfBranches(); + for (int i = 0; i < branches->GetEntriesFast(); ++i) { + TBranch* br = (TBranch*)branches->UncheckedAt(i); + TString branchName(br->GetName()); + TString tableName(getTableName(branchName, treeName)); + // register index of track index ONLY + if (!tableName.EqualTo("O2track")) { + continue; + } + // detect VLA + if (((TLeaf*)br->GetListOfLeaves()->First())->GetLeafCount() != nullptr) { + printf(" *** FATAL ***: VLA detection is not supported\n"); + exitCode = 9; + } else if (branchName.BeginsWith("fIndexSlice")) { + int* buffer = new int[2]; + memset(buffer, 0, 2 * sizeof(buffer[0])); + vlaPointers.push_back(reinterpret_cast(buffer)); + inputTree->SetBranchAddress(br->GetName(), buffer); + outputTree->SetBranchAddress(br->GetName(), buffer); + + indexList.push_back(buffer); + indexList.push_back(buffer + 1); + } else if (branchName.BeginsWith("fIndex") && !branchName.EndsWith("_size")) { + int* buffer = new int; + *buffer = 0; + indexPointers.push_back(buffer); + + inputTree->SetBranchAddress(br->GetName(), buffer); + outputTree->SetBranchAddress(br->GetName(), buffer); + + indexList.push_back(buffer); + } + } + + bool processingTracks = memcmp(treeName, "O2track", 7) == 0; // matches any of the track tables + bool processingCascades = memcmp(treeName, "O2cascade", 9) == 0; + bool processingV0s = memcmp(treeName, "O2v0", 4) == 0; + bool processingAmbiguousTracks = memcmp(treeName, "O2ambiguoustrack", 16) == 0; + + auto indexV0s = -1; + if (processingCascades) { + inputTree->SetBranchAddress("fIndexV0s", &indexV0s); + outputTree->SetBranchAddress("fIndexV0s", &indexV0s); + } + + auto entries = inputTree->GetEntries(); + for (int i = 0; i < entries; i++) { + inputTree->GetEntry(i); + bool fillThisEntry = true; + // Special case for Tracks, TracksExtra, TracksCov + if (processingTracks) { + if (acceptedTracks[i] < 0) { + fillThisEntry = false; + } + } else { + // Other table than Tracks* --> reassign indices to Tracks + for (const auto& idx : indexList) { + int oldTrackIndex = *idx; + + // if negative, the index is unassigned. + if (oldTrackIndex >= 0) { + if (acceptedTracks[oldTrackIndex] < 0) { + fillThisEntry = false; + } else { + *idx = acceptedTracks[oldTrackIndex]; + } + } + } + } + + // Reassign v0 index of cascades + if (processingCascades) { + if (keepV0s[indexV0s] < 0) { + fillThisEntry = false; + } else { + indexV0s = keepV0s[indexV0s]; + } + } + + // Keep only tracks which have no collision, see O2-3601 + if (processingAmbiguousTracks) { + if (hasCollision[i]) { + fillThisEntry = false; + } + } + + if (fillThisEntry) { + outputTree->Fill(); + if (processingV0s) { + keepV0s[i] = outputTree->GetEntries() - 1; + } + } + } + + if (entries != outputTree->GetEntries()) { + printf(" Reduced from %lld to %lld entries\n", entries, outputTree->GetEntries()); + } + + delete inputTree; + + for (auto& buffer : indexPointers) { + delete buffer; + } + for (auto& buffer : vlaPointers) { + delete[] buffer; + } + + outputDir->cd(); + outputTree->Write(); + delete outputTree; + } + if (exitCode > 0) { + break; + } + + outputDir = nullptr; + } + inputFile->Close(); + + outputFile->Write(); + outputFile->Close(); + + // in case of failure, remove the incomplete file + if (exitCode != 0) { + printf("Removing incomplete output file %s.\n", outputFile->GetName()); + gSystem->Unlink(outputFile->GetName()); + } + + printf("End of AOD thinning.\n"); + + return exitCode; +} diff --git a/Framework/CMakeLists.txt b/Framework/CMakeLists.txt index 5107581fb6d87..a007d6292f238 100644 --- a/Framework/CMakeLists.txt +++ b/Framework/CMakeLists.txt @@ -17,7 +17,7 @@ add_subdirectory(Core) add_subdirectory(Utils) -add_subdirectory(AnalysisSupport) +add_subdirectory(AnalysisSupport) add_subdirectory(CCDBSupport) add_subdirectory(PhysicsSupport) add_subdirectory(DataTakingSupport) @@ -31,3 +31,5 @@ add_subdirectory(GUISupport) endif() add_subdirectory(TestWorkflows) + +add_subdirectory(AODMerger)