From a917b6b5c387e4a6ec2a232b317d5e7468b99091 Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 9 May 2025 19:56:48 +0200 Subject: [PATCH 0001/1426] Add TPC occupancy in 10bins/drift in trackStudy --- .../study/src/TrackingStudy.cxx | 76 ++++++++++++------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index f206c43f7f57a..a74349bdeba15 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -95,7 +95,7 @@ class TrackingStudySpec : public Task std::unique_ptr mDBGOut; std::unique_ptr mDBGOutVtx; std::unique_ptr mTPCRefitter; ///< TPC refitter used for TPC tracks refit during the reconstruction - std::vector mTBinClOccAft, mTBinClOccBef, mTBinClOccWgh; ///< TPC occupancy histo: i-th entry is the integrated occupancy for ~1 orbit starting/preceding from the TB = i*mNTPCOccBinLength + std::vector mMltHistTB, mTBinClOccAft, mTBinClOccBef, mTBinClOccWgh; ///< TPC occupancy histo: i-th entry is the integrated occupancy for ~1 orbit starting/preceding from the TB = i*mNTPCOccBinLength std::unique_ptr mOccWghFun; float mITSROFrameLengthMUS = 0.f; float mTPCTBinMUS = 0.f; // TPC bin in microseconds @@ -107,6 +107,7 @@ class TrackingStudySpec : public Task float mMinX = 46.; float mMaxEta = 0.8; float mMinPt = 0.1; + int mNOccBinsDrift = 10; int mMinTPCClusters = 60; int mNTPCOccBinLength = 0; ///< TPC occ. 
histo bin length in TBs int mNHBPerTF = 0; @@ -142,6 +143,10 @@ void TrackingStudySpec::init(InitContext& ic) mDCAYFormula = ic.options().get("dcay-vs-pt"); mDCAZFormula = ic.options().get("dcaz-vs-pt"); mDoPairsCorr = ic.options().get("pair-correlations"); + mNOccBinsDrift = ic.options().get("noccbins"); + if (mNOccBinsDrift < 3) { + mNOccBinsDrift = 3; + } auto str = ic.options().get("occ-weight-fun"); if (!str.empty()) { mOccWghFun = std::make_unique("occFun", str.c_str(), -100., 100.); @@ -172,42 +177,23 @@ void TrackingStudySpec::run(ProcessingContext& pc) mTBinClOccAft.resize(nTPCOccBins); mTBinClOccBef.resize(nTPCOccBins); float sm = 0., tb = 0.5 * mNTPCOccBinLength; - /* // at the moment not used - if (mOccWghFun) { - mTBinClOccWgh.resize(nTPCBins); - float occBin2MUS = 8 * o2::constants::lhc::LHCBunchSpacingMUS; - int covWghTB = TMath::NInt(100./occBin2MUS); // coverage of weighted occ. in TBins - for (int i = 0; i < nTPCBins; i++) { - sm = 0.; - for (int j=-covWghTB;j=nTPCBins) { - continue; - } - sm += mOccWghFun->Eval(j*occBin2MUS)*mTPCRefitter->getParam()->GetUnscaledMult(j+i); - } - mTBinClOccWgh[i] = sm; - } - } else { - mTBinClOccWgh.resize(1); - } - */ - std::vector mltHistTB(nTPCOccBins); + mMltHistTB.resize(nTPCOccBins); for (int i = 0; i < nTPCOccBins; i++) { - mltHistTB[i] = mTPCRefitter->getParam()->GetUnscaledMult(tb); + mMltHistTB[i] = mTPCRefitter->getParam()->GetUnscaledMult(tb); tb += mNTPCOccBinLength; } for (int i = nTPCOccBins; i--;) { - sm += mltHistTB[i]; + sm += mMltHistTB[i]; if (i + sumBins < nTPCOccBins) { - sm -= mltHistTB[i + sumBins]; + sm -= mMltHistTB[i + sumBins]; } mTBinClOccAft[i] = sm; } sm = 0; for (int i = 0; i < nTPCOccBins; i++) { - sm += mltHistTB[i]; + sm += mMltHistTB[i]; if (i - sumBins > 0) { - sm -= mltHistTB[i - sumBins]; + sm -= mMltHistTB[i - sumBins]; } mTBinClOccBef[i] = sm; } @@ -271,13 +257,17 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) 
o2::dataformats::PrimaryVertexExt pveDummy; o2::dataformats::PrimaryVertexExt vtxDummy(mMeanVtx.getPos(), {}, {}, 0); std::vector pveVec(nv); + std::vector tpcOccAftV, tpcOccBefV; pveVec.back() = vtxDummy; const auto& alpParams = o2::itsmft::DPLAlpideParam::Instance(); float tBiasITS = alpParams.roFrameBiasInBC * o2::constants::lhc::LHCBunchSpacingMUS; const o2::ft0::InteractionTag& ft0Params = o2::ft0::InteractionTag::Instance(); std::vector trcExtVec; std::vector trcPairsVec; - auto vdrit = mTPCVDriftHelper.getVDriftObject().getVDrift(); + auto vdrift = mTPCVDriftHelper.getVDriftObject().getVDrift(); + float maxDriftTB = 250.f / vdrift / (o2::constants::lhc::LHCBunchSpacingMUS * 8); + int groupOcc = std::ceil(maxDriftTB / mNOccBinsDrift / mNTPCOccBinLength); + bool tpcTrackOK = recoData.isTrackSourceLoaded(GTrackID::TPC); auto fillTPCClInfo = [&recoData, this](const o2::tpc::TrackTPC& trc, o2::dataformats::TrackInfoExt& trExt, float timestampTB = -1e9) { @@ -391,6 +381,8 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) } } }; + tpcOccAftV.resize(mNOccBinsDrift); + tpcOccBefV.resize(mNOccBinsDrift); for (int iv = 0; iv < nv; iv++) { LOGP(debug, "processing PV {} of {}", iv, nv); @@ -455,7 +447,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) continue; } if (iv < nv - 1 && is == GTrackID::TPC && tpcTr && !tpcTr->hasBothSidesClusters()) { // for unconstrained TPC tracks correct track Z - float corz = vdrit * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); + float corz = vdrift * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); if (tpcTr->hasASideClustersOnly()) { corz = -corz; // A-side } @@ -500,7 +492,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) } else { o2::track::TrackParCov tmpTPC(*tpcTr); if (iv < nv - 1 && is == GTrackID::TPC && tpcTr && !tpcTr->hasBothSidesClusters()) { // for unconstrained TPC tracks 
correct track Z - float corz = vdrit * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); + float corz = vdrift * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); if (tpcTr->hasASideClustersOnly()) { corz = -corz; // A-side } @@ -554,10 +546,35 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) int tb = pveVec[iv].getTimeStamp().getTimeStamp() * mTPCTBinMUSInv * mNTPCOccBinLengthInv; tpcOccBef = tb < 0 ? mTBinClOccBef[0] : (tb >= mTBinClOccBef.size() ? mTBinClOccBef.back() : mTBinClOccBef[tb]); tpcOccAft = tb < 0 ? mTBinClOccAft[0] : (tb >= mTBinClOccAft.size() ? mTBinClOccAft.back() : mTBinClOccAft[tb]); + int tbc = pveVec[iv].getTimeStamp().getTimeStamp() * mTPCTBinMUSInv * mNTPCOccBinLengthInv - groupOcc / 2.; + for (int iob = 0; iob < mNOccBinsDrift; iob++) { + float sm = 0; + for (int ig = 0; ig < groupOcc; ig++) { + int ocb = tbc + ig + groupOcc * iob; + if (ocb < 0 || ocb >= (int)mMltHistTB.size()) { + sm = -1; + break; + } + sm += mMltHistTB[ocb]; + } + tpcOccAftV[iob] = sm; + // + sm = 0; + for (int ig = 0; ig < groupOcc; ig++) { + int ocb = tbc + ig - groupOcc * iob; + if (ocb < 0 || ocb >= (int)mMltHistTB.size()) { + sm = -1; + break; + } + sm += mMltHistTB[ocb]; + } + tpcOccBefV[iob] = sm; + } } (*mDBGOut) << "trpv" << "orbit=" << recoData.startIR.orbit << "tfID=" << TFCount << "tpcOccBef=" << tpcOccBef << "tpcOccAft=" << tpcOccAft + << "tpcOccBefV=" << tpcOccBefV << "tpcOccAftV=" << tpcOccAftV << "pve=" << pveVec[iv] << "trc=" << trcExtVec << "\n"; if (mDoPairsCorr) { @@ -752,6 +769,7 @@ DataProcessorSpec getTrackingStudySpec(GTrackID::mask_t srcTracks, GTrackID::mas {"with-its-only", VariantType::Bool, false, {"Store tracks with ITS only"}}, {"pair-correlations", VariantType::Bool, false, {"Do pairs correlation"}}, {"occ-weight-fun", VariantType::String, "(x>=-40&&x<-5) ? (1./1225*pow(x+40,2)) : ((x>-5&&x<15) ? 1. : ((x>=15&&x<40) ? (-0.4/25*x+1.24 ) : ( (x>40&&x<100) ? 
-0.4/60*x+0.6+0.8/3 : 0)))", {"Occupancy weighting f-n vs time in musec"}}, + {"noccbins", VariantType::Int, 10, {"Number of occupancy bins per full drift time"}}, {"min-x-prop", VariantType::Float, 100.f, {"track should be propagated to this X at least"}}, }; o2::tpc::VDriftHelper::requestCCDBInputs(dataRequest->inputs); From 1bcf367115a918253404a92a9537311646c895e1 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sun, 11 May 2025 18:47:32 +0200 Subject: [PATCH 0002/1426] DPL: fix rate limiting handling (#14255) On success, FairMQ returns a positive number of bytes, not 0. --- Framework/Core/src/CommonDataProcessors.cxx | 26 ++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/CommonDataProcessors.cxx b/Framework/Core/src/CommonDataProcessors.cxx index 737e1b7e635c8..c2431b3ab068d 100644 --- a/Framework/Core/src/CommonDataProcessors.cxx +++ b/Framework/Core/src/CommonDataProcessors.cxx @@ -30,9 +30,11 @@ #include "Framework/RuntimeError.h" #include "Framework/RateLimiter.h" #include "Framework/PluginManager.h" +#include "Framework/Signpost.h" #include #include +#include #include #include #include @@ -40,6 +42,9 @@ using namespace o2::framework::data_matcher; +// Special log to track callbacks we know about +O2_DECLARE_DYNAMIC_LOG(callbacks); + namespace o2::framework { @@ -145,6 +150,10 @@ DataProcessorSpec CommonDataProcessors::getGlobalFairMQSink(std::vectordata; auto& timesliceIndex = services->get(); @@ -152,20 +161,35 @@ void retryMetricCallback(uv_async_t* async) auto channel = device->GetChannels().find("metric-feedback"); auto oldestPossingTimeslice = timesliceIndex.getOldestPossibleOutput().timeslice.value; if (channel == device->GetChannels().end()) { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", "Could not find metric-feedback channel."); return; } fair::mq::MessagePtr payload(device->NewMessage()); payload->Rebuild(&oldestPossingTimeslice, 
sizeof(int64_t), nullptr, nullptr); auto consumed = oldestPossingTimeslice; + size_t start = uv_hrtime(); int64_t result = channel->second[0].Send(payload, 100); + size_t stop = uv_hrtime(); // If the sending worked, we do not retry. - if (result != 0) { + if (result <= 0) { + // Forcefully slow down in case FairMQ returns earlier than expected... + int64_t ellapsed = (stop - start) / 1000000; + if (ellapsed < 100) { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", + "FairMQ returned %llu earlier than expected. Sleeping %llu ms more before, retrying.", + result, ellapsed); + uv_sleep(100 - ellapsed); + } else { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", + "FairMQ returned %llu, unable to send last consumed timeslice to source for %llu ms, retrying.", result, ellapsed); + } // If the sending did not work, we keep trying until it actually works. // This will schedule other tasks in the queue, so the processing of the // data will still happen. uv_async_send(async); } else { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", "Send %llu bytes, Last timeslice now set to %zu.", result, consumed); lastTimeslice = consumed; } } From f926be7e0b3e05ddce8e040f264b3eadf25a5a84 Mon Sep 17 00:00:00 2001 From: shahoian Date: Sun, 11 May 2025 15:37:02 +0200 Subject: [PATCH 0003/1426] Fix typo in the RecoContainer::getTrackTimeTPCTRD Thanks for Felix for spotting --- DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx index 39cc05d8a69e7..d4b4e2b89cbb0 100644 --- a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx +++ b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx @@ -1594,7 +1594,7 @@ void RecoContainer::getTrackTimeITSTPCTRD(GTrackID gid, float& t, float& tErr) c //________________________________________________________ void 
RecoContainer::getTrackTimeTPCTRD(GTrackID gid, float& t, float& tErr) const { - const auto trigTPCTRD = getITSTPCTRDTriggers(); + const auto trigTPCTRD = getTPCTRDTriggers(); // very slow: find the trigger this track belongs to for (const auto& trig : trigTPCTRD) { if (trig.getTrackRefs().getEntriesBound() > gid.getIndex()) { From 04ba3bd18306dd47a245d32f6556eec86d546a4b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 10:46:51 +0200 Subject: [PATCH 0004/1426] GPU: Fix performance regression: DETERMINISTIC CODE was used unintentionally in 2 places --- GPU/Common/GPUCommonAlgorithm.h | 37 +++++++++---------- .../Base/cuda/GPUReconstructionCUDArtc.cu | 5 ++- .../GPUTPCCompressionKernels.cxx | 24 ++++++------ 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index d0643391246a8..8cd53ec5e0609 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -282,30 +282,27 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp) { #ifndef GPUCA_GPUCODE GPUCommonAlgorithm::sort(begin, end, comp); +#elif defined(GPUCA_DETERMINISTIC_MODE) // Not using GPUCA_DETERMINISTIC_CODE, which is enforced in TPC compression + if (get_local_id(0) == 0) { + GPUCommonAlgorithm::sort(begin, end, comp); + } + GPUbarrier(); #else - GPUCA_DETERMINISTIC_CODE( // clang-format off - GPUbarrier(); - if (get_local_id(0) == 0) { - GPUCommonAlgorithm::sort(begin, end, comp); - } - GPUbarrier(); - , // !GPUCA_DETERMINISTIC_CODE - int32_t n = end - begin; - for (int32_t i = 0; i < n; i++) { - for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { - int32_t offset = i % 2; - int32_t curPos = 2 * tIdx + offset; - int32_t nextPos = curPos + 1; - - if (nextPos < n) { - if (!comp(begin[curPos], begin[nextPos])) { - IterSwap(&begin[curPos], &begin[nextPos]); - } + int32_t n = end - begin; + for (int32_t i = 0; i < n; i++) { + for (int32_t tIdx 
= get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { + int32_t offset = i % 2; + int32_t curPos = 2 * tIdx + offset; + int32_t nextPos = curPos + 1; + + if (nextPos < n) { + if (!comp(begin[curPos], begin[nextPos])) { + IterSwap(&begin[curPos], &begin[nextPos]); } } - GPUbarrier(); } - ) // clang-format on + GPUbarrier(); + } #endif } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 50a568ab345cf..805397c9b430e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -14,9 +14,12 @@ #define GPUCA_GPUCODE_GENRTC #define GPUCA_GPUCODE_COMPILEKERNELS + +// Keep some preprocessor calls unprocessed #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) #define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) -// GPUReconstructionCUDAIncludesSystem.h prependended without preprocessor running + +// GPUReconstructionCUDAIncludesSystem.h prependended by CMakewithout preprocessor running #include "GPUReconstructionCUDADef.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 3b88c8764d0fd..bba97e9eace9b 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -273,19 +273,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->clusters[iSector][iRow])); +#else // GPUCA_DETERMINISTIC_MODE + if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - , // !GPUCA_DETERMINISTIC_CODE - if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, 
GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } - ) // clang-format on + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } +#endif // GPUCA_DETERMINISTIC_MODE GPUbarrier(); } From ea6e536b48052186c6bd263cd9df1e83e3f16ce7 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 12 May 2025 20:49:42 +0200 Subject: [PATCH 0005/1426] DPL: do not create unneeded statics (#14261) --- .../Core/include/Framework/TableBuilder.h | 28 ++++++++----------- Framework/Core/src/TableBuilder.cxx | 4 +-- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 32fe78b852eff..936a8a04d5a5a 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ 
b/Framework/Core/include/Framework/TableBuilder.h @@ -855,7 +855,7 @@ auto makeEmptyTable(const char* name, framework::pack p) } std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, - expressions::Projector* projectors, std::vector> const& fields, const char* name, std::shared_ptr& projector); + expressions::Projector* projectors, const char* name, std::shared_ptr& projector); /// Expression-based column generator to materialize columns template @@ -867,10 +867,9 @@ auto spawner(std::vector>&& tables, const char* na if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } - static auto fields = o2::soa::createFieldsFromColumns(placeholders_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); } template @@ -881,10 +880,9 @@ auto spawner(std::shared_ptr const& fullTable, const char* name, o if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } - static auto fields = o2::soa::createFieldsFromColumns(placeholders_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); } template @@ -896,15 +894,15 @@ auto spawner(std::vector>&& tables, const char* na if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } - 
static auto fields = o2::soa::createFieldsFromColumns(expression_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); + auto projectors = [](framework::pack) -> std::array { return {{std::move(C::Projector())...}}; } (expression_pack_t{}); - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); } template @@ -915,15 +913,14 @@ auto spawner(std::shared_ptr const& fullTable, const char* name, s if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } - static auto fields = o2::soa::createFieldsFromColumns(expression_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); auto projectors = [](framework::pack) -> std::array { return {{std::move(C::Projector())...}}; } (expression_pack_t{}); - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); } template @@ -933,10 +930,9 @@ auto spawner(framework::pack columns, std::vectornum_rows() == 0) { return makeEmptyTable(name, framework::pack{}); } - static auto fields = o2::soa::createFieldsFromColumns(columns); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(columns)); std::array projectors{{std::move(C::Projector())...}}; - return spawnerHelper(fullTable, new_schema, sizeof...(C), projectors.data(), fields, name, projector); + return spawnerHelper(fullTable, new_schema, sizeof...(C), 
projectors.data(), name, projector); } template diff --git a/Framework/Core/src/TableBuilder.cxx b/Framework/Core/src/TableBuilder.cxx index d9827559c2148..eb19f8d3fe642 100644 --- a/Framework/Core/src/TableBuilder.cxx +++ b/Framework/Core/src/TableBuilder.cxx @@ -85,11 +85,11 @@ void TableBuilder::setLabel(const char* label) } std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, - expressions::Projector* projectors, std::vector> const& fields, const char* name, + expressions::Projector* projectors, const char* name, std::shared_ptr& projector) { if (projector == nullptr) { - projector = framework::expressions::createProjectorHelper(nColumns, projectors, fullTable->schema(), fields); + projector = framework::expressions::createProjectorHelper(nColumns, projectors, fullTable->schema(), newSchema->fields()); } arrow::TableBatchReader reader(*fullTable); From 240812f69d5d2cb39cfc5b30ce9ef63a3254f394 Mon Sep 17 00:00:00 2001 From: shahoian Date: Mon, 12 May 2025 15:04:30 +0200 Subject: [PATCH 0006/1426] add TPC chi2 to trackstudy output --- .../study/include/GlobalTrackingStudy/TrackInfoExt.h | 3 ++- Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h index 935e57873bbd9..26eeea858d14b 100644 --- a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h +++ b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h @@ -35,6 +35,7 @@ struct TrackInfoExt { float ttime = 0; float ttimeE = 0; float xmin = 0; + float chi2TPC = 0.f; float chi2ITSTPC = 0.f; float q2ptITS = 0.f; float q2ptTPC = 0.f; @@ -56,7 +57,7 @@ struct TrackInfoExt { float getTPCInY0() const { return innerTPCPos0[1]; } float getTPCInZ0() const { return 
innerTPCPos0[2]; } - ClassDefNV(TrackInfoExt, 5); + ClassDefNV(TrackInfoExt, 6); }; } // namespace dataformats diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index a74349bdeba15..a2bf6abd35fef 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -503,6 +503,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) } } fillTPCClInfo(*tpcTr, trcExt, tsuse); + trcExt.chi2TPC = tpcTr->getChi2(); } auto gidRefs = recoData.getSingleDetectorRefs(vid); if (gidRefs[GTrackID::ITS].isIndexSet()) { From 751bb12f5b93dd6612ad15f470eec418db2800bb Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 13 May 2025 08:42:44 +0200 Subject: [PATCH 0007/1426] Move JSON parsing out of line (#14264) --- Framework/Core/CMakeLists.txt | 1 + .../include/Framework/VariantJSONHelpers.h | 430 +--------------- Framework/Core/src/VariantJSONHelpers.cxx | 464 ++++++++++++++++++ 3 files changed, 468 insertions(+), 427 deletions(-) create mode 100644 Framework/Core/src/VariantJSONHelpers.cxx diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index 2691d9d33a0c6..17320348d9272 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -140,6 +140,7 @@ o2_add_library(Framework src/Task.cxx src/Array2D.cxx src/Variant.cxx + src/VariantJSONHelpers.cxx src/VariantPropertyTreeHelpers.cxx src/WorkflowCustomizationHelpers.cxx src/WorkflowHelpers.cxx diff --git a/Framework/Core/include/Framework/VariantJSONHelpers.h b/Framework/Core/include/Framework/VariantJSONHelpers.h index eab78d547ca13..811e6f13d4985 100644 --- a/Framework/Core/include/Framework/VariantJSONHelpers.h +++ b/Framework/Core/include/Framework/VariantJSONHelpers.h @@ -19,439 +19,15 @@ #include #include -#include -#include -#include +#include namespace 
o2::framework { -namespace -{ -template -struct VariantReader : public rapidjson::BaseReaderHandler, VariantReader> { - using Ch = rapidjson::UTF8<>::Ch; - using SizeType = rapidjson::SizeType; - - enum struct State { - IN_START, - IN_STOP, - IN_DATA, - IN_KEY, - IN_ARRAY, - IN_ROW, - IN_ERROR - }; - - VariantReader() - : states{}, - rows{0}, - cols{0} - { - debug << "Start" << std::endl; - states.push(State::IN_START); - } - - bool Null() - { - debug << "Null value encountered" << std::endl; - return true; - } - - bool Int(int i) - { - debug << "Int(" << i << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!std::is_same_v>) { - states.push(State::IN_ERROR); - return true; - } else { - if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { - debug << "added to array" << std::endl; - accumulatedData.push_back(i); - return true; - } - } - states.push(State::IN_ERROR); - return true; - } - - bool Uint(unsigned i) - { - debug << "Uint -> Int" << std::endl; - return Int(static_cast(i)); - } - - bool Int64(int64_t i) - { - debug << "Int64 -> Int" << std::endl; - return Int(static_cast(i)); - } - - bool Uint64(uint64_t i) - { - debug << "Uint64 -> Int" << std::endl; - return Int(static_cast(i)); - } - - bool Double(double d) - { - debug << "Double(" << d << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!(std::is_same_v> || std::is_same_v>)) { - states.push(State::IN_ERROR); - return true; - } - if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { - if constexpr (std::is_same_v>) { - debug << "added to array as double" << std::endl; - accumulatedData.push_back(d); - return true; - } else if constexpr (std::is_same_v>) { - debug << "added to array as float" << std::endl; - accumulatedData.push_back(static_cast(d)); - return true; - } - } - 
states.push(State::IN_ERROR); - return true; - } - - bool Bool(bool b) - { - debug << "Bool(" << b << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!std::is_same_v>) { - states.push(State::IN_ERROR); - return false; - } else { - if (states.top() == State::IN_ARRAY) { - debug << "added to array" << std::endl; - accumulatedData.push_back(b); - return true; - } - states.push(State::IN_ERROR); - return true; - } - } - - bool String(const Ch* str, SizeType, bool) - { - debug << "String(" << str << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!(V == VariantType::ArrayString || isLabeledArray())) { - states.push(State::IN_ERROR); - return true; - } else { - if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { - debug << "added to array" << std::endl; - if constexpr (isLabeledArray()) { - if (currentKey == labels_rows_str) { - labels_rows.push_back(str); - return true; - } - if (currentKey == labels_cols_str) { - labels_cols.push_back(str); - return true; - } - } - if (currentKey == "values") { - if constexpr (std::is_same_v>) { - accumulatedData.push_back(str); - } else { - states.push(State::IN_ERROR); - } - return true; - } - return true; - } - states.push(State::IN_ERROR); - return true; - } - } - - bool StartObject() - { - debug << "StartObject()" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_START) { - states.push(State::IN_DATA); - return true; - } - states.push(State::IN_ERROR); - return true; - } - - bool Key(const Ch* str, SizeType, bool) - { - debug << "Key(" << str << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - currentKey = str; - return false; - } - if (states.top() == State::IN_DATA) { - // no previous 
keys - states.push(State::IN_KEY); - currentKey = str; - return true; - } - if (states.top() == State::IN_KEY) { - currentKey = str; - if constexpr (!isLabeledArray()) { - debug << "extra keys in a single-key variant" << std::endl; - states.push(State::IN_ERROR); - return true; - } - return true; - } - currentKey = str; - states.push(State::IN_ERROR); - return true; - } - - bool EndObject(SizeType) - { - debug << "EndObject()" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_KEY) { - if constexpr (isArray()) { - debug << "creating 1d-array variant" << std::endl; - result = Variant(accumulatedData); - } else if constexpr (isArray2D()) { - debug << "creating 2d-array variant" << std::endl; - assert(accumulatedData.size() == rows * cols); - result = Variant(Array2D{accumulatedData, rows, cols}); - } else if constexpr (isLabeledArray()) { - debug << "creating labeled array variant" << std::endl; - assert(accumulatedData.size() == rows * cols); - if (labels_rows.empty() == false) { - assert(labels_rows.size() == rows); - } - if (labels_cols.empty() == false) { - assert(labels_cols.size() == cols); - } - result = Variant(LabeledArray{Array2D{accumulatedData, rows, cols}, labels_rows, labels_cols}); - } - states.push(State::IN_STOP); - return true; - } - states.push(State::IN_ERROR); - return true; - } - - bool StartArray() - { - debug << "StartArray()" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_KEY) { - states.push(State::IN_ARRAY); - return true; - } else if (states.top() == State::IN_ARRAY) { - if constexpr (isArray2D() || isLabeledArray()) { - states.push(State::IN_ROW); - return true; - } - } - states.push(State::IN_ERROR); - return true; - } - - bool EndArray(SizeType elementCount) - { - debug << "EndArray()" << std::endl; - if (states.top() == State::IN_ERROR) { 
- debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_ARRAY) { - // finish up array - states.pop(); - if constexpr (isArray2D() || isLabeledArray()) { - rows = elementCount; - } - return true; - } else if (states.top() == State::IN_ROW) { - // finish up row - states.pop(); - if constexpr (isArray2D() || isLabeledArray()) { - cols = elementCount; - } - return true; - } - states.push(State::IN_ERROR); - return true; - } - - std::stack states; - std::ostringstream debug; - - uint32_t rows; - uint32_t cols; - std::string currentKey; - std::vector> accumulatedData; - std::vector labels_rows; - std::vector labels_cols; - Variant result; -}; - -template -void writeVariant(std::ostream& o, Variant const& v) -{ - if constexpr (isArray() || isArray2D() || isLabeledArray()) { - using type = variant_array_element_type_t; - rapidjson::OStreamWrapper osw(o); - rapidjson::Writer w(osw); - - auto writeArray = [&](auto* values, size_t size) { - using T = std::remove_pointer_t; - w.StartArray(); - for (auto i = 0u; i < size; ++i) { - if constexpr (std::is_same_v) { - w.Int(values[i]); - } else if constexpr (std::is_same_v || std::is_same_v) { - w.Double(values[i]); - } else if constexpr (std::is_same_v) { - w.Bool(values[i]); - } else if constexpr (std::is_same_v) { - w.String(values[i].c_str()); - } - } - w.EndArray(); - }; - - auto writeVector = [&](auto&& vector) { - return writeArray(vector.data(), vector.size()); - }; - - auto writeArray2D = [&](auto&& array2d) { - using T = typename std::decay_t::element_t; - w.StartArray(); - for (auto i = 0u; i < array2d.rows; ++i) { - w.StartArray(); - for (auto j = 0u; j < array2d.cols; ++j) { - if constexpr (std::is_same_v) { - w.Int(array2d(i, j)); - } else if constexpr (std::is_same_v || std::is_same_v) { - w.Double(array2d(i, j)); - } else if constexpr (std::is_same_v) { - w.String(array2d(i, j).c_str()); - } - } - w.EndArray(); - } - w.EndArray(); - }; - - auto writeLabeledArray = [&](auto&& 
array) { - w.Key(labels_rows_str); - writeVector(array.getLabelsRows()); - w.Key(labels_cols_str); - writeVector(array.getLabelsCols()); - w.Key("values"); - writeArray2D(array.getData()); - }; - - w.StartObject(); - if constexpr (isArray()) { - w.Key("values"); - writeArray(v.get(), v.size()); - } else if constexpr (isArray2D()) { - w.Key("values"); - writeArray2D(v.get>()); - } else if constexpr (isLabeledArray()) { - writeLabeledArray(v.get>()); - } else if constexpr (V == VariantType::Dict) { - // nothing to do for dicts - } - w.EndObject(); - } -} -} // namespace - struct VariantJSONHelpers { template - static Variant read(std::istream& s) - { - rapidjson::Reader reader; - rapidjson::IStreamWrapper isw(s); - VariantReader vreader; - bool ok = reader.Parse(isw, vreader); - - if (ok == false) { - std::stringstream error; - error << "Cannot parse serialized Variant, error: " << rapidjson::GetParseError_En(reader.GetParseErrorCode()) << " at offset: " << reader.GetErrorOffset(); - throw std::runtime_error(error.str()); - } - return vreader.result; - } + static Variant read(std::istream& s); - static void write(std::ostream& o, Variant const& v) - { - switch (v.type()) { - case VariantType::ArrayInt: - writeVariant(o, v); - break; - case VariantType::ArrayFloat: - writeVariant(o, v); - break; - case VariantType::ArrayDouble: - writeVariant(o, v); - break; - case VariantType::ArrayBool: - throw std::runtime_error("Bool vectors not implemented yet"); - // writeVariant(o, v); - break; - case VariantType::ArrayString: - writeVariant(o, v); - break; - case VariantType::Array2DInt: - writeVariant(o, v); - break; - case VariantType::Array2DFloat: - writeVariant(o, v); - break; - case VariantType::Array2DDouble: - writeVariant(o, v); - break; - case VariantType::LabeledArrayInt: - writeVariant(o, v); - break; - case VariantType::LabeledArrayFloat: - writeVariant(o, v); - break; - case VariantType::LabeledArrayDouble: - writeVariant(o, v); - break; - case 
VariantType::LabeledArrayString: - writeVariant(o, v); - break; - case VariantType::Dict: - writeVariant(o, v); - default: - break; - } - } + static void write(std::ostream& o, Variant const& v); }; } // namespace o2::framework diff --git a/Framework/Core/src/VariantJSONHelpers.cxx b/Framework/Core/src/VariantJSONHelpers.cxx new file mode 100644 index 0000000000000..fbb5abb331867 --- /dev/null +++ b/Framework/Core/src/VariantJSONHelpers.cxx @@ -0,0 +1,464 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+#include "Framework/VariantJSONHelpers.h" +#include "Framework/Variant.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace o2::framework +{ +namespace +{ +template +struct VariantReader : public rapidjson::BaseReaderHandler, VariantReader> { + using Ch = rapidjson::UTF8<>::Ch; + using SizeType = rapidjson::SizeType; + + enum struct State { + IN_START, + IN_STOP, + IN_DATA, + IN_KEY, + IN_ARRAY, + IN_ROW, + IN_ERROR + }; + + VariantReader() + : states{}, + rows{0}, + cols{0} + { + debug << "Start" << std::endl; + states.push(State::IN_START); + } + + bool Null() + { + debug << "Null value encountered" << std::endl; + return true; + } + + bool Int(int i) + { + debug << "Int(" << i << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!std::is_same_v>) { + states.push(State::IN_ERROR); + return true; + } else { + if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { + debug << "added to array" << std::endl; + accumulatedData.push_back(i); + return true; + } + } + states.push(State::IN_ERROR); + return true; + } + + bool Uint(unsigned i) + { + debug << "Uint -> Int" << std::endl; + return Int(static_cast(i)); + } + + bool Int64(int64_t i) + { + debug << "Int64 -> Int" << std::endl; + return Int(static_cast(i)); + } + + bool Uint64(uint64_t i) + { + debug << "Uint64 -> Int" << std::endl; + return Int(static_cast(i)); + } + + bool Double(double d) + { + debug << "Double(" << d << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!(std::is_same_v> || std::is_same_v>)) { + states.push(State::IN_ERROR); + return true; + } + if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { + if constexpr (std::is_same_v>) { + debug << "added to array as double" << std::endl; + accumulatedData.push_back(d); + return true; + } else if 
constexpr (std::is_same_v>) { + debug << "added to array as float" << std::endl; + accumulatedData.push_back(static_cast(d)); + return true; + } + } + states.push(State::IN_ERROR); + return true; + } + + bool Bool(bool b) + { + debug << "Bool(" << b << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!std::is_same_v>) { + states.push(State::IN_ERROR); + return false; + } else { + if (states.top() == State::IN_ARRAY) { + debug << "added to array" << std::endl; + accumulatedData.push_back(b); + return true; + } + states.push(State::IN_ERROR); + return true; + } + } + + bool String(const Ch* str, SizeType, bool) + { + debug << "String(" << str << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!(V == VariantType::ArrayString || isLabeledArray())) { + states.push(State::IN_ERROR); + return true; + } else { + if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { + debug << "added to array" << std::endl; + if constexpr (isLabeledArray()) { + if (currentKey == labels_rows_str) { + labels_rows.push_back(str); + return true; + } + if (currentKey == labels_cols_str) { + labels_cols.push_back(str); + return true; + } + } + if (currentKey == "values") { + if constexpr (std::is_same_v>) { + accumulatedData.push_back(str); + } else { + states.push(State::IN_ERROR); + } + return true; + } + return true; + } + states.push(State::IN_ERROR); + return true; + } + } + + bool StartObject() + { + debug << "StartObject()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_START) { + states.push(State::IN_DATA); + return true; + } + states.push(State::IN_ERROR); + return true; + } + + bool Key(const Ch* str, SizeType, bool) + { + debug << "Key(" << str << ")" << std::endl; + if (states.top() == 
State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + currentKey = str; + return false; + } + if (states.top() == State::IN_DATA) { + // no previous keys + states.push(State::IN_KEY); + currentKey = str; + return true; + } + if (states.top() == State::IN_KEY) { + currentKey = str; + if constexpr (!isLabeledArray()) { + debug << "extra keys in a single-key variant" << std::endl; + states.push(State::IN_ERROR); + return true; + } + return true; + } + currentKey = str; + states.push(State::IN_ERROR); + return true; + } + + bool EndObject(SizeType) + { + debug << "EndObject()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_KEY) { + if constexpr (isArray()) { + debug << "creating 1d-array variant" << std::endl; + result = Variant(accumulatedData); + } else if constexpr (isArray2D()) { + debug << "creating 2d-array variant" << std::endl; + assert(accumulatedData.size() == rows * cols); + result = Variant(Array2D{accumulatedData, rows, cols}); + } else if constexpr (isLabeledArray()) { + debug << "creating labeled array variant" << std::endl; + assert(accumulatedData.size() == rows * cols); + if (labels_rows.empty() == false) { + assert(labels_rows.size() == rows); + } + if (labels_cols.empty() == false) { + assert(labels_cols.size() == cols); + } + result = Variant(LabeledArray{Array2D{accumulatedData, rows, cols}, labels_rows, labels_cols}); + } + states.push(State::IN_STOP); + return true; + } + states.push(State::IN_ERROR); + return true; + } + + bool StartArray() + { + debug << "StartArray()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_KEY) { + states.push(State::IN_ARRAY); + return true; + } else if (states.top() == State::IN_ARRAY) { + if constexpr (isArray2D() || isLabeledArray()) { + states.push(State::IN_ROW); + return true; + } + } + 
states.push(State::IN_ERROR); + return true; + } + + bool EndArray(SizeType elementCount) + { + debug << "EndArray()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_ARRAY) { + // finish up array + states.pop(); + if constexpr (isArray2D() || isLabeledArray()) { + rows = elementCount; + } + return true; + } else if (states.top() == State::IN_ROW) { + // finish up row + states.pop(); + if constexpr (isArray2D() || isLabeledArray()) { + cols = elementCount; + } + return true; + } + states.push(State::IN_ERROR); + return true; + } + + std::stack states; + std::ostringstream debug; + + uint32_t rows; + uint32_t cols; + std::string currentKey; + std::vector> accumulatedData; + std::vector labels_rows; + std::vector labels_cols; + Variant result; +}; +} // namespace + +template +Variant VariantJSONHelpers::read(std::istream& s) +{ + rapidjson::Reader reader; + rapidjson::IStreamWrapper isw(s); + VariantReader vreader; + bool ok = reader.Parse(isw, vreader); + + if (ok == false) { + std::stringstream error; + error << "Cannot parse serialized Variant, error: " << rapidjson::GetParseError_En(reader.GetParseErrorCode()) << " at offset: " << reader.GetErrorOffset(); + throw std::runtime_error(error.str()); + } + return vreader.result; +} + +template +void writeVariant(std::ostream& o, Variant const& v) +{ + if constexpr (isArray() || isArray2D() || isLabeledArray()) { + using type = variant_array_element_type_t; + rapidjson::OStreamWrapper osw(o); + rapidjson::Writer w(osw); + + auto writeArray = [&](auto* values, size_t size) { + using T = std::remove_pointer_t; + w.StartArray(); + for (auto i = 0u; i < size; ++i) { + if constexpr (std::is_same_v) { + w.Int(values[i]); + } else if constexpr (std::is_same_v || std::is_same_v) { + w.Double(values[i]); + } else if constexpr (std::is_same_v) { + w.Bool(values[i]); + } else if constexpr (std::is_same_v) { + 
w.String(values[i].c_str()); + } + } + w.EndArray(); + }; + + auto writeVector = [&](auto&& vector) { + return writeArray(vector.data(), vector.size()); + }; + + auto writeArray2D = [&](auto&& array2d) { + using T = typename std::decay_t::element_t; + w.StartArray(); + for (auto i = 0u; i < array2d.rows; ++i) { + w.StartArray(); + for (auto j = 0u; j < array2d.cols; ++j) { + if constexpr (std::is_same_v) { + w.Int(array2d(i, j)); + } else if constexpr (std::is_same_v || std::is_same_v) { + w.Double(array2d(i, j)); + } else if constexpr (std::is_same_v) { + w.String(array2d(i, j).c_str()); + } + } + w.EndArray(); + } + w.EndArray(); + }; + + auto writeLabeledArray = [&](auto&& array) { + w.Key(labels_rows_str); + writeVector(array.getLabelsRows()); + w.Key(labels_cols_str); + writeVector(array.getLabelsCols()); + w.Key("values"); + writeArray2D(array.getData()); + }; + + w.StartObject(); + if constexpr (isArray()) { + w.Key("values"); + writeArray(v.get(), v.size()); + } else if constexpr (isArray2D()) { + w.Key("values"); + writeArray2D(v.get>()); + } else if constexpr (isLabeledArray()) { + writeLabeledArray(v.get>()); + } else if constexpr (V == VariantType::Dict) { + // nothing to do for dicts + } + w.EndObject(); + } +} + +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); + +void VariantJSONHelpers::write(std::ostream& o, Variant const& v) +{ + switch 
(v.type()) { + case VariantType::ArrayInt: + writeVariant(o, v); + break; + case VariantType::ArrayFloat: + writeVariant(o, v); + break; + case VariantType::ArrayDouble: + writeVariant(o, v); + break; + case VariantType::ArrayBool: + throw std::runtime_error("Bool vectors not implemented yet"); + // writeVariant(o, v); + break; + case VariantType::ArrayString: + writeVariant(o, v); + break; + case VariantType::Array2DInt: + writeVariant(o, v); + break; + case VariantType::Array2DFloat: + writeVariant(o, v); + break; + case VariantType::Array2DDouble: + writeVariant(o, v); + break; + case VariantType::LabeledArrayInt: + writeVariant(o, v); + break; + case VariantType::LabeledArrayFloat: + writeVariant(o, v); + break; + case VariantType::LabeledArrayDouble: + writeVariant(o, v); + break; + case VariantType::LabeledArrayString: + writeVariant(o, v); + break; + case VariantType::Dict: + writeVariant(o, v); + default: + break; + } +} +} // namespace o2::framework From 8de719349f3dc57e670b7ebf6b8206dee1e426b7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 11:43:12 +0200 Subject: [PATCH 0008/1426] GPU: Add GPUCA_RTC_CONSTEXPR macro for constexpr only in RTC --- GPU/Common/GPUCommonDef.h | 4 ++++ GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 1 + GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu | 1 + 3 files changed, 6 insertions(+) diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index d7e99f53d4ce8..d9a5bdf92b6ac 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -72,6 +72,10 @@ #define GPUCA_RTC_SPECIAL_CODE(...) 
#endif +#ifndef GPUCA_RTC_CONSTEXPR + #define GPUCA_RTC_CONSTEXPR +#endif + #ifndef GPUCA_DETERMINISTIC_CODE #ifdef GPUCA_DETERMINISTIC_MODE #define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 5706f32e73e96..acc77648d954b 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,6 +38,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + + std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 805397c9b430e..66c02d6ed251c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -18,6 +18,7 @@ // Keep some preprocessor calls unprocessed #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) #define GPUCA_DETERMINISTIC_CODE(...) 
GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) +#define GPUCA_RTC_CONSTEXPR GPUCA_RTC_CONSTEXPR // GPUReconstructionCUDAIncludesSystem.h prependended by CMakewithout preprocessor running #include "GPUReconstructionCUDADef.h" From 1dc506884cf82e11378a33ad1621319e11a17402 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 11:43:40 +0200 Subject: [PATCH 0009/1426] GPU TPC: Compute alternative dEdx only if it has different settings than normal dEdx --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 17 +++-- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 70 +++++++++++-------- 3 files changed, 55 insertions(+), 36 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 6e7de7ee48ca6..d2aba503be6a6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -301,7 +301,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); if (mRec->GetParam().dodEdxEnabled) { computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); - computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + } } computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); if (mRec->GetParam().par.earlyTpcTransform) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9dc6ddc59c2b4..9ead17ea5c7c0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -106,6 +106,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); TrackTPC* outputTracks = merger.OutputTracksTPCO2(); uint32_t* 
clusRefs = merger.OutputClusRefsTPCO2(); + const auto& param = merger.Param(); GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); uint2* GPUrestrict() tmpData = merger.ClusRefTmp(); @@ -130,9 +131,15 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled) { - oTrack.setdEdx(tracksdEdx[i]); - oTrack.setdEdxAlt(tracksdEdxAlt[i]); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if (param.dodEdxEnabled) { + oTrack.setdEdx(tracksdEdx[i]); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + oTrack.setdEdxAlt(tracksdEdxAlt[i]); + } else { + oTrack.setdEdxAlt(tracksdEdx[i]); + } + } } auto snpOut = outerPar.P[2]; @@ -148,9 +155,9 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks outerPar.C[6], outerPar.C[7], outerPar.C[8], outerPar.C[9], outerPar.C[10], outerPar.C[11], outerPar.C[12], outerPar.C[13], outerPar.C[14]})); - if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled && merger.Param().rec.tpc.enablePID) { + if (param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.enablePID) { PIDResponse pidResponse{}; - auto pid = pidResponse.getMostProbablePID(oTrack, merger.Param().rec.tpc.PID_EKrangeMin, merger.Param().rec.tpc.PID_EKrangeMax, merger.Param().rec.tpc.PID_EPrangeMin, merger.Param().rec.tpc.PID_EPrangeMax, merger.Param().rec.tpc.PID_EDrangeMin, merger.Param().rec.tpc.PID_EDrangeMax, merger.Param().rec.tpc.PID_ETrangeMin, merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); + auto pid = pidResponse.getMostProbablePID(oTrack, param.rec.tpc.PID_EKrangeMin, param.rec.tpc.PID_EKrangeMax, param.rec.tpc.PID_EPrangeMin, param.rec.tpc.PID_EPrangeMax, param.rec.tpc.PID_EDrangeMin, param.rec.tpc.PID_EDrangeMax, param.rec.tpc.PID_ETrangeMin, 
merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); auto pidRemap = merger.Param().rec.tpc.PID_remap[pid]; if (pidRemap >= 0) { pid = pidRemap; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index f5bfbe985fb8c..0d8547263207b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -216,11 +216,15 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - bool dodEdx = param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; - dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); - if (dodEdx) { - dEdx.fillSubThreshold(lastRow - wayDirection); - dEdxAlt.fillSubThreshold(lastRow - wayDirection); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; + dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); + if (dodEdx) { + dEdx.fillSubThreshold(lastRow - wayDirection); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + dEdxAlt.fillSubThreshold(lastRow - wayDirection); + } + } } } @@ -367,31 +371,35 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, 
prop.GetAlpha()); } - if (param.par.dodEdx && param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters - bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; - if (acc || accAlt) { - float qtot = 0, qmax = 0, pad = 0, relTime = 0; - const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; - for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { - if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) { - qtot += clustersXYZ[ihit].amp; - } else { - const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; - qtot += cl.qTot; - qmax = CAMath::Max(qmax, cl.qMax); - pad += cl.getPad(); - relTime += cl.getTime(); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters + bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; + if (acc || accAlt) { + float qtot = 0, qmax = 0, pad = 0, relTime = 0; + const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; + for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { + if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) { + qtot += clustersXYZ[ihit].amp; + } else { + const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; + qtot += cl.qTot; + qmax = CAMath::Max(qmax, cl.qMax); + pad += cl.getPad(); + relTime += cl.getTime(); + } + } + qtot /= clusterCount; // TODO: Weighted Average + pad /= clusterCount; + relTime /= clusterCount; + relTime = relTime - 
CAMath::Round(relTime); + if (acc) { + dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); + } + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if (accAlt) { + dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); + } } - } - qtot /= clusterCount; // TODO: Weighted Average - pad /= clusterCount; - relTime /= clusterCount; - relTime = relTime - CAMath::Round(relTime); - if (acc) { - dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); - } - if (accAlt) { - dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } } } @@ -428,7 +436,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); - dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + } } Alpha = prop.GetAlpha(); MoveToReference(prop, param, Alpha); From 760f73e62a3a7898c81eee53e1d67012c58c39be Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 21:19:41 +0200 Subject: [PATCH 0010/1426] GPU CMake: If deterministic mode is set to MaxOptO2, do not impose -O2 when BUILD_TYPE is DEBUG --- GPU/GPUTracking/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 2e26622d05291..52848692e7516 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -16,11 +16,12 @@ 
set(MODULE GPUTracking) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") +elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") + else() + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() -elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_DENORMALS_FLAGS}") From 89b35ba2d75113e60b2045ed01e169b28d860a07 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 21:22:52 +0200 Subject: [PATCH 0011/1426] GPU: Workaround for Clang Frontend issue This is fixed with Clang >= 20 and C++23 (P2280R4) --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 4 ++++ GPU/GPUTracking/Base/hip/CMakeLists.txt | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index acc77648d954b..67ad608c13417 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,7 +38,11 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) 
__VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + +#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + +#else + std::string("#define GPUCA_RTC_CONSTEXPR\n") + +#endif GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 6eded3499e46e..c89ef1769ad81 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -270,3 +270,8 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() + +set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx" +TARGET_DIRECTORY O2::GPUTrackingHIP +PROPERTIES +COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR") From 4b72f186ec6e2981cae50338b97855efde70dc8a Mon Sep 17 00:00:00 2001 From: tubagundem Date: Tue, 13 May 2025 11:20:40 +0200 Subject: [PATCH 0012/1426] TPC: Fix digitizer workflow to load GEM params from CCDB before creating Polya file --- .../DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx index 75141425f7c49..a04f73a62fbf8 100644 --- a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx +++ b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx @@ -286,7 +286,7 @@ int getNumTPCLanes(std::vector const& sectors, ConfigContext const& configc // ------------------------------------------------------------------ -void initTPC() +void 
initTPC(long timestamp) { // We only want to do this for the DPL master // I am not aware of an easy way to query if "I am DPL master" so @@ -308,6 +308,12 @@ void initTPC() auto& cdb = o2::tpc::CDBInterface::instance(); cdb.setUseDefaults(); + + // IMPORTANT: load ParameterGEM from CCDB + auto& ccdbManager = o2::ccdb::BasicCCDBManager::instance(); + ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::ParGEM), timestamp); + LOGP(info, "initTPC: TPC GEM param updated for time {}", timestamp); + o2::tpc::ParameterGEM::Instance().printKeyValues(true, true); // by invoking this constructor we make sure that a common file will be created // in future we should take this from OCDB and just forward per message const static auto& ampl = o2::tpc::GEMAmplification::instance(); @@ -592,7 +598,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) if (isEnabled(o2::detectors::DetID::TPC)) { if (!helpasked && ismaster) { - initTPC(); + initTPC(hbfu.startTime); } tpcsectors = o2::RangeTokenizer::tokenize(configcontext.options().get("tpc-sectors")); From 947a1a8cc06cd2e694cb8e5beb561e9392629af5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:12:57 +0200 Subject: [PATCH 0013/1426] GPU TPC: Remove some obsolete code and track members (leftover from Run 2 by Sergey and totally forgotten), which were wasting performance --- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 9 -------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 21 +------------------ 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 578fe1eeb4ca7..6ef2ed2ede668 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -41,9 +41,6 @@ class GPUTPCGMMergedTrack { return mAlpha; } - GPUd() float LastX() const { return mLastX; } - GPUd() float LastY() const { return mLastY; } - GPUd() float LastZ() const { 
return mLastZ; } GPUd() bool OK() const { return mFlags & 0x01; } GPUd() bool Looper() const { return mFlags & 0x02; } GPUd() bool CSide() const { return mFlags & 0x04; } @@ -55,9 +52,6 @@ class GPUTPCGMMergedTrack GPUd() void SetFirstClusterRef(int32_t v) { mFirstClusterRef = v; } GPUd() void SetParam(const GPUTPCGMTrackParam& v) { mParam = v; } GPUd() void SetAlpha(float v) { mAlpha = v; } - GPUd() void SetLastX(float v) { mLastX = v; } - GPUd() void SetLastY(float v) { mLastY = v; } - GPUd() void SetLastZ(float v) { mLastZ = v; } GPUd() void SetOK(bool v) { if (v) { @@ -110,9 +104,6 @@ class GPUTPCGMMergedTrack gputpcgmmergertypes::GPUTPCOuterParam mOuterParam; //* outer param float mAlpha; //* alpha angle - float mLastX; //* outer X - float mLastY; //* outer Y - float mLastZ; //* outer Z uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays // TODO: Change to 8 bit uint32_t mNClusters; //* number of track clusters diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 0d8547263207b..366f75cb05e56 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -1143,26 +1143,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr track.Param() = t; track.Alpha() = Alpha; - if (track.OK()) { - int32_t ind = track.FirstClusterRef(); - const GPUParam& GPUrestrict() param = merger->Param(); - float alphaa = param.Alpha(merger->Clusters()[ind].sector); - float xx, yy, zz; - if (merger->Param().par.earlyTpcTransform) { - xx = merger->ClustersXYZ()[ind].x; - yy = merger->ClustersXYZ()[ind].y; - zz = merger->ClustersXYZ()[ind].z - track.Param().GetTZOffset(); - } else { - const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[merger->Clusters()[ind].num]; - 
merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(merger->Clusters()[ind].sector, merger->Clusters()[ind].row, cl.getPad(), cl.getTime(), xx, yy, zz, track.Param().GetTZOffset()); - } - float sinA, cosA; - CAMath::SinCos(alphaa - track.Alpha(), sinA, cosA); - track.SetLastX(xx * cosA - yy * sinA); - track.SetLastY(xx * sinA + yy * cosA); - track.SetLastZ(zz); - // merger->DebugRefitMergedTrack(track); - } + // if (track.OK()) merger->DebugRefitMergedTrack(track); } GPUd() void GPUTPCGMTrackParam::Rotate(float alpha) From f3f10a25f8abaf9dcf62e764a963f68dfefdc6b4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:17:56 +0200 Subject: [PATCH 0014/1426] GPU TPC: Rename some variables with misleading name --- ...GPUReconstructionCUDAKernelsSpecialize.inc | 4 +-- .../DataCompression/GPUTPCCompression.cxx | 2 +- .../GPUTPCCompressionKernels.cxx | 10 +++--- .../GPUChainTrackingDebugAndProfiling.cxx | 4 +-- .../Global/GPUChainTrackingMerger.cxx | 12 +++---- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 32 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 4 +-- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 14 ++++---- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- .../Merger/GPUTPCGMTracksToTPCSeeds.cxx | 6 ++-- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 4 +-- 12 files changed, 48 insertions(+), 48 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 8796f063abdc5..d3dd561dcea2f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -109,13 +109,13 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) 
{ - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 82834a694d0ba..ec1636dfe7f59 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -125,7 +125,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? 
Could remove GPUConstantMem.h include - mMaxTracks = mRec->GetConstantMem().tpcMerger.NOutputTracks(); + mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); } diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index bba97e9eace9b..73b195e8f4fe4 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -201,7 +201,7 @@ GPUdii() void GPUTPCCompressionKernels::ThreadclusterOffset[iSector][iRow]; - const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; + const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; // 32 bit enough for number of clusters per row * 1024 const uint32_t idOffsetOutMax = ((const uint32_t*)clusters->clusterOffset[iSector])[iRow + 1] * compressor.mMaxClusterFactorBase1024 / 1024; // Array out of bounds access is ok, since it goes to the correct nClustersTotal if (iThread == nThreads - 1) { smem.nCount = 0; @@ -214,7 +214,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->nClusters[iSector][iRow]); for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) { const int32_t idx = idOffset + i; - int32_t cidx = 0; + int32_t storeCluster = 0; do { if (i >= clusters->nClusters[iSector][iRow]) { break; @@ -239,13 +239,13 @@ GPUdii() void GPUTPCCompressionKernels::ThreadtpcTrackers[i].NTrackHits(), processors()->tpcTrackers[i].NMaxTrackHits()); } addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits)); - addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NOutputTracks(), processors()->tpcMerger.NMaxTracks()); + addToMap("TPC Tracks", usageMap, 
processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks()); addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NOutputTrackClusters(), processors()->tpcMerger.NMaxOutputTrackClusters()); if (mRec->GetProcessingSettings().createO2Output) { @@ -181,7 +181,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL SectorTracks NCl %d NTrk %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTracks()); GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits()); } - GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTracks()); + GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks()); GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 6e86be03e7950..bd1fa7796dadf 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -220,7 +220,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mOutputQueue.clear(); } - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); + runKernel(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); if (param().rec.tpc.retryRefit == 1) { runKernel(GetGridAuto(0), -1); } @@ -233,7 +233,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); if (param().rec.tpc.mergeLoopersAfterburner) { - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); + runKernel(doGPU ? 
GetGrid(Merger.NMergedTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); if (doGPU) { TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0); SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel @@ -255,10 +255,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) throw std::runtime_error("QA Scratch buffer exceeded"); } } - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); + GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); waitEvent = nullptr; if (param().dodEdxEnabled) { - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { @@ -326,7 +326,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } mIOPtrs.mergedTracks = Merger.OutputTracks(); - mIOPtrs.nMergedTracks = Merger.NOutputTracks(); + mIOPtrs.nMergedTracks = Merger.NMergedTracks(); mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); @@ -340,7 +340,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (doGPU) { processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks(); - processorsShadow()->ioPtrs.nMergedTracks = 
Merger.NOutputTracks(); + processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index d2aba503be6a6..e96bbeee774bf 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -415,7 +415,7 @@ int32_t GPUTPCGMMerger::CheckSectors() GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output) { - const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal(); for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) { mTrackLinks[i] = -1; } @@ -1271,7 +1271,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const ClusterNative* cls = Param().par.earlyTpcTransform ? 
nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear; - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { if (mOutputTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) { continue; @@ -1392,7 +1392,7 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i } } - // for (int32_t i = 0;i < mMemory->nOutputTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks + // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks } namespace o2::gpu::internal @@ -1533,7 +1533,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = 0; for (int32_t ipart = 0; ipart < nParts; ipart++) { const GPUTPCGMSectorTrack* t = trackParts[ipart]; - CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nOutputTracks, ipart, t->QPt(), t->DzDs())); + CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nMergedTracks, ipart, t->QPt(), t->DzDs())); int32_t nTrackHits = t->NClusters(); trackCluster* c2 = trackClusters + nHits + nTrackHits - 1; for (int32_t i = 0; i < nTrackHits; i++, c2--) { @@ -1678,10 +1678,10 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread cl[i].leg = trackClusters[i].leg; } - uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nOutputTracks, 1u); + uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nMergedTracks, 1u); if (iOutputTrack >= mNMaxTracks) { raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks); - CAMath::AtomicExch(&mMemory->nOutputTracks, mNMaxTracks); + 
CAMath::AtomicExch(&mMemory->nMergedTracks, mNMaxTracks); continue; } @@ -1718,9 +1718,9 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; } - // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nOutputTracks, p1.QPt(), nParts, nHits); + // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nMergedTracks, p1.QPt(), nParts, nHits); - /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nOutputTracks)) + /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nMergedTracks)) { mergedTrack.SetOK(0); mergedTrack.SetNClusters(0); @@ -1742,14 +1742,14 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackOrderProcess[i] = i; } } GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackSort[i] = i; } } @@ -1784,7 +1784,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ ) // clang-format on }; - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nOutputTracks, comp); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, comp); #endif } @@ -1810,13 +1810,13 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int ) // 
clang-format on }; - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nOutputTracks, comp); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, comp); #endif } GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackOrderAttach[mTrackSort[i]] = i; const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; if (trk.OK()) { @@ -1848,7 +1848,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackSort[mTrackOrderAttach[i]] = i; } for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { @@ -1858,7 +1858,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; if (!trk.OK() || trk.NClusters() == 0) { continue; @@ -1893,7 +1893,7 @@ GPUd() void GPUTPCGMMerger::Finalize2(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const float lowPtThresh = 
Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold - for (uint32_t i = get_global_id(0); i < mMemory->nOutputTracks; i += get_global_size(0)) { + for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) { const auto& trk = mOutputTracks[i]; const auto& p = trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 6c6e0e02a2dc2..6c9c14b557798 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -69,7 +69,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nRetryRefit; GPUAtomic(uint32_t) nLoopData; GPUAtomic(uint32_t) nUnpackedTracks; - GPUAtomic(uint32_t) nOutputTracks; + GPUAtomic(uint32_t) nMergedTracks; GPUAtomic(uint32_t) nOutputTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; @@ -103,7 +103,7 @@ class GPUTPCGMMerger : public GPUProcessor void* SetPointersOutputState(void* mem); void* SetPointersMemory(void* mem); - GPUhdi() int32_t NOutputTracks() const { return mMemory->nOutputTracks; } + GPUhdi() int32_t NMergedTracks() const { return mMemory->nMergedTracks; } GPUhdi() const GPUTPCGMMergedTrack* OutputTracks() const { return mOutputTracks; } GPUhdi() GPUTPCGMMergedTrack* OutputTracks() { return mOutputTracks; } GPUhdi() const GPUdEdxInfo* OutputTracksdEdx() const { return mOutputTracksdEdx; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index ac55f423b1c42..02d0ac98b05b0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -94,7 +94,7 @@ void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSector, in void GPUTPCGMMerger::DumpTrackLinks(std::ostream& out, bool output, const char* type) const { out << "\nTPC Merger Links " << type 
<< "\n"; - const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal(); for (int32_t i = 0; i < n; i++) { if (mTrackLinks[i] != -1) { out << " " << i << ": " << mTrackLinks[i] << "\n"; @@ -138,7 +138,7 @@ void GPUTPCGMMerger::DumpCollected(std::ostream& out) const std::streamsize ss = out.precision(); out << std::setprecision(2); out << "\nTPC Merger Collected Tracks\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mOutputTracks[i]; const auto& p = trk.GetParam(); out << " Track " << i << ": Loop " << trk.Looper() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << "\n"; @@ -150,7 +150,7 @@ void GPUTPCGMMerger::DumpMergeCE(std::ostream& out) const { DumpTrackLinks(out, true, " for CE merging"); out << "\nTPC Merger Merge CE\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mOutputTracks[i]; if (trk.CCE()) { out << " Track " << i << ": CCE\n"; @@ -162,11 +162,11 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const { out << "\nTPC Merger Refit Prepare\n"; out << " Sort\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { out << " " << i << ": " << mTrackOrderAttach[i] << "\n"; } out << " Clusters\n"; - for (uint32_t j = 0; j < mMemory->nOutputTracks; j++) { + for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { const auto& trk = mOutputTracks[j]; out << " Track " << j << ": "; for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) { @@ -195,7 +195,7 @@ void 
GPUTPCGMMerger::DumpRefit(std::ostream& out) const std::streamsize ss = out.precision(); out << std::setprecision(2); out << "\nTPC Merger Refit\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mOutputTracks[i]; if (trk.NClusters() == 0) { continue; @@ -212,7 +212,7 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; - for (uint32_t j = 0; j < mMemory->nOutputTracks; j++) { + for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { const auto& trk = mOutputTracks[j]; if (trk.NClusters() == 0) { continue; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index d72d59a6250e7..68763b3549547 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -21,7 +21,7 @@ using namespace o2::gpu; template <> GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode) { - const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NOutputTracks(); + const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NMergedTracks(); GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, iEnd, { const int32_t i = mode == -1 ? merger.RetryRefitIds()[ii] : mode ? 
merger.TrackOrderProcess()[ii] : ii; GPUTPCGMTrackParam::RefitTrack(merger.OutputTracks()[i], i, &merger, mode == -1); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9ead17ea5c7c0..72e9f63e5da83 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -38,7 +38,7 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); - const uint32_t nTracks = merger.NOutputTracks(); + const uint32_t nTracks = merger.NMergedTracks(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); const GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx index 78eea63edecdd..ebc9d22560524 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx @@ -34,7 +34,7 @@ void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPC } seeds->Clear(); int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { + for (int32_t i = 0; i < merger->NMergedTracks(); i++) { const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; if (!track.OK()) { continue; @@ -112,7 +112,7 @@ void GPUTPCGMTracksToTPCSeeds::UpdateParamsOuter(TObjArray* seeds) return; } int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { + for (int32_t i = 0; i < merger->NMergedTracks(); i++) { const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; if (!track.OK()) { continue; @@ -134,7 +134,7 @@ void GPUTPCGMTracksToTPCSeeds::UpdateParamsInner(TObjArray* seeds) return; } int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { + for (int32_t i = 
0; i < merger->NMergedTracks(); i++) { const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; if (!track.OK()) { continue; diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index a21593b7ba9e9..e63bb82a9b09e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -100,7 +100,7 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread Date: Tue, 13 May 2025 10:25:11 +0200 Subject: [PATCH 0015/1426] GPU: Add additional optional debbug dumps for validation --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- .../Global/GPUChainTrackingDebug.h | 45 ++++++++++--------- .../Global/GPUChainTrackingMerger.cxx | 7 +-- .../Global/GPUChainTrackingSectorTracker.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 15 ++++++- 7 files changed, 47 insertions(+), 28 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9e0aa32155f0d..9400a429fca81 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -295,7 +295,7 @@ AddOption(trdNCandidates, int32_t, 3, "", 0, "Number of branching track candidat AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GPU track model for TRD tracking") AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))") AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") -AddOption(debugMask, uint32_t, 262143, "", 0, "Mask for debug output dumps to file") +AddOption(debugMask, uint32_t, (1 << 18) - 1, "debugMask", 0, "Mask for debug output dumps to file") AddOption(debugLogSuffix, 
std::string, "", "debugSuffix", 0, "Suffix for debug log files with --debug 6") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h index 810f40a1d8654..6c995f65f3dd3 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h @@ -23,28 +23,29 @@ namespace o2::gpu { // NOTE: Values below 262144 are activated by default with --debug 6 in GPUSettingsList.h::debugMask enum GPUChainTrackingDebugFlags : uint32_t { - TPCSectorTrackingData = 1, - TPCPreLinks = 2, - TPCLinks = 4, - TPCStartHits = 8, - TPCTracklets = 16, - TPCSectorTracks = 32, - TPCHitWeights = 256, - TPCCompressedClusters = 512, - TPCDecompressedClusters = 1024, - TPCMergingRanges = 2048, - TPCMergingSectorTracks = 4096, - TPCMergingMergedTracks = 8192, - TPCMergingCollectedTracks = 16384, - TPCMergingCE = 32768, - TPCMergingRefit = 65536, - TPCClustererClusters = 131072, - TPCClusterer = 262144, - TPCClustererDigits = 262144 << 1, - TPCClustererPeaks = 262144 << 2, - TPCClustererSuppressedPeaks = 262144 << 3, - TPCClustererChargeMap = 262144 << 4, - TPCClustererZeroedCharges = 262144 << 5 + TPCSectorTrackingData = 1 << 0, + TPCPreLinks = 1 << 1, + TPCLinks = 1 << 2, + TPCStartHits = 1 << 3, + TPCTracklets = 1 << 4, + TPCSectorTracks = 1 << 5, + TPCHitWeights = 1 << 6, + TPCMergingRanges = 1 << 7, + TPCMergingSectorTracks = 1 << 8, + TPCMergingMatching = 1 << 9, + TPCMergingCollectedTracks = 1 << 10, + TPCMergingCE = 1 << 11, + TPCMergingPrepareFit = 1 << 12, + TPCMergingRefit = 1 << 13, + TPCMergingLoopers = 1 << 14, + TPCCompressedClusters = 1 << 15, + TPCDecompressedClusters = 1 << 16, + TPCClustererClusters = 1 << 17, + TPCClustererDigits = 1 << 18, + 
TPCClustererPeaks = 1 << 19, + TPCClustererSuppressedPeaks = 1 << 20, + TPCClustererChargeMap = 1 << 21, + TPCClustererZeroedCharges = 1 << 22 }; template diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index bd1fa7796dadf..df80eabfb8761 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -143,7 +143,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -158,7 +158,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -202,7 +202,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool 
synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingPrepareFit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); if (doGPU) { CondWaitEvent(waitForTransfer, &mEvents->single); @@ -240,6 +240,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } runKernel(GetGridAuto(0, deviceType)); runKernel(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType)); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingLoopers, Merger, &GPUTPCGMMerger::DumpLoopers, *mDebugFile); } DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index ef38d53173c2b..67ef402961a20 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -176,7 +176,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } if (GetProcessingSettings().debugLevel >= 6) { - *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; + if ((GetProcessingSettings().debugMask & 63)) { + *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; + } if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCSectorTrackingData) { if (doGPU) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 6ef2ed2ede668..73b14ba1b2fdf 100644 --- 
a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -95,6 +95,7 @@ class GPUTPCGMMergedTrack GPUd() void SetFlags(uint8_t v) { mFlags = v; } GPUd() void SetLegs(uint8_t v) { mLegs = v; } GPUd() uint8_t Legs() const { return mLegs; } + GPUd() uint8_t Flags() const { return mFlags; } GPUd() const gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() const { return mOuterParam; } GPUd() gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() { return mOuterParam; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 6c9c14b557798..ae85f20b17b48 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -201,6 +201,7 @@ class GPUTPCGMMerger : public GPUProcessor void DumpFitPrepare(std::ostream& out) const; void DumpRefit(std::ostream& out) const; void DumpFinal(std::ostream& out) const; + void DumpLoopers(std::ostream& out) const; template void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 02d0ac98b05b0..3be32a2d87610 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -204,11 +204,24 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? 
mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) - << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << "\n"; + << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] + << " NFitted " << trk.NClustersFitted() << " legs " << (int)trk.Legs() << " flags " << (int)trk.Flags() << "\n"; } out << std::setprecision(ss); } +void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const +{ + out << "\n TPC Merger Looper Afterburner\n"; + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { + if (i && i % 100 == 0) { + out << "\n"; + } + out << (int)mOutputTracks[i].MergedLooper() << " "; + } + out << "\n"; +} + void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; From d20b540fb0b5693ad6fa39b128d589160c91163f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:25:47 +0200 Subject: [PATCH 0016/1426] GPU TPC: Fix deterministic mode for TPC cluster compression / decompression / looper merging afterburner --- .../Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc | 4 ++-- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index d3dd561dcea2f..1d633eb5e748f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -82,14 +82,14 @@ struct GPUTPCGMMergerSortTracksQPt_comp { struct GPUTPCGMMergerMergeLoopers_comp { GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b) { - return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); + return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? 
CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); } }; struct GPUTPCGMO2OutputSort_comp { GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) { - return (a.y > b.y); + return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); } }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index e96bbeee774bf..99ef548b2d78e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1948,7 +1948,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, if (iThread || iBlock) { return; } - auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); }; + auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); }; GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, comp); #endif } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 72e9f63e5da83..624c9ab487c8d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -88,7 +88,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, return; } GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); - auto comp = [](const auto& a, const auto& b) { return (a.y > b.y); }; + auto comp = [](const auto& a, const auto& b) { return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? 
a.y > b.y : a.x > b.x, a.y > b.y); }; GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, comp); #endif } From 7732f5c426049d1eba9711b31626d4fb86d701b7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:51:13 +0200 Subject: [PATCH 0017/1426] GPU: Deduplicate sort comparisons: Use structs, since both hipcub and rocthrust do not work with lambdas for some reason --- ...GPUReconstructionCUDAKernelsSpecialize.inc | 82 ---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 143 ++++++++++-------- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 20 ++- 3 files changed, 98 insertions(+), 147 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 1d633eb5e748f..44cde3d4ac48a 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -14,88 +14,6 @@ #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct MergeBorderTracks_compMax { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); - } -}; -struct MergeBorderTracks_compMin { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); - } -}; - -struct GPUTPCGMMergerSortTracks_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerSortTracksQPt_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerMergeLoopers_comp { - GPUd() bool operator()(const 
MergeLooperParam& a, const MergeLooperParam& b) - { - return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); - } -}; - -struct GPUTPCGMO2OutputSort_comp { - GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) - { - return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); - } -}; - -} // anonymous namespace -} // namespace o2::gpu::internal - template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 99ef548b2d78e..b12375a10023a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -59,17 +59,13 @@ #include "SimulationDataFormat/MCCompLabel.h" #endif -namespace o2::gpu::internal -{ -} +static constexpr int32_t kMaxParts = 400; +static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; + using namespace o2::gpu; -using namespace o2::gpu::internal; using namespace o2::tpc; using namespace gputpcgmmergertypes; -static constexpr int32_t kMaxParts = 400; -static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; - namespace o2::gpu::internal { struct MergeLooperParam { @@ -78,8 +74,79 @@ struct MergeLooperParam { float y; uint32_t id; }; + +struct MergeBorderTracks_compMax { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); + } +}; +struct MergeBorderTracks_compMin { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); + } +}; + +struct GPUTPCGMMergerSortTracks_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + if (a.CCE() != b.CCE()) { + return a.CCE() > b.CCE(); + } + if (a.Legs() != b.Legs()) { + return a.Legs() > b.Legs(); + } + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE + return a.NClusters() > b.NClusters(); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerSortTracksQPt_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerMergeLoopers_comp { + GPUd() bool operator()(const 
MergeLooperParam& a, const MergeLooperParam& b) + { + return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); + } +}; + } // namespace o2::gpu::internal +using namespace o2::gpu::internal; + #ifndef GPUCA_GPUCODE #include "GPUQA.h" @@ -742,11 +809,11 @@ template <> GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread == 0) { + if (iThread == 0 && iBlock == 0) { if (cmpMax) { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMax()); } else { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMin()); } } #endif @@ -1757,60 +1824,18 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mOutputTracks)); } - // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object - auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { - const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - }; - - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, comp); #endif } GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort 
+ mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mOutputTracks)); } - // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object - auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { - const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - }; - - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, comp); #endif } @@ -1945,11 +1970,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } - auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? 
CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); }; - GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, comp); #endif } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 624c9ab487c8d..1e08058fb22dd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -34,6 +34,18 @@ using namespace o2::tpc::constants; GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit; } GPUdi() static uint32_t getFlagsRequired(const GPUSettingsRec& rec) { return rec.tpc.dropSecondaryLegsInOutput ? gputpcgmmergertypes::attachGoodLeg : gputpcgmmergertypes::attachZero; } +namespace o2::gpu::internal +{ + +struct GPUTPCGMO2OutputSort_comp { + GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) + { + return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? 
a.y > b.y : a.x > b.x, a.y > b.y); + } +}; + +} // namespace o2::gpu::internal + template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { @@ -84,12 +96,10 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); + GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, internal::GPUTPCGMO2OutputSort_comp()); } - GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); - auto comp = [](const auto& a, const auto& b) { return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); }; - GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, comp); #endif } From f75693ddbe0b19eb445da5a9d9972f73fdd86b96 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 11:04:35 +0200 Subject: [PATCH 0018/1426] GPU: Remove obsolete files used for tests in Run 2 --- .../Merger/GPUTPCGMTracksToTPCSeeds.cxx | 149 ------------------ .../Merger/GPUTPCGMTracksToTPCSeeds.h | 29 ---- 2 files changed, 178 deletions(-) delete mode 100644 GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx delete mode 100644 GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx deleted file mode 100644 index ebc9d22560524..0000000000000 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
-// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCGMTracksToTPCSeeds.cxx -/// \author David Rohr - -#include "GPUTPCGMTracksToTPCSeeds.h" -#include "GPUTPCGlobalMergerComponent.h" -#include "GPUTPCGMMergerTypes.h" -#include "GPUTPCGMMerger.h" -#include "GPULogging.h" -#include "AliTPCtracker.h" -#include "AliTPCtrack.h" -#include "AliTPCseed.h" -#include "AliTPCtrackerSector.h" -#include "TObjArray.h" -#include "AliTPCclusterMI.h" - -using namespace o2::gpu; - -void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPCtracker* tpctracker) -{ - const GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - seeds->Clear(); - int32_t index = 0; - for (int32_t i = 0; i < merger->NMergedTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - - AliTPCtrack tr; - tr.Set(track.GetParam().GetX(), track.GetAlpha(), track.GetParam().GetPar(), track.GetParam().GetCov()); - AliTPCseed* seed = new (tpctracker->NextFreeSeed()) AliTPCseed(tr); - for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - seed->SetClusterPointer(j, nullptr); - seed->SetClusterIndex(j, -1); - } - int32_t ncls = 0; - int32_t lastrow = -1; - int32_t lastleg = -1; - for (int32_t j = track.NClusters() - 1; j >= 0; j--) { - const GPUTPCGMMergedTrackHit& cls = merger->Clusters()[track.FirstClusterRef() + j]; - if (cls.state & GPUTPCGMMergedTrackHit::flagReject) { - continue; - } - if (lastrow != -1 && (cls.row < lastrow 
|| cls.leg != lastleg)) { - break; - } - if (cls.row == lastrow) { - continue; - } - - AliTPCtrackerRow& row = tpctracker->GetRow(cls.sector % 18, cls.row); - uint32_t clIndexOffline = 0; - AliTPCclusterMI* clOffline = row.FindNearest2(cls.y, cls.z, 0.01f, 0.01f, clIndexOffline); - if (!clOffline) { - continue; - } - clIndexOffline = row.GetIndex(clIndexOffline); - - clOffline->Use(10); - seed->SetClusterPointer(cls.row, clOffline); - seed->SetClusterIndex2(cls.row, clIndexOffline); - - lastrow = cls.row; - lastleg = cls.leg; - ncls++; - } - - seed->SetRelativeSector(track.GetAlpha() / (M_PI / 9.f)); - seed->SetNumberOfClusters(ncls); - seed->SetNFoundable(ncls); - seed->SetChi2(track.GetParam().GetChi2()); - - float alpha = seed->GetAlpha(); - if (alpha >= 2.f * M_PI) { - alpha -= 2.f * M_PI; - } - if (alpha < 0) { - alpha += 2.f * M_PI; - } - seed->SetRelativeSector(track.GetAlpha() / (M_PI / 9.f)); - - seed->SetPoolID(tpctracker->GetLastSeedId()); - seed->SetIsSeeding(kTRUE); - seed->SetSeed1(GPUCA_ROW_COUNT - 1); - seed->SetSeed2(GPUCA_ROW_COUNT - 2); - seed->SetSeedType(0); - seed->SetFirstPoint(-1); - seed->SetLastPoint(-1); - seeds->AddLast(seed); // note, track is seed, don't free the seed - index++; - } -} - -void GPUTPCGMTracksToTPCSeeds::UpdateParamsOuter(TObjArray* seeds) -{ - const GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - int32_t index = 0; - for (int32_t i = 0; i < merger->NMergedTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - if (index > seeds->GetEntriesFast()) { - GPUError("Invalid number of offline seeds"); - return; - } - AliTPCseed* seed = (AliTPCseed*)seeds->UncheckedAt(index++); - const gputpcgmmergertypes::GPUTPCOuterParam& param = track.OuterParam(); - seed->Set(param.X, param.alpha, param.P, param.C); - } -} - -void GPUTPCGMTracksToTPCSeeds::UpdateParamsInner(TObjArray* seeds) -{ - const 
GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - int32_t index = 0; - for (int32_t i = 0; i < merger->NMergedTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - if (index > seeds->GetEntriesFast()) { - GPUError("Invalid number of offline seeds"); - return; - } - AliTPCseed* seed = (AliTPCseed*)seeds->UncheckedAt(index++); - seed->Set(track.GetParam().GetX(), track.GetAlpha(), track.GetParam().GetPar(), track.GetParam().GetCov()); - } -} diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h deleted file mode 100644 index 029cb108d4119..0000000000000 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCGMTracksToTPCSeeds.h -/// \author David Rohr - -#ifndef GPUTPCGMTRACKSTOTPCSEEDS_H -#define GPUTPCGMTRACKSTOTPCSEEDS_H - -class TObjArray; -class AliTPCtracker; - -class GPUTPCGMTracksToTPCSeeds -{ - public: - static void CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPCtracker* tpctracker); - static void UpdateParamsOuter(TObjArray* seeds); - static void UpdateParamsInner(TObjArray* seeds); -}; - -#endif From 747fb860184729b2d219e7b0a044d09e15c7a1b5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 11:04:56 +0200 Subject: [PATCH 0019/1426] GPU TPC: Some more member variable renaming --- ...GPUReconstructionCUDAKernelsSpecialize.inc | 4 +- .../Global/GPUChainTrackingMerger.cxx | 8 ++-- .../Global/GPUChainTrackingRefit.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 38 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 18 ++++----- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 14 +++---- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 10 ++--- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 12 +++--- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 10 ++--- 10 files changed, 59 insertions(+), 59 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 44cde3d4ac48a..85567d70d70d6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -27,13 +27,13 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), 
GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.MergedTracks())); } template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.MergedTracks())); } template <> diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index df80eabfb8761..2b3d719a27dea 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -256,10 +256,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) throw std::runtime_error("QA Scratch buffer exceeded"); } } - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); + GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent); waitEvent = nullptr; if (param().dodEdxEnabled) { - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), 
Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { @@ -326,7 +326,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mRec->ReturnVolatileDeviceMemory(); } - mIOPtrs.mergedTracks = Merger.OutputTracks(); + mIOPtrs.mergedTracks = Merger.MergedTracks(); mIOPtrs.nMergedTracks = Merger.NMergedTracks(); mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); @@ -340,7 +340,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mIOPtrs.outputTracksTPCO2MC = Merger.OutputTracksTPCO2MC(); if (doGPU) { - processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks(); + processorsShadow()->ioPtrs.mergedTracks = MergerShadow.MergedTracks(); processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 4662b5464f710..5ca20a39d0462 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -33,7 +33,7 @@ int32_t GPUChainTracking::RunRefit() SetupGPUProcessor(&Refit, false); RefitShadow.SetPtrsFromGPUConstantMem(processorsShadow(), doGPU ? &processorsDevice()->param : nullptr); RefitShadow.SetPropagator(doGPU ? processorsShadow()->calibObjects.o2Propagator : GetO2Propagator()); - RefitShadow.mPTracks = (doGPU ? processorsShadow() : processors())->tpcMerger.OutputTracks(); + RefitShadow.mPTracks = (doGPU ? 
processorsShadow() : processors())->tpcMerger.MergedTracks(); WriteToConstantMemory(RecoStep::Refit, (char*)&processors()->trackingRefit - (char*)processors(), &RefitShadow, sizeof(RefitShadow), 0); // TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); if (param().rec.trackingRefitGPUModel) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index b12375a10023a..f1a0816529c3a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -365,11 +365,11 @@ void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) void* GPUTPCGMMerger::SetPointersOutput(void* mem) { - computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracks, mNMaxTracks); if (mRec->GetParam().dodEdxEnabled) { - computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracksdEdx, mNMaxTracks); if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) { - computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks); } } computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); @@ -1318,7 +1318,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row); if (track->TransportToX(this, x0, Param().bzCLight, b, GPUCA_MAX_SIN_PHI_LOW)) { b.SetTrackID(itr); - b.SetNClusters(mOutputTracks[itr].NClusters()); + b.SetNClusters(mMergedTracks[itr].NClusters()); if (CAMath::Abs(b.Cov()[4]) >= 0.5f) { b.SetCov(4, 0.5f); // TODO: Is this needed and better than the cut in BorderTrack? } @@ -1339,11 +1339,11 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i { const ClusterNative* cls = Param().par.earlyTpcTransform ? 
nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear; for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { - if (mOutputTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { + if (mMergedTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) { continue; } - GPUTPCGMMergedTrack* trk[2] = {&mOutputTracks[i], &mOutputTracks[mTrackLinks[i]]}; + GPUTPCGMMergedTrack* trk[2] = {&mMergedTracks[i], &mMergedTracks[mTrackLinks[i]]}; if (!trk[1]->OK() || trk[1]->CCE()) { continue; @@ -1459,7 +1459,7 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i } } - // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks + // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mMergedTracks[i].CCE() == false) {mMergedTracks[i].SetNClusters(0);mMergedTracks[i].SetOK(false);}} //Remove all non-CE tracks } namespace o2::gpu::internal @@ -1752,7 +1752,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread continue; } - GPUTPCGMMergedTrack& mergedTrack = mOutputTracks[iOutputTrack]; + GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack]; mergedTrack.SetFlags(0); mergedTrack.SetOK(1); @@ -1825,7 +1825,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS if (iThread == 0 && iBlock == 0) { - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mOutputTracks)); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mMergedTracks)); } #endif } @@ -1834,7 +1834,7 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int { #ifndef 
GPUCA_SPECIALIZE_THRUST_SORTS if (iThread == 0 && iBlock == 0) { - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mOutputTracks)); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mMergedTracks)); } #endif } @@ -1843,7 +1843,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThr { for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackOrderAttach[mTrackSort[i]] = i; - const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; + const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; if (trk.OK()) { for (uint32_t j = 0; j < trk.NClusters(); j++) { mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = attachAttached | attachGood; @@ -1884,7 +1884,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { - const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; + const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; if (!trk.OK() || trk.NClusters() == 0) { continue; } @@ -1919,7 +1919,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, { const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { @@ -1983,7 +1983,7 @@ GPUd() void 
GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, #if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE) std::vector paramLabels(mMemory->nLooperMatchCandidates); for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) { - paramLabels[i] = GetTrackLabel(mOutputTracks[params[i].id]); + paramLabels[i] = GetTrackLabel(mMergedTracks[params[i].id]); } /*std::vector dropped(mMemory->nLooperMatchCandidates); std::vector droppedMC(mMemory->nLooperMatchCandidates); @@ -2005,8 +2005,8 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, // bs |= 1; continue; } - const auto& trk1 = mOutputTracks[params[i].id]; - const auto& trk2 = mOutputTracks[params[j].id]; + const auto& trk1 = mMergedTracks[params[i].id]; + const auto& trk2 = mMergedTracks[params[j].id]; const auto& param1 = trk1.GetParam(); const auto& param2 = trk2.GetParam(); if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) { @@ -2045,7 +2045,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const int64_t label2 = paramLabels[j]; bool labelEQ = label1 != -1 && label1 == label2; if (1 || EQ || labelEQ) { - // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mOutputTracks[params[i].id].CSide(), (int32_t)mOutputTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, params[i].y, params[j].y); + // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mMergedTracks[params[i].id].CSide(), (int32_t)mMergedTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, 
params[i].y, params[j].y); static auto& tup = GPUROOTDump::get("mergeloopers", "labeleq:sides:d2xy:tgl1:tgl2:qpt1:qpt2:dz:dzcorr:dtgl:dqpt:dznorm:bs"); tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz), dzcorr, dtgl, dqpt, dznorm, bs); static auto tup2 = GPUROOTDump::getNew("mergeloopers2", "labeleq:refz1:refz2:tgl1:tgl2:qpt1:qpt2:snp1:snp2:a1:a2:dzn:phasecor:phasedir:dzcorr"); @@ -2063,9 +2063,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, }*/ #endif if (EQ) { - mOutputTracks[params[j].id].SetMergedLooper(true); + mMergedTracks[params[j].id].SetMergedLooper(true); if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) { - mOutputTracks[params[i].id].SetMergedLooper(true); + mMergedTracks[params[i].id].SetMergedLooper(true); } } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index ae85f20b17b48..4487b6d937dc2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -104,12 +104,12 @@ class GPUTPCGMMerger : public GPUProcessor void* SetPointersMemory(void* mem); GPUhdi() int32_t NMergedTracks() const { return mMemory->nMergedTracks; } - GPUhdi() const GPUTPCGMMergedTrack* OutputTracks() const { return mOutputTracks; } - GPUhdi() GPUTPCGMMergedTrack* OutputTracks() { return mOutputTracks; } - GPUhdi() const GPUdEdxInfo* OutputTracksdEdx() const { return mOutputTracksdEdx; } - GPUhdi() GPUdEdxInfo* OutputTracksdEdx() { return mOutputTracksdEdx; } - GPUhdi() const GPUdEdxInfo* OutputTracksdEdxAlt() const { return mOutputTracksdEdxAlt; } - GPUhdi() GPUdEdxInfo* OutputTracksdEdxAlt() { return mOutputTracksdEdxAlt; } + GPUhdi() const GPUTPCGMMergedTrack* MergedTracks() const { return mMergedTracks; } + GPUhdi() GPUTPCGMMergedTrack* MergedTracks() { return 
mMergedTracks; } + GPUhdi() const GPUdEdxInfo* MergedTracksdEdx() const { return mMergedTracksdEdx; } + GPUhdi() GPUdEdxInfo* MergedTracksdEdx() { return mMergedTracksdEdx; } + GPUhdi() const GPUdEdxInfo* MergedTracksdEdxAlt() const { return mMergedTracksdEdxAlt; } + GPUhdi() GPUdEdxInfo* MergedTracksdEdxAlt() { return mMergedTracksdEdxAlt; } GPUhdi() uint32_t NClusters() const { return mNClusters; } GPUhdi() uint32_t NMaxClusters() const { return mNMaxClusters; } GPUhdi() uint32_t NMaxTracks() const { return mNMaxTracks; } @@ -262,9 +262,9 @@ class GPUTPCGMMerger : public GPUProcessor uint16_t mMemoryResOutputO2Scratch = (uint16_t)-1; int32_t mNClusters = 0; // Total number of incoming clusters (from sector tracks) - GPUTPCGMMergedTrack* mOutputTracks = nullptr; //* array of output merged tracks - GPUdEdxInfo* mOutputTracksdEdx = nullptr; //* dEdx information - GPUdEdxInfo* mOutputTracksdEdxAlt = nullptr; //* dEdx alternative information + GPUTPCGMMergedTrack* mMergedTracks = nullptr; //* array of output merged tracks + GPUdEdxInfo* mMergedTracksdEdx = nullptr; //* dEdx information + GPUdEdxInfo* mMergedTracksdEdxAlt = nullptr; //* dEdx alternative information GPUTPCGMSectorTrack* mSectorTrackInfos = nullptr; //* additional information for sector tracks int32_t* mSectorTrackInfoIndex = nullptr; GPUTPCGMMergedTrackHit* mClusters = nullptr; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 3be32a2d87610..9c924e74ec519 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -139,7 +139,7 @@ void GPUTPCGMMerger::DumpCollected(std::ostream& out) const out << std::setprecision(2); out << "\nTPC Merger Collected Tracks\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); out << " Track " << i << ": Loop " << trk.Looper() << " Alpha " << 
trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << "\n"; } @@ -151,7 +151,7 @@ void GPUTPCGMMerger::DumpMergeCE(std::ostream& out) const DumpTrackLinks(out, true, " for CE merging"); out << "\nTPC Merger Merge CE\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; if (trk.CCE()) { out << " Track " << i << ": CCE\n"; } @@ -167,7 +167,7 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const } out << " Clusters\n"; for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { - const auto& trk = mOutputTracks[j]; + const auto& trk = mMergedTracks[j]; out << " Track " << j << ": "; for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) { out << j << "/" << (i - trk.FirstClusterRef()) << ": " << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", "; @@ -196,14 +196,14 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const out << std::setprecision(2); out << "\nTPC Merger Refit\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; if (trk.NClusters() == 0) { continue; } const auto& p = trk.GetParam(); const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() - << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? 
mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) + << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxMaxTPC : -1.f) << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << " NFitted " << trk.NClustersFitted() << " legs " << (int)trk.Legs() << " flags " << (int)trk.Flags() << "\n"; } @@ -217,7 +217,7 @@ void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const if (i && i % 100 == 0) { out << "\n"; } - out << (int)mOutputTracks[i].MergedLooper() << " "; + out << (int)mMergedTracks[i].MergedLooper() << " "; } out << "\n"; } @@ -226,7 +226,7 @@ void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { - const auto& trk = mOutputTracks[j]; + const auto& trk = mMergedTracks[j]; if (trk.NClusters() == 0) { continue; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index 68763b3549547..1631777d80482 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -24,7 +24,7 @@ GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThread const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NMergedTracks(); GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, iEnd, { const int32_t i = mode == -1 ? merger.RetryRefitIds()[ii] : mode ? 
merger.TrackOrderProcess()[ii] : ii; - GPUTPCGMTrackParam::RefitTrack(merger.OutputTracks()[i], i, &merger, mode == -1); + GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger, mode == -1); }); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 1e08058fb22dd..eb22ca49e9242 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -49,10 +49,10 @@ struct GPUTPCGMO2OutputSort_comp { template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); + const GPUTPCGMMergedTrack* tracks = merger.MergedTracks(); const uint32_t nTracks = merger.NMergedTracks(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); - const GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); + const GPUdEdxInfo* tracksdEdx = merger.MergedTracksdEdx(); constexpr uint8_t flagsReject = getFlagsReject(); const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); @@ -107,9 +107,9 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { constexpr float MinDelta = 0.1f; - const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); - GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); - GPUdEdxInfo* tracksdEdxAlt = merger.OutputTracksdEdxAlt(); + const GPUTPCGMMergedTrack* tracks = merger.MergedTracks(); + GPUdEdxInfo* tracksdEdx = merger.MergedTracksdEdx(); + GPUdEdxInfo* tracksdEdxAlt = merger.MergedTracksdEdxAlt(); const int32_t nTracks = merger.NOutputTracksTPCO2(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); constexpr uint8_t flagsReject = getFlagsReject(); diff --git 
a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 366f75cb05e56..4b616fce83f5f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -94,10 +94,10 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ storeOuter = 0; if (iWay == nWays - 1) { StoreOuter(outerParam, prop, 0); - if (merger->OutputTracks()[iTrk].Looper()) { + if (merger->MergedTracks()[iTrk].Looper()) { storeOuter = 1; } - } else if (iWay == nWays - 2 && merger->OutputTracks()[iTrk].Looper()) { + } else if (iWay == nWays - 2 && merger->MergedTracks()[iTrk].Looper()) { storeOuter = 2; } } @@ -435,9 +435,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // TODO: we have looping tracks here with 0 accepted clusters in the primary leg. In that case we should refit the track using only the primary leg. if (param.par.dodEdx && param.dodEdxEnabled) { - dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); + dEdx.computedEdx(merger->MergedTracksdEdx()[iTrk], param); if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { - dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + dEdxAlt.computedEdx(merger->MergedTracksdEdxAlt()[iTrk], param); } } Alpha = prop.GetAlpha(); @@ -596,7 +596,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return -1e6f; } - const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->OutputTracks()[iTrack].CSide() ^ (sector >= 18)) ? -mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); + const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->MergedTracks()[iTrack].CSide() ^ (sector >= 18)) ? 
-mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); const float y0 = row.Grid().YMin(); const float stepY = row.HstepY(); const float z0 = row.Grid().ZMin() - zOffset; // We can use our own ZOffset, since this is only used temporarily anyway @@ -1136,7 +1136,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr t.QPt() = 1.e-4f; } - CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->OutputTracks()[iTrk].Looper()); }); + CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->MergedTracks()[iTrk].Looper()); }); track.SetOK(ok); track.SetNClustersFitted(nTrackHits); diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index e63bb82a9b09e..5af3ebb51b9d6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -105,8 +105,8 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread= 0) { int32_t firstIdx = j; - auto firstItem = merger.OutputTracks()[firstIdx]; + auto firstItem = merger.MergedTracks()[firstIdx]; int32_t currIdx = firstIdx; int32_t sourceIdx = tmp[currIdx]; tmp2[sourceIdx] = currIdx; do { tmp[currIdx] = -1; - merger.OutputTracks()[currIdx] = merger.OutputTracks()[sourceIdx]; + merger.MergedTracks()[currIdx] = merger.MergedTracks()[sourceIdx]; currIdx = sourceIdx; sourceIdx = tmp[currIdx]; tmp2[sourceIdx] = currIdx; } while (sourceIdx != firstIdx); tmp[currIdx] = -1; - merger.OutputTracks()[currIdx] = firstItem; + merger.MergedTracks()[currIdx] = firstItem; } } } From b0414adf124ee153273fd9fefc7080690119eb89 Mon Sep 17 00:00:00 2001 From: Giulio 
Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 13 May 2025 23:00:50 +0200 Subject: [PATCH 0020/1426] Out of line FairTBuffer destructor (#14265) --- Framework/Core/include/Framework/TMessageSerializer.h | 3 +++ Framework/Core/src/TMessageSerializer.cxx | 3 +++ 2 files changed, 6 insertions(+) diff --git a/Framework/Core/include/Framework/TMessageSerializer.h b/Framework/Core/include/Framework/TMessageSerializer.h index 769d23a7a3427..b6e355638c1e3 100644 --- a/Framework/Core/include/Framework/TMessageSerializer.h +++ b/Framework/Core/include/Framework/TMessageSerializer.h @@ -40,6 +40,8 @@ class FairOutputTBuffer : public TBufferFile : TBufferFile(TBuffer::kWrite, msg.GetSize() - sizeof(char*), embedInItself(msg), false, fairMQrealloc) { } + + ~FairOutputTBuffer() override; // Helper function to keep track of the FairMQ message that holds the data // in the data itself. We can use this to make sure the message can be reallocated // even if we simply have a pointer to the data. Hopefully ROOT will not play dirty @@ -60,6 +62,7 @@ class FairInputTBuffer : public TBufferFile : TBufferFile(TBuffer::kRead, size - sizeof(char*), data + sizeof(char*), false, nullptr) { } + ~FairInputTBuffer() override; }; struct TMessageSerializer { diff --git a/Framework/Core/src/TMessageSerializer.cxx b/Framework/Core/src/TMessageSerializer.cxx index c5da4cc576242..81a1c6e537d09 100644 --- a/Framework/Core/src/TMessageSerializer.cxx +++ b/Framework/Core/src/TMessageSerializer.cxx @@ -15,6 +15,9 @@ using namespace o2::framework; +FairOutputTBuffer::~FairOutputTBuffer() = default; +FairInputTBuffer::~FairInputTBuffer() = default; + void* FairOutputTBuffer::embedInItself(fair::mq::Message& msg) { // The first bytes of the message are used to store the pointer to the message itself From f4a478c778dc6b2672f71096f7915c10d18543d2 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 13 May 2025 23:02:37 +0200 Subject: [PATCH 0021/1426] DPL: keep 
codechecker happy (#14270) --- Detectors/CTP/reconstruction/src/RawDataDecoder.cxx | 6 ++++-- Detectors/CTP/workflow/src/RawDecoderSpec.cxx | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index 74bd08ce943ee..b216f5ec54570 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -615,8 +615,9 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, continue; } mClassCountersA[i]++; - if (cls->descriptor == nullptr) + if (cls->descriptor == nullptr) { continue; + } uint64_t clsinpmask = cls->descriptor->getInputsMask(); uint64_t diginpmask = digit.CTPInputMask.to_ullong(); if (!((clsinpmask & diginpmask) == clsinpmask)) { @@ -632,8 +633,9 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, // if inps => class mask for (auto const& cls : mCTPConfig.getCTPClasses()) { // cls.printStream(std::cout); - if (cls.descriptor == nullptr) + if (cls.descriptor == nullptr) { continue; + } uint64_t clsinpmask = cls.descriptor->getInputsMask(); // class definition uint64_t diginpmask = digit.CTPInputMask.to_ullong(); uint64_t digclsmask = digit.CTPClassMask.to_ullong(); diff --git a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx index 3f7c729b351a3..2df6bc981ce44 100644 --- a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx +++ b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx @@ -71,8 +71,9 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) std::cout << std::endl; LOG(info) << " Lost due to the shift:" << mDecoder.getLostDueToShift(); LOG(info) << "Number of missing TF:" << nmiss << std::endl; - if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) + if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) { LOG(error) << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << 
mDecoder.getErrorTCR() << std::endl; + } std::array clsA = mDecoder.getClassCountersA(); std::array clsB = mDecoder.getClassCountersB(); std::array clsEA = mDecoder.getClassErrorsA(); @@ -80,8 +81,9 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { bool print = clsA[i] > 0 || clsB[i] > 0 || clsEA[i] > 0 || clsEB[i] > 0; - if (clsEA[i]) + if (clsEA[i]) { LOG(error) << " Class without inputs:"; + } LOG(important) << "CLASS:" << i << " Cls=>Inp:" << clsA[i] << " Inp=>Cls:" << clsB[i] << " ErrorsCls=>Inps:" << clsEA[i] << " MissingInps=>Cls:" << clsEB[i]; } } From 17345d60e5a63f085e86d1064c315d6f88c326b3 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 14 May 2025 19:29:47 +0200 Subject: [PATCH 0022/1426] DPL: Out of line NumericBuilders (#14273) --- Framework/Core/include/Framework/TableBuilder.h | 6 ++++++ Framework/Core/src/TableBuilder.cxx | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 936a8a04d5a5a..8d7601cefc634 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -48,6 +48,12 @@ struct BulkInfo { size_t size; }; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; + namespace o2::framework { namespace detail diff --git a/Framework/Core/src/TableBuilder.cxx b/Framework/Core/src/TableBuilder.cxx index eb19f8d3fe642..2169722efa9da 100644 --- a/Framework/Core/src/TableBuilder.cxx +++ b/Framework/Core/src/TableBuilder.cxx @@ -131,3 +131,9 @@ std::shared_ptr spawnerHelper(std::shared_ptr const& } } // namespace o2::framework + +template class arrow::NumericBuilder; +template class arrow::NumericBuilder; 
+template class arrow::NumericBuilder; +template class arrow::NumericBuilder; +template class arrow::NumericBuilder; From f44f2362e789b6e3a43214b5a0f48ba9c40838f7 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Wed, 14 May 2025 14:32:37 +0200 Subject: [PATCH 0023/1426] Update RecoContainer.cxx --- .../Detectors/GlobalTracking/src/RecoContainer.cxx | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx index d4b4e2b89cbb0..dd206ffe3b70d 100644 --- a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx +++ b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx @@ -1440,8 +1440,7 @@ RecoContainer::GlobalIDSet RecoContainer::getSingleDetectorRefs(GTrackID gidx) c table[GTrackID::TRD] = parent0.getTrackRef(); // there is no standalone TRD track, so use the index for the ITSTPCTRD track array } else if (src == GTrackID::TPCTRDTOF) { const auto& parent0 = getTOFMatch(gidx); // TPCTRD : TOF - const auto& parent1 = getITSTPCTRDTrack(parent0.getTrackRef()); - const auto& parent2 = getTPCITSTrack(parent1.getRefGlobalTrackId()); + const auto& parent1 = getTPCTRDTrack(parent0.getTrackRef()); table[GTrackID::TPCTRD] = parent0.getTrackRef(); table[GTrackID::TPC] = parent1.getRefGlobalTrackId(); table[GTrackID::TOF] = {unsigned(parent0.getIdxTOFCl()), GTrackID::TOF}; @@ -1547,8 +1546,6 @@ const o2::dataformats::MCTruthContainer* RecoContainer::getE void RecoContainer::getTrackTimeITSTPCTRDTOF(GTrackID gid, float& t, float& tErr) const { const auto& match = getITSTPCTRDTOFMatches()[gid]; - auto gidx = match.getTrackRef(); // this should be corresponding ITS-TPC-TRD track - // const auto& tofCl = getTOFClusters()[match.getTOFClIndex()]; t = (match.getSignal() - match.getLTIntegralOut().getTOF(o2::track::PID::Pion)) * PS2MUS; // tof time in \mus, FIXME: account for time of flight to R TOF tErr = 0.010f; } @@ -1557,8 +1554,6 @@ 
void RecoContainer::getTrackTimeITSTPCTRDTOF(GTrackID gid, float& t, float& tErr void RecoContainer::getTrackTimeTPCTRDTOF(GTrackID gid, float& t, float& tErr) const { const auto& match = getTPCTRDTOFMatches()[gid]; - auto gidx = match.getTrackRef(); // this should be corresponding ITS-TPC-TRD track - // const auto& tofCl = getTOFClusters()[match.getTOFClIndex()]; t = (match.getSignal() - match.getLTIntegralOut().getTOF(o2::track::PID::Pion)) * PS2MUS; // tof time in \mus, FIXME: account for time of flight to R TOF tErr = 0.010f; } @@ -1567,8 +1562,6 @@ void RecoContainer::getTrackTimeTPCTRDTOF(GTrackID gid, float& t, float& tErr) c void RecoContainer::getTrackTimeITSTPCTOF(GTrackID gid, float& t, float& tErr) const { const auto& match = getITSTPCTOFMatches()[gid]; - auto gidx = match.getTrackRef(); // this should be corresponding ITS-TPC track - // const auto& tofCl = getTOFClusters()[match.getTOFClIndex()]; t = (match.getSignal() - match.getLTIntegralOut().getTOF(o2::track::PID::Pion)) * PS2MUS; // tof time in \mus, FIXME: account for time of flight to R TOF tErr = 0.010f; } From 07096be128091de462d688c88e5f4cf0f5866729 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 15:56:55 +0200 Subject: [PATCH 0024/1426] GPU: Add some more type trait templates for GPU code --- GPU/Common/GPUCommonTypeTraits.h | 44 +++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index 6d72565d1f1fb..f837019c11875 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -22,7 +22,7 @@ #include #endif #else -// We just reimplement some type traits in std for the GPU +// We just reimplement some type traits in std for the GPU // TODO: Check if meanwhile we can get rid of GPUCommonTypeTraits and GPUCommonArray, and just use the std headers. 
namespace std { template @@ -35,6 +35,7 @@ struct conditional { }; template using contitional_t = typename conditional::type; + template struct is_same { static constexpr bool value = false; @@ -45,6 +46,7 @@ struct is_same { }; template static constexpr bool is_same_v = is_same::value; + template struct enable_if { }; @@ -52,6 +54,7 @@ template struct enable_if { typedef T type; }; + template struct remove_cv { typedef T type; @@ -68,6 +71,9 @@ template struct remove_cv { typedef T type; }; +template +using remove_cv_t = typename remove_cv::type; + template struct remove_const { typedef T type; @@ -76,6 +82,9 @@ template struct remove_const { typedef T type; }; +template +using remove_const_t = typename remove_const::type; + template struct remove_volatile { typedef T type; @@ -84,6 +93,9 @@ template struct remove_volatile { typedef T type; }; +template +using remove_volatile_t = typename remove_volatile::type; + template struct is_pointer_t { static constexpr bool value = false; @@ -95,6 +107,36 @@ struct is_pointer_t { template struct is_pointer : is_pointer_t::type> { }; + +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; +template +using remove_reference_t = typename remove_reference::type; + +template +struct is_member_pointer_helper { + static constexpr bool value = false; +}; +template +struct is_member_pointer_helper { + static constexpr bool value = true; +}; +template +struct is_member_pointer : is_member_pointer_helper::type> { +}; +template +static constexpr bool is_member_pointer_v = is_member_pointer::value; + } // namespace std #endif From 4654958fe006df87ce60aaf48d61184db85e76d3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 15:57:10 +0200 Subject: [PATCH 0025/1426] Revert "GPU: Workaround for Clang Frontend issue" This reverts commit 89b35ba2d75113e60b2045ed01e169b28d860a07. 
--- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 4 ---- GPU/GPUTracking/Base/hip/CMakeLists.txt | 5 ----- 2 files changed, 9 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 67ad608c13417..acc77648d954b 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,11 +38,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + -#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + -#else - std::string("#define GPUCA_RTC_CONSTEXPR\n") + -#endif GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index c89ef1769ad81..6eded3499e46e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -270,8 +270,3 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() - -set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx" -TARGET_DIRECTORY O2::GPUTrackingHIP -PROPERTIES -COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR") From 46ef93fdb9436f1b1bcebd01a3458235ed918c80 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 18:52:29 +0200 
Subject: [PATCH 0026/1426] GPU: Template workaround to get static constexpr values as constexpr from references --- GPU/GPUTracking/Definitions/GPUGetConstexpr.h | 67 +++++++++++++++++++ GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 5 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 11 +-- 3 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 GPU/GPUTracking/Definitions/GPUGetConstexpr.h diff --git a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h new file mode 100644 index 0000000000000..8001b4e98c83f --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h @@ -0,0 +1,67 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUGetConstexpr.h +/// \author David Rohr + +#ifndef GPUGETCONSTEXPR_H +#define GPUGETCONSTEXPR_H + +#include "GPUCommonDef.h" +#include "GPUCommonTypeTraits.h" + +// This is a temporary workaround required for clang (with c++20), until we can go to C++23 with P2280R4, which allows getting constexpr static values from references + +#if defined(__clang__) && __cplusplus >= 202002L && __cplusplus < 202302L + +namespace o2::gpu::internal +{ + +#define GPUCA_GET_CONSTEXPR(obj, val) ( \ + std::is_member_pointer_v::val)> ? 
o2::gpu::internal::getConstexpr(&std::remove_reference_t::val, o2::gpu::internal::getConstexprHelper::val), decltype(&obj)>(&obj).value) : o2::gpu::internal::getConstexpr(&std::remove_reference_t::val, o2::gpu::internal::getConstexprHelper::val), decltype(&obj)>().value)) + +template +struct getConstexprHelper; + +template + requires(!std::is_member_pointer_v) +struct getConstexprHelper { + GPUdi() constexpr getConstexprHelper(const void* = nullptr) {} + static constexpr const void* value = nullptr; +}; + +template + requires(std::is_member_pointer_v) +struct getConstexprHelper { + GPUdi() constexpr getConstexprHelper(const S& v) : value(v) {} + GPUdDefault() constexpr getConstexprHelper() = default; + const S value = nullptr; +}; + +GPUdi() constexpr auto getConstexpr(const auto* v, const void* = nullptr) +{ + return *v; +} + +GPUdi() constexpr auto getConstexpr(const auto v, const auto w) +{ + return w->*v; +} + +} // namespace o2::gpu::internal + +#else // __clang__ + +#define GPUCA_GET_CONSTEXPR(obj, val) (obj).val + +#endif + +#endif // GPUGETCONSTEXPR_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index eb22ca49e9242..7bb28a9f22e31 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -20,6 +20,7 @@ #include "DataFormatsTPC/PIDResponse.h" #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" +#include "GPUGetConstexpr.h" #ifndef GPUCA_GPUCODE #include "SimulationDataFormat/ConstMCTruthContainer.h" @@ -141,10 +142,10 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { if (param.dodEdxEnabled) { oTrack.setdEdx(tracksdEdx[i]); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != 
param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { oTrack.setdEdxAlt(tracksdEdxAlt[i]); } else { oTrack.setdEdxAlt(tracksdEdx[i]); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 4b616fce83f5f..1072e4b178bdf 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -39,6 +39,7 @@ #include "GPUTPCConvertImpl.h" #include "GPUTPCGMMergerTypes.h" #include "GPUParam.inc" +#include "GPUGetConstexpr.h" #ifdef GPUCA_CADEBUG_ENABLED #include "../utils/qconfig.h" @@ -216,12 +217,12 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { dEdxAlt.fillSubThreshold(lastRow - wayDirection); } } @@ -371,7 +372,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, 
prop.GetAlpha()); } - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; if (acc || accAlt) { @@ -395,7 +396,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (acc) { dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { if (accAlt) { dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } @@ -436,7 +437,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->MergedTracksdEdx()[iTrk], param); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { dEdxAlt.computedEdx(merger->MergedTracksdEdxAlt()[iTrk], param); } } From 4d647840509e57b890f4ce71fdb062f2edf7b234 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 18:52:48 +0200 Subject: [PATCH 0027/1426] GPU: Simplify some type_traits use, get rid of ::values and ::type --- .../Base/cuda/GPUReconstructionCUDAInternals.h | 2 +- 
GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 ++-- GPU/GPUTracking/qa/GPUQAHelper.h | 8 ++++---- GPU/GPUTracking/utils/bitfield.h | 2 +- GPU/GPUTracking/utils/qconfig.cxx | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 0813c9d22ea09..493c09e448e5e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -79,7 +79,7 @@ class GPUDebugTiming bool mDo; }; -static_assert(std::is_convertible::value, "CUDA event type incompatible to deviceEvent"); +static_assert(std::is_convertible_v, "CUDA event type incompatible to deviceEvent"); } // namespace o2::gpu diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 949dd6195b262..ce05e159461e5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -18,7 +18,7 @@ #include -static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); +static_assert(std::is_convertible_v, "OpenCL event type incompatible to deviceEvent"); #define GPUErrorReturn(...) 
\ { \ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f1a0816529c3a..73ca449252d1d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -235,7 +235,7 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const { GPUTPCGMSectorTrack* sectorTrack = nullptr; int32_t nClusters = 0; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { sectorTrack = &mSectorTrackInfos[trk.TrackID()]; nClusters = sectorTrack->OrigTrack()->NHits(); } else { @@ -244,7 +244,7 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const auto acc = GPUTPCTrkLbl(resolveMCLabels(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f); for (int32_t i = 0; i < nClusters; i++) { int32_t id; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()]; const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i]; id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0]; diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index a7811c6fd55ed..a830562119467 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -48,7 +48,7 @@ class GPUTPCTrkLbl } inline void addLabel(uint32_t elementId) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { for (uint32_t i = 0; i < sizeof(mClusterLabels[elementId]) / sizeof(mClusterLabels[elementId].fClusterID[0]); i++) { const auto& element = mClusterLabels[elementId].fClusterID[i]; if (element.fMCID >= 0) { @@ -101,7 +101,7 @@ class GPUTPCTrkLbl } } auto& bestLabel = mLabels[bestLabelNum].first; - if constexpr 
(std::is_same::value && WEIGHT) { + if constexpr (std::is_same_v && WEIGHT) { *labelWeight = bestLabel.fWeight; *totalWeight = mTotalWeight; *maxCount = bestLabelCount; @@ -147,7 +147,7 @@ struct GPUTPCTrkLbl_ret { template class S, typename... Args> static inline auto GPUTPCTrkLbl(const S* x, Args... args) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { return internal::GPUTPCTrkLbl>(x, args...); } else { return internal::GPUTPCTrkLbl, U>(x, args...); @@ -159,7 +159,7 @@ static inline auto GPUTPCTrkLbl(const AliHLTTPCClusterMCLabel* x, Args... args) { using S = AliHLTTPCClusterMCLabel; using T = AliHLTTPCClusterMCWeight; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { return internal::GPUTPCTrkLbl(x, args...); } else { return internal::GPUTPCTrkLbl(x, args...); diff --git a/GPU/GPUTracking/utils/bitfield.h b/GPU/GPUTracking/utils/bitfield.h index 9730f6c6c234f..a3a3ac9a5bd95 100644 --- a/GPU/GPUTracking/utils/bitfield.h +++ b/GPU/GPUTracking/utils/bitfield.h @@ -93,7 +93,7 @@ class bitfield } #if !defined(GPUCA_GPUCODE_DEVICE) - static_assert(std::is_integral::value, "Storage type non integral"); + static_assert(std::is_integral_v, "Storage type non integral"); static_assert(sizeof(S) >= sizeof(T), "Storage type has insufficient capacity"); #endif diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cdb41ec5813f2..839954e52ded3 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -126,7 +126,7 @@ static inline int32_t qAddOptionMainTupleElem(qConfigSettings settings = settingsTup; return (qAddOptionType(settings, ref, i, argv, argc, def)); } -template ::value> +template > struct qAddOptionMainTupleStruct { static inline int32_t qAddOptionMainTuple(qConfigSettings::settingsType> settings, T& tup, int32_t& i, const char** argv, const int argc) { @@ -157,13 +157,13 @@ struct qConfigType { // Recursive handling of additional settings static 
inline void qProcessSetting(qConfigSettings& settings, qmin_t minval) { - static_assert(!std::is_same::value, "min option not supported for boolean settings"); + static_assert(!std::is_same_v, "min option not supported for boolean settings"); settings.checkMin = true; settings.min = minval.v; } static inline void qProcessSetting(qConfigSettings& settings, qmax_t maxval) { - static_assert(!std::is_same::value, "max option not supported for boolean settings"); + static_assert(!std::is_same_v, "max option not supported for boolean settings"); settings.checkMax = true; settings.max = maxval.v; } @@ -244,7 +244,7 @@ struct qConfigType { static inline void qConfigHelpOption(const char* name, const char* type, const char* def, const char* optname, char optnameshort, const char* preopt, char preoptshort, int32_t optionType, const char* help, Args&&... args) { auto settings = qConfigGetSettings(args...); - const bool boolType = optionType != 1 && std::is_same::value; + const bool boolType = optionType != 1 && std::is_same_v; const char* arguments = settings.doSet ? " (" : (settings.doDefault || optionType == 1 || boolType) ? " [arg] (" : optionType == 2 ? " [...] 
(" : " arg ("; char argBuffer[4] = {0}; uint32_t argBufferPos = 0; From 073cd1697027762311775ec251cea232c701db80 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 14 May 2025 20:43:33 +0200 Subject: [PATCH 0028/1426] GPU CMake: Use CUDA/HIP compilers to preprocess CUDA/HIP RTC files --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index c31dd0c8d3fe2..d9ee132d7c5f5 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -68,7 +68,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) add_custom_command( OUTPUT ${GPU_RTC_BIN}.src COMMAND cp ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_CUDA_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -E -Xcompiler "-nostdinc -P" ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} COMMAND_EXPAND_LISTS diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 6eded3499e46e..a47c659c8717d 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -115,7 +115,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) add_custom_command( OUTPUT ${GPU_RTC_BIN}.src COMMAND cp ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x 
c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_HIP_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} DEPENDS ${MODULE}_HIPIFIED From 895906ee43086663b84232a35e935474a06d6cb7 Mon Sep 17 00:00:00 2001 From: Mario Sitta Date: Thu, 15 May 2025 15:41:14 +0200 Subject: [PATCH 0029/1426] Implementation of MFT rails inside Cage --- .../simulation/include/ITSSimulation/V3Cage.h | 21 +++ .../ITSMFT/ITS/simulation/src/V3Cage.cxx | 146 ++++++++++++++++++ 2 files changed, 167 insertions(+) diff --git a/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h b/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h index e90f0cfeb0aed..44d72284112c2 100644 --- a/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h +++ b/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h @@ -113,6 +113,16 @@ class V3Cage : public V11Geometry /// \param mgr The GeoManager (used only to get the proper material) TGeoVolume* createCageClosingCross(const TGeoManager* mgr = gGeoManager); + /// Creates and places the MFT rails inside the Cage + /// \param mother The mother volume to place the rails into + /// \param mgr The GeoManager (used only to get the proper material) + void createAndPlaceMFTRailsInsideCage(TGeoVolume* mother, const TGeoManager* mgr = gGeoManager); + + /// Creates a pair of MFT rails inside the Cage + /// \param motmed Medium material of the mother volume + /// \param mgr The GeoManager (used only to get the proper material) + TGeoVolume* createMFTRailsPair(const TGeoMedium* motmed, const TGeoManager* mgr = gGeoManager); + // Parameters static const Double_t sCageYInBarrel; ///< Global Y translation @@ -244,6 +254,17 @@ class V3Cage : public V11Geometry static const Double_t sCageCrossBarThick; ///< Closing cross bar thickness 
static const Double_t sCageCrossBarPhi; ///< Closing cross bar angle + // MFT Rails inside the Cage + static const Double_t sCageMFTRailZLen; ///< Total length of the rail + static const Double_t sCageMFTRailTotWidth; ///< Total width of the rail + static const Double_t sCageMFTRailExtWidth; ///< Width of the external part + static const Double_t sCageMFTRailIntWidth; ///< Width of the internal part + static const Double_t sCageMFTRailBaseWidth; ///< Width of the rail base + static const Double_t sCageMFTRailTotHeight; ///< Total height of the rail + static const Double_t sCageMFTRailExtHeight; ///< Height of the external part + static const Double_t sCageMFTRailIntHeight; ///< Height of the internal part + static const Double_t sCageMFTRailsXDist; ///< X distance between rails + ClassDefOverride(V3Cage, 0); // ITS v3 support geometry }; } // namespace its diff --git a/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx b/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx index 3b17d7afeef3d..528b74dc339f1 100644 --- a/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx +++ b/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx @@ -167,6 +167,16 @@ const Double_t V3Cage::sCageCrossZLength = 8 * sMm; const Double_t V3Cage::sCageCrossBarThick = 20 * sMm; const Double_t V3Cage::sCageCrossBarPhi = 25; // Deg +const Double_t V3Cage::sCageMFTRailZLen = 1807 * sMm; +const Double_t V3Cage::sCageMFTRailTotWidth = 27 * sMm; +const Double_t V3Cage::sCageMFTRailExtWidth = 24 * sMm; +const Double_t V3Cage::sCageMFTRailIntWidth = 17.5 * sMm; +const Double_t V3Cage::sCageMFTRailBaseWidth = 22 * sMm; +const Double_t V3Cage::sCageMFTRailTotHeight = 8.9 * sMm; +const Double_t V3Cage::sCageMFTRailExtHeight = 5.9 * sMm; +const Double_t V3Cage::sCageMFTRailIntHeight = 3.5 * sMm; +const Double_t V3Cage::sCageMFTRailsXDist = 44 * sMm; + ClassImp(V3Cage); V3Cage::V3Cage() @@ -251,6 +261,9 @@ void V3Cage::createAndPlaceCage(TGeoVolume* mother, const TGeoManager* mgr) zpos = sBPSuppZPos + sBPSuppCollarBeamWid 
/ 2; mother->AddNode(cageBPSupport, 1, new TGeoTranslation(0, ypos, zpos)); + // The MFT Rails inside the Cage + createAndPlaceMFTRailsInsideCage(mother, mgr); + return; } @@ -1648,3 +1661,136 @@ TGeoVolume* V3Cage::createCageClosingCross(const TGeoManager* mgr) // Finally return the closing cross volume return closCrossVol; } + +void V3Cage::createAndPlaceMFTRailsInsideCage(TGeoVolume* mother, const TGeoManager* mgr) +{ + // + // Creates the MFT Rails located inside the Cage and places them + // + // Input: + // mother : the mother volume to place the rails into + // mgr : the GeoManager (used only to get the proper material) + // + // Output: + // + // Return: + // + // Created: 10 May 2025 Mario Sitta + // + + // Local variables + Double_t rdist, rpos, xpos, ypos, alpha; + Double_t xbox, ybox; + + // Create a pair of rails (a BBox container is returned) + TGeoVolume* cageMFTRails = createMFTRailsPair(mother->GetMedium(), mgr); + + // Now compute the radial distance and the XY position of the box + xbox = (static_cast<TGeoBBox*>(cageMFTRails->GetShape()))->GetDX(); + ybox = (static_cast<TGeoBBox*>(cageMFTRails->GetShape()))->GetDY(); + + rdist = TMath::Sqrt(sCageCoverRint * sCageCoverRint - xbox * xbox); + rpos = rdist - ybox; + + // Finally place the four pairs of rails inside the mother volume + xpos = rpos * TMath::Sin(sCageEndCapCableCutPhi * TMath::DegToRad()); + ypos = rpos * TMath::Cos(sCageEndCapCableCutPhi * TMath::DegToRad()) + sCageYInBarrel; + + alpha = -sCageEndCapCableCutPhi + 180; + mother->AddNode(cageMFTRails, 1, new TGeoCombiTrans(xpos, ypos, 0, new TGeoRotation("", alpha, 0, 0))); + alpha = sCageEndCapCableCutPhi + 180; + mother->AddNode(cageMFTRails, 2, new TGeoCombiTrans(-xpos, ypos, 0, new TGeoRotation("", alpha, 0, 0))); + + ypos = rpos * TMath::Cos(sCageEndCapCableCutPhi * TMath::DegToRad()) - sCageYInBarrel; + + alpha = sCageEndCapCableCutPhi; + mother->AddNode(cageMFTRails, 3, new TGeoCombiTrans(xpos, -ypos, 0, new TGeoRotation("", alpha, 0,
0))); + alpha = -sCageEndCapCableCutPhi; + mother->AddNode(cageMFTRails, 4, new TGeoCombiTrans(-xpos, -ypos, 0, new TGeoRotation("", alpha, 0, 0))); + + return; +} + +TGeoVolume* V3Cage::createMFTRailsPair(const TGeoMedium* motmed, const TGeoManager* mgr) +{ + // + // Creates a pair of MFT Rails located inside the Cage (from drawings + // ALI-MFT-DF-0057 and elements therein) + // A box containing a pair of rails is returned (a physical box + // is preferred over an Assembly for better performance) + // + // Input: + // motmed : the medium of the mother volume (for the container box) + // mgr : the GeoManager (used only to get the proper material) + // + // Output: + // + // Return: + // A rail pair as a TGeoVolume + // + // Created: 10 May 2025 Mario Sitta + // + + // Local variables + const Int_t nv = 16; + Double_t xv[nv], yv[nv]; + Double_t deltah, xlen, ylen, zlen; + Double_t xpos, ypos; + + // The shape of a single rail: a Xtru + xv[0] = sCageMFTRailBaseWidth / 2; + yv[0] = 0.; + xv[1] = xv[0]; + yv[1] = sCageMFTRailTotHeight - sCageMFTRailExtHeight; + xv[2] = sCageMFTRailTotWidth / 2; + yv[2] = yv[1]; + xv[3] = xv[2]; + yv[3] = sCageMFTRailTotHeight; + xv[4] = sCageMFTRailIntWidth / 2; + yv[4] = yv[3]; + deltah = (sCageMFTRailExtHeight - sCageMFTRailIntHeight) / 2; + xv[5] = xv[4]; + yv[5] = yv[4] - deltah; + xv[6] = sCageMFTRailExtWidth / 2; + yv[6] = yv[5]; + xv[7] = xv[6]; + yv[7] = yv[6] - sCageMFTRailIntHeight; + + for (Int_t i = 8; i < nv; i++) { + xv[i] = -xv[15 - i]; + yv[i] = yv[15 - i]; + } + + zlen = sCageMFTRailZLen / 2; + + TGeoXtru* mftRailSh = new TGeoXtru(2); + mftRailSh->SetName("mftrailshape"); + mftRailSh->DefinePolygon(nv, xv, yv); + mftRailSh->DefineSection(0, -zlen); + mftRailSh->DefineSection(1, zlen); + + // The air container: a BBox + xlen = 2 * sCageMFTRailTotWidth + sCageMFTRailsXDist; + ylen = sCageMFTRailTotHeight / 2; + zlen = sCageMFTRailZLen / 2; + TGeoBBox* mftRailBoxSh = new TGeoBBox(xlen / 2, ylen, zlen); + + // We have
the shape: now create the real volume + TGeoMedium* medAl = mgr->GetMedium(Form("%s_ALUMINUM$", GetDetName())); + + TGeoVolume* mftRailVol = new TGeoVolume("MFTRailInsideCage", mftRailSh, medAl); + mftRailVol->SetFillColor(kGray); + mftRailVol->SetLineColor(kGray); + + TGeoVolume* mftRailBoxVol = new TGeoVolume("MFTRailPairInsideCage", mftRailBoxSh, motmed); + + // Put the two rails inside the holding box + // (rail Y origin is on its lower face) + xpos = mftRailBoxSh->GetDX() - 0.5 * sCageMFTRailTotWidth; + ypos = mftRailBoxSh->GetDY(); + mftRailBoxVol->AddNode(mftRailVol, 1, new TGeoTranslation(xpos, -ypos, 0)); + mftRailBoxVol->AddNode(mftRailVol, 2, new TGeoTranslation(-xpos, -ypos, 0)); + + // Finally return the rails volume + return mftRailBoxVol; +} From c2cd436aaed5b1e0e21ba831f22b37b7184cd9b3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 14:32:10 +0200 Subject: [PATCH 0030/1426] GPU: Some work to prepare using and headers from system for GPU --- .../MathUtils/include/MathUtils/SMatrixGPU.h | 4 ++- .../include/CommonDataFormat/AbstractRef.h | 5 +++- Detectors/Raw/include/DetectorsRaw/RDHUtils.h | 6 +++-- GPU/GPUTracking/Base/GPUStdSystemHeaders.h | 25 +++++++++++++++++++ GPU/GPUTracking/Base/cuda/CMakeLists.txt | 4 ++- .../GPUReconstructionCUDAIncludesSystem.h | 7 +++--- GPU/GPUTracking/Base/hip/CMakeLists.txt | 3 ++- .../hip/GPUReconstructionHIPIncludesSystem.h | 4 +++ GPU/GPUTracking/Base/opencl/CMakeLists.txt | 8 +++++- .../Base/opencl/GPUReconstructionOCL.cl | 2 ++ GPU/GPUTracking/DataTypes/GPUDataTypes.h | 2 +- GPU/GPUTracking/Definitions/GPUGetConstexpr.h | 4 ++- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 5 +++- GPU/GPUTracking/Standalone/cmake/config.cmake | 2 +- .../TRDTracking/GPUTRDTrackerKernels.cxx | 5 +++- GPU/GPUTracking/utils/bitfield.h | 5 +--- 16 files changed, 72 insertions(+), 19 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUStdSystemHeaders.h diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h 
b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index 5ecdcd75a9906..675719cfc0751 100644 --- a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -29,7 +29,9 @@ #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" #include "GPUCommonLogger.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include <type_traits> +#endif namespace o2::math_utils::detail { diff --git a/DataFormats/common/include/CommonDataFormat/AbstractRef.h b/DataFormats/common/include/CommonDataFormat/AbstractRef.h index 403bab3cbd62f..72c195cfb7bc8 100644 --- a/DataFormats/common/include/CommonDataFormat/AbstractRef.h +++ b/DataFormats/common/include/CommonDataFormat/AbstractRef.h @@ -18,7 +18,10 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include <type_traits> +#endif + namespace o2::dataformats { diff --git a/Detectors/Raw/include/DetectorsRaw/RDHUtils.h b/Detectors/Raw/include/DetectorsRaw/RDHUtils.h index 2fac6f35d40c4..a5d8cc8615c79 100644 --- a/Detectors/Raw/include/DetectorsRaw/RDHUtils.h +++ b/Detectors/Raw/include/DetectorsRaw/RDHUtils.h @@ -19,13 +19,15 @@ #include "GPUCommonRtypes.h" #include "Headers/RAWDataHeader.h" #include "Headers/RDHAny.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include <type_traits> +#endif #if !defined(GPUCA_GPUCODE) #include "CommonDataFormat/InteractionRecord.h" #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) #include "Headers/DAQID.h" -#endif // GPUCA_GPUCODE / GPUCA_STANDALONE +#endif namespace o2 { diff --git a/GPU/GPUTracking/Base/GPUStdSystemHeaders.h b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h new file mode 100644 index 0000000000000..6598085d309c7 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h @@ -0,0 +1,25 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUStdSystemHeaders.h +/// \author David Rohr + +#ifndef GPUSTDSYSTEMHEADERS_H +#define GPUSTDSYSTEMHEADERS_H + +#include +#include +#include +#include +#include +#include + +#endif diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index d9ee132d7c5f5..97972265b3007 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -67,7 +67,9 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) # cmake-format: off add_custom_command( OUTPUT ${GPU_RTC_BIN}.src - COMMAND cp ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h ${GPU_RTC_BIN}.src + COMMAND cp ${GPUDIR}/Base/GPUStdSystemHeaders.h ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h | grep -v GPUStdSystemHeaders.h >> ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/GPUStdSystemHeaders.h >> ${GPU_RTC_BIN}.src COMMAND ${CMAKE_CUDA_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -E -Xcompiler "-nostdinc -P" ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h index 3f072059a9ad7..263d6939909c8 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h @@ -15,9 +15,10 @@ #ifndef 
O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H #define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H -#include -#include -#include +#ifndef GPUCA_GPUCODE_GENRTC +#include "GPUStdSystemHeaders.h" +#endif + #include #include #include diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index a47c659c8717d..04a65922ad453 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -114,7 +114,8 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) # cmake-format: off add_custom_command( OUTPUT ${GPU_RTC_BIN}.src - COMMAND cp ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h ${GPU_RTC_BIN}.src + COMMAND cp ${GPUDIR}/Base/GPUStdSystemHeaders.h ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h | grep -v GPUStdSystemHeaders.h >> ${GPU_RTC_BIN}.src COMMAND ${CMAKE_HIP_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h index 1a3a1ff0108af..0228f993aaee3 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h @@ -15,6 +15,10 @@ #ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H #define O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H +#ifndef GPUCA_GPUCODE_GENRTC +#include "GPUStdSystemHeaders.h" +#endif + #include #include #include diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 99ec36615a1d1..1a8a739adbecf 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -23,7 +23,7 @@ endif() set(CL_SRC 
${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl) set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode) -set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021 ${GPUCA_OCL_DENORMALS_FLAGS}) +set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -x cl -cl-std=CLC++2021 ${GPUCA_OCL_DENORMALS_FLAGS}) if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(OCL_FLAGS ${OCL_FLAGS} -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() @@ -43,6 +43,11 @@ if (NOT DEFINED GPUCA_OCL_SPIRV_VERSION) set(GPUCA_OCL_SPIRV_VERSION 1.2) endif() +# execute_process(COMMAND bash -c "${LLVM_CLANG} -stdlib=libc++ -E -H -x c++ - <<< '#include ' 2>&1 1>/dev/null | grep type_traits | head -n 1 | sed 's/^\\.* *//'" +# OUTPUT_VARIABLE CLANG_STD_INCLUDE_DIR) +# get_filename_component(CLANG_STD_INCLUDE_DIR "${CLANG_STD_INCLUDE_DIR}" DIRECTORY) +# get_filename_component(CLANG_STD_INCLUDE_DIR "${CLANG_STD_INCLUDE_DIR}" ABSOLUTE) # TODO: For using in OpenCL, we would need to add -I${CLANG_STD_INCLUDE_DIR} + if(OPENCL_ENABLED_SPIRV) # BUILD OpenCL intermediate code for SPIR-V target # executes clang to create llvm IL code # Add -fintegrated-objemitter once we switch to clang >= 17 @@ -71,6 +76,7 @@ if(OPENCL_ENABLED) # BUILD OpenCL source code for runtime compilation target add_custom_command( OUTPUT ${CL_BIN}.src COMMAND ${LLVM_CLANG} + -target spir64 -Wno-unused-command-line-argument ${OCL_FLAGS} ${OCL_DEFINECL} diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index ffdc34d6c9881..3f58c0fea75e9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -70,6 +70,8 @@ typedef signed char int8_t; #endif #define assert(param) +#include "GPUCommonDef.h" +#include "GPUCommonTypeTraits.h" // TODO: Once possible in OpenCL, should use GPUStdSystemHeaders.h here #include "GPUConstantMem.h" #include 
"GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index 6cc1e7266e722..3e9623e23559b 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -20,9 +20,9 @@ // These are basic and non-complex data types, which will also be visible on the GPU. // Please add complex data types required on the host but not GPU to GPUHostDataTypes.h and forward-declare! #ifndef GPUCA_GPUCODE_DEVICE +#include // for bitfield below #include #endif -#include "GPUCommonTypeTraits.h" #include "GPUTRDDef.h" struct AliHLTTPCClusterMCLabel; diff --git a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h index 8001b4e98c83f..b70890738e1b7 100644 --- a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h +++ b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h @@ -16,7 +16,9 @@ #define GPUGETCONSTEXPR_H #include "GPUCommonDef.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif // This is a temporary workaround required for clang (with c++20), until we can go to C++23 with P2280R4, which allows getting constexpr static values from references diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 502a70cb57762..a1993ec4a0ffa 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -28,7 +28,10 @@ #include "GPUCommonArray.h" #include "GPUParam.h" #include "GPUTrackParamConvert.h" -#include "GPUCommonTypeTraits.h" + +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif using namespace o2::gpu; using namespace o2::track; diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index 1de0cfa27d7ee..0c7544aff7035 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -30,7 +30,7 @@ set(GPUCA_BUILD_DEBUG 0) 
set(GPUCA_BUILD_DEBUG_SANITIZE 0) set(GPUCA_DETERMINISTIC_MODE 0) # OFF / NO_FAST_MATH / OPTO2 / GPU / WHOLEO2 #set(GPUCA_CUDA_GCCBIN c++-14) -#set(GPUCA_OPENCL_CLANGBIN clang-19) +#set(GPUCA_OPENCL_CLANGBIN clang-20) set(HIP_AMDGPUTARGET "default") # "gfx906;gfx908;gfx90a" set(CUDA_COMPUTETARGET "default") # 86 89 #set(GPUCA_CUDA_COMPILE_MODE perkernel) # onefile / perkernel / rtc diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx index d18f04e554043..dea4cdbca430e 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx @@ -15,10 +15,13 @@ #include "GPUTRDTrackerKernels.h" #include "GPUTRDGeometry.h" #include "GPUConstantMem.h" -#include "GPUCommonTypeTraits.h" #include "GPUReconstructionThreading.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif + using namespace o2::gpu; template diff --git a/GPU/GPUTracking/utils/bitfield.h b/GPU/GPUTracking/utils/bitfield.h index a3a3ac9a5bd95..c5df80f1d6277 100644 --- a/GPU/GPUTracking/utils/bitfield.h +++ b/GPU/GPUTracking/utils/bitfield.h @@ -15,10 +15,6 @@ #ifndef Q_BITFIELD_H #define Q_BITFIELD_H -#if !defined(GPUCA_GPUCODE_DEVICE) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) -#include -#endif - template class bitfield { @@ -93,6 +89,7 @@ class bitfield } #if !defined(GPUCA_GPUCODE_DEVICE) + static_assert(std::is_void_v, "type_traits header missing"); static_assert(std::is_integral_v, "Storage type non integral"); static_assert(sizeof(S) >= sizeof(T), "Storage type has insufficient capacity"); #endif From a9e52c1e0175a24e673c74ec9bf2d8bf03017c0b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 14:34:58 +0200 Subject: [PATCH 0031/1426] GPU: Rename some misleading flag names --- GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 
GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 8 ++++---- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 6 +++--- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h index 3c86dbfcd8d18..4ddd70efb5d1c 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h @@ -35,7 +35,7 @@ struct GPUTPCGMMergedTrackHit { flagRejectDistance = 0x20, flagRejectErr = 0x40, flagReject = 0x60, - flagNotFit = 0x80 }; + flagHighIncl = 0x80 }; }; struct GPUTPCGMMergedTrackHitXYZ { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 73ca449252d1d..1d5a7a0b1df47 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1895,7 +1895,7 @@ GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t uint8_t clusterState = mClusters[trk.FirstClusterRef() + j].state; if (!(clusterState & GPUTPCGMMergedTrackHit::flagReject)) { weight |= attachGood; - } else if (clusterState & GPUTPCGMMergedTrackHit::flagNotFit) { + } else if (clusterState & GPUTPCGMMergedTrackHit::flagHighIncl) { weight |= attachHighIncl; } if (mClusters[trk.FirstClusterRef() + j].leg == goodLeg) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 7bb28a9f22e31..74a8df388d163 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -32,7 +32,7 @@ using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; -GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit; } +GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | 
GPUTPCGMMergedTrackHit::flagHighIncl; } GPUdi() static uint32_t getFlagsRequired(const GPUSettingsRec& rec) { return rec.tpc.dropSecondaryLegsInOutput ? gputpcgmmergertypes::attachGoodLeg : gputpcgmmergertypes::attachZero; } namespace o2::gpu::internal diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index db7a3b5884a12..97b307ce7a550 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -53,9 +53,9 @@ class GPUTPCGMPropagator updateErrorFitFailed = -1, updateErrorClusterRejected = 2, updateErrorClusterRejectedDistance = 2, - updateErrorEdgeCluster = 3, - updateErrorClusterRejectedInInterpolation = 4, - updateErrorClusterRejectedInUpdate = 5 + updateErrorClusterRejectedInInterpolation = 3, + updateErrorClusterRejectedInUpdate = 4, + updateErrorClusterRejectedEdge = 5 }; enum RejectChi2Mode { rejectDirect = 1, @@ -188,7 +188,7 @@ class GPUTPCGMPropagator GPUTPCGMPhysicalTrackModel mT0; MaterialCorrection mMaterial; FieldRegion mFieldRegion = TPC; - bool mSeedingErrors = 0; + bool mSeedingErrors = 0; // TODO: Hide variable in Run3 mode bool mFitInProjections = 1; // fit (Y,SinPhi,QPt) and (Z,DzDs) paramteres separatelly bool mPropagateBzOnly = 0; // Use Bz only in propagation bool mToyMCEvents = 0; // events are simulated with simple home-made simulation diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 1072e4b178bdf..aed42e4f98f0c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -299,7 +299,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (mC[0] > param.rec.tpc.trackFitCovLimit || mC[2] > param.rec.tpc.trackFitCovLimit) { break; } - MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagNotFit); + MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, 
GPUTPCGMMergedTrackHit::flagHighIncl); nMissed2++; NTolerated++; CADEBUG(printf(" --- break (%d, %d)\n", err, err2)); @@ -334,7 +334,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ #endif GPUCA_DEBUG_STREAMER_CHECK(GPUTPCGMPropagator::DebugStreamerVals debugVals;); if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification - retVal = GPUTPCGMPropagator::updateErrorEdgeCluster; + retVal = GPUTPCGMPropagator::updateErrorClusterRejectedEdge; } else { const float time = merger->GetConstantMem()->ioPtrs.clustersNative ? merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].getTime() : -1.f; const float invSqrtCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? CAMath::InvSqrt(merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; @@ -363,7 +363,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ lastUpdateX = mX; covYYUpd = mC[0]; nMissed = nMissed2 = 0; - UnmarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagNotFit); + UnmarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagHighIncl); N++; ihitStart = ihit; float dy = mP[0] - prop.Model().Y(); diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index a1993ec4a0ffa..b74c1bb6a4534 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -268,7 +268,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov if constexpr (std::is_same_v) { const auto& hit = mPtrackHits[trkX.FirstClusterRef() + i]; cl = &mPclusterNative->clustersLinear[hit.num]; - if (hit.state & (GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit)) { + if (hit.state & 
(GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagHighIncl)) { cl = nullptr; if (i + direction != stop) { i += direction; From 132943deb4940ec93748ebbc419ccaa90ca29247 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 14:57:54 +0200 Subject: [PATCH 0032/1426] GPU: Use instead of GPUCommonArray for CUDA / ROCm, not yet for OpenCL... --- .../DCAFitter/include/DCAFitter/DCAFitterN.h | 68 +++++++++---------- .../MathUtils/include/MathUtils/SMatrixGPU.h | 8 +-- .../include/MathUtils/detail/basicMath.h | 11 +-- .../include/MathUtils/detail/trigonometric.h | 13 ++-- .../ITS/include/DataFormatsITS/TrackITS.h | 8 +-- .../include/ReconstructionDataFormats/DCA.h | 4 +- .../TrackParametrization.h | 10 +-- .../TrackParametrizationWithError.h | 8 +-- .../ReconstructionDataFormats/TrackUtils.h | 6 +- .../ReconstructionDataFormats/Vertex.h | 12 ++-- .../src/TrackParametrization.cxx | 6 +- .../src/TrackParametrizationWithError.cxx | 10 +-- Detectors/AOD/src/AODProducerWorkflowSpec.cxx | 2 +- Detectors/Align/src/AlignableDetectorTPC.cxx | 4 +- .../Base/include/DetectorsBase/Propagator.h | 6 +- Detectors/Base/src/Propagator.cxx | 8 +-- .../postprocessing/studies/src/Efficiency.cxx | 12 ++-- .../studies/src/ImpactParameter.cxx | 2 +- .../GPU/ITStrackingGPU/VertexingKernels.h | 4 +- .../ITS/tracking/GPU/cuda/VertexingKernels.cu | 4 +- .../tracking/include/ITStracking/Cluster.h | 15 ++-- .../tracking/include/ITStracking/Constants.h | 28 ++++---- Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx | 8 ++- Detectors/TPC/qc/src/Tracks.cxx | 6 +- .../TPC/workflow/src/MIPTrackFilterSpec.cxx | 2 +- Detectors/TPC/workflow/src/TPCRefitter.cxx | 2 +- .../TPC/workflow/src/TPCTimeSeriesSpec.cxx | 10 +-- GPU/Common/GPUCommonArray.h | 12 ++-- GPU/Common/GPUCommonTypeTraits.h | 3 +- GPU/GPUTracking/Base/GPUStdSystemHeaders.h | 1 + .../Base/opencl/GPUReconstructionOCL.cl | 1 + GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 4 +- .../TRDTracking/GPUTRDInterfaces.h | 8 +-- 33 files 
changed, 156 insertions(+), 150 deletions(-) diff --git a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h index 569b3ea49e515..aac451f1f8978 100644 --- a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h +++ b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h @@ -110,11 +110,11 @@ class DCAFitterN using MatSymND = o2::math_utils::SMatrix>; using MatStdND = o2::math_utils::SMatrix>; using TrackCoefVtx = MatStd3D; - using ArrTrack = o2::gpu::gpustd::array; // container for prongs (tracks) at single vertex cand. - using ArrTrackCovI = o2::gpu::gpustd::array; // container for inv.cov.matrices at single vertex cand. - using ArrTrCoef = o2::gpu::gpustd::array; // container of TrackCoefVtx coefficients at single vertex cand. - using ArrTrDer = o2::gpu::gpustd::array; // container of Track 1st and 2nd derivative over their X param - using ArrTrPos = o2::gpu::gpustd::array; // container of Track positions + using ArrTrack = std::array; // container for prongs (tracks) at single vertex cand. + using ArrTrackCovI = std::array; // container for inv.cov.matrices at single vertex cand. + using ArrTrCoef = std::array; // container of TrackCoefVtx coefficients at single vertex cand. + using ArrTrDer = std::array; // container of Track 1st and 2nd derivative over their X param + using ArrTrPos = std::array; // container of Track positions public: enum BadCovPolicy : uint8_t { // if encountering non-positive defined cov. 
matrix, the choice is: @@ -158,7 +158,7 @@ class DCAFitterN GPUd() const auto getPCACandidatePos(int cand = 0) const { const auto& vd = mPCA[mOrder[cand]]; - return o2::gpu::gpustd::array{static_cast(vd[0]), static_cast(vd[1]), static_cast(vd[2])}; + return std::array{static_cast(vd[0]), static_cast(vd[1]), static_cast(vd[2])}; } ///< return position of quality-ordered candidate in the internal structures @@ -213,7 +213,7 @@ class DCAFitterN GPUd() MatSym3D calcPCACovMatrix(int cand = 0) const; - o2::gpu::gpustd::array calcPCACovMatrixFlat(int cand = 0) const + std::array calcPCACovMatrixFlat(int cand = 0) const { auto m = calcPCACovMatrix(cand); return {static_cast(m(0, 0)), static_cast(m(1, 0)), static_cast(m(1, 1)), static_cast(m(2, 0)), static_cast(m(2, 1)), static_cast(m(2, 2))}; @@ -364,39 +364,39 @@ class DCAFitterN private: // vectors of 1st derivatives of track local residuals over X parameters - o2::gpu::gpustd::array, N> mDResidDx; + std::array, N> mDResidDx; // vectors of 1nd derivatives of track local residuals over X parameters // (cross-derivatives DR/(dx_j*dx_k) = 0 for j!=k, therefore the hessian is diagonal) - o2::gpu::gpustd::array, N> mD2ResidDx2; + std::array, N> mD2ResidDx2; VecND mDChi2Dx; // 1st derivatives of chi2 over tracks X params MatSymND mD2Chi2Dx2; // 2nd derivatives of chi2 over tracks X params (symmetric matrix) MatSymND mCosDif; // matrix with cos(alp_j-alp_i) for j mOrigTrPtr; - o2::gpu::gpustd::array mTrAux; // Aux track info for each track at each cand. vertex - CrossInfo mCrossings; // info on track crossing - - o2::gpu::gpustd::array mTrcEInv; // errors for each track at each cand. vertex - o2::gpu::gpustd::array mCandTr; // tracks at each cond. vertex (Note: Errors are at seed XY point) - o2::gpu::gpustd::array mTrCFVT; // TrackCoefVtx for each track at each cand. 
vertex - o2::gpu::gpustd::array mTrDer; // Track derivativse - o2::gpu::gpustd::array mTrPos; // Track positions - o2::gpu::gpustd::array mTrRes; // Track residuals - o2::gpu::gpustd::array mPCA; // PCA for each vertex candidate - o2::gpu::gpustd::array mChi2 = {0}; // Chi2 at PCA candidate - o2::gpu::gpustd::array mNIters; // number of iterations for each seed - o2::gpu::gpustd::array mTrPropDone{}; // Flag that the tracks are fully propagated to PCA - o2::gpu::gpustd::array mPropFailed{}; // Flag that some propagation failed for this PCA candidate + std::array mOrigTrPtr; + std::array mTrAux; // Aux track info for each track at each cand. vertex + CrossInfo mCrossings; // info on track crossing + + std::array mTrcEInv; // errors for each track at each cand. vertex + std::array mCandTr; // tracks at each cond. vertex (Note: Errors are at seed XY point) + std::array mTrCFVT; // TrackCoefVtx for each track at each cand. vertex + std::array mTrDer; // Track derivativse + std::array mTrPos; // Track positions + std::array mTrRes; // Track residuals + std::array mPCA; // PCA for each vertex candidate + std::array mChi2 = {0}; // Chi2 at PCA candidate + std::array mNIters; // number of iterations for each seed + std::array mTrPropDone{}; // Flag that the tracks are fully propagated to PCA + std::array mPropFailed{}; // Flag that some propagation failed for this PCA candidate LogLogThrottler mLoggerBadCov{}; LogLogThrottler mLoggerBadInv{}; LogLogThrottler mLoggerBadProp{}; MatSym3D mWeightInv; // inverse weight of single track, [sum{M^T E M}]^-1 in EQ.T - o2::gpu::gpustd::array mOrder{0}; + std::array mOrder{0}; int mCurHyp = 0; int mCrossIDCur = 0; int mCrossIDAlt = -1; BadCovPolicy mBadCovPolicy{BadCovPolicy::Discard}; // what to do in case of non-pos-def. cov. 
matrix, see BadCovPolicy enum - o2::gpu::gpustd::array mFitStatus{}; // fit status of each hypothesis fit + std::array mFitStatus{}; // fit status of each hypothesis fit bool mAllowAltPreference = true; // if the fit converges to alternative PCA seed, abandon the current one bool mUseAbsDCA = false; // use abs. distance minimization rather than chi2 bool mWeightedFinalPCA = false; // recalculate PCA as a cov-matrix weighted mean, even if absDCA method was used @@ -657,7 +657,7 @@ template GPUd() void DCAFitterN::calcChi2Derivatives() { //< calculate 1st and 2nd derivatives of wighted DCA (chi2) over track parameters X, see EQ.Chi2 in the ref - o2::gpu::gpustd::array, N> covIDrDx; // tempory vectors of covI_j * dres_j/dx_i + std::array, N> covIDrDx; // tempory vectors of covI_j * dres_j/dx_i // chi2 1st derivative for (int i = N; i--;) { @@ -1175,13 +1175,13 @@ GPUd() o2::track::TrackParCov DCAFitterN::createParentTrackParCov(in { const auto& trP = getTrack(0, cand); const auto& trN = getTrack(1, cand); - o2::gpu::gpustd::array covV = {0.}; - o2::gpu::gpustd::array pvecV = {0.}; + std::array covV = {0.}; + std::array pvecV = {0.}; int q = 0; for (int it = 0; it < N; it++) { const auto& trc = getTrack(it, cand); - o2::gpu::gpustd::array pvecT = {0.}; - o2::gpu::gpustd::array covT = {0.}; + std::array pvecT = {0.}; + std::array covT = {0.}; trc.getPxPyPzGlo(pvecT); trc.getCovXYZPxPyPzGlo(covT); constexpr int MomInd[6] = {9, 13, 14, 18, 19, 20}; // cov matrix elements for momentum component @@ -1210,18 +1210,18 @@ GPUd() o2::track::TrackPar DCAFitterN::createParentTrackPar(int cand const auto& trP = getTrack(0, cand); const auto& trN = getTrack(1, cand); const auto& wvtx = getPCACandidate(cand); - o2::gpu::gpustd::array pvecV = {0.}; + std::array pvecV = {0.}; int q = 0; for (int it = 0; it < N; it++) { const auto& trc = getTrack(it, cand); - o2::gpu::gpustd::array pvecT = {0.}; + std::array pvecT = {0.}; trc.getPxPyPzGlo(pvecT); for (int i = 0; i < 3; i++) { pvecV[i] 
+= pvecT[i]; } q += trc.getCharge(); } - const o2::gpu::gpustd::array vertex = {(float)wvtx[0], (float)wvtx[1], (float)wvtx[2]}; + const std::array vertex = {(float)wvtx[0], (float)wvtx[1], (float)wvtx[2]}; return o2::track::TrackPar(vertex, pvecV, q, sectorAlpha); } diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index 675719cfc0751..8158a93666a92 100644 --- a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -25,12 +25,12 @@ #define ALICEO2_SMATRIX_GPU_H #include "GPUCommonDef.h" -#include "GPUCommonArray.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" #include "GPUCommonLogger.h" #ifndef GPUCA_GPUCODE_DEVICE #include +#include #endif namespace o2::math_utils::detail @@ -283,14 +283,14 @@ struct make_indices : make_indices_impl<0, indices<>, N> { }; template -constexpr auto do_make(F f, indices) -> gpu::gpustd::array +constexpr auto do_make(F f, indices) -> std::array { - gpu::gpustd::array retarr = {f(I0 + I)...}; + std::array retarr = {f(I0 + I)...}; return retarr; } template -constexpr auto make(F f) -> gpu::gpustd::array +constexpr auto make(F f) -> std::array { return do_make(f, typename make_indices::type()); } diff --git a/Common/MathUtils/include/MathUtils/detail/basicMath.h b/Common/MathUtils/include/MathUtils/detail/basicMath.h index 3565764435a68..1abe6ee878c39 100644 --- a/Common/MathUtils/include/MathUtils/detail/basicMath.h +++ b/Common/MathUtils/include/MathUtils/detail/basicMath.h @@ -16,14 +16,15 @@ #ifndef MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ #define MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ +#include "GPUCommonDef.h" +#include "GPUCommonMath.h" +#include "CommonConstants/MathConstants.h" + #ifndef GPUCA_GPUCODE_DEVICE #include #include +#include #endif -#include "GPUCommonArray.h" -#include "GPUCommonDef.h" -#include "GPUCommonMath.h" -#include "CommonConstants/MathConstants.h" namespace o2 { @@ 
-130,4 +131,4 @@ GPUdi() double log(double x) } // namespace math_utils } // namespace o2 -#endif /* MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ */ \ No newline at end of file +#endif /* MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ */ diff --git a/Common/MathUtils/include/MathUtils/detail/trigonometric.h b/Common/MathUtils/include/MathUtils/detail/trigonometric.h index 462affdceb17f..457210202ca54 100644 --- a/Common/MathUtils/include/MathUtils/detail/trigonometric.h +++ b/Common/MathUtils/include/MathUtils/detail/trigonometric.h @@ -16,16 +16,17 @@ #ifndef MATHUTILS_INCLUDE_MATHUTILS_DETAIL_TRIGONOMETRIC_H_ #define MATHUTILS_INCLUDE_MATHUTILS_DETAIL_TRIGONOMETRIC_H_ -#ifndef GPUCA_GPUCODE_DEVICE -#include -#include -#endif -#include "GPUCommonArray.h" #include "GPUCommonDef.h" #include "GPUCommonMath.h" #include "CommonConstants/MathConstants.h" #include "MathUtils/detail/basicMath.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#include +#include +#endif + namespace o2 { namespace math_utils @@ -156,7 +157,7 @@ GPUhdi() std::tuple rotateZInv(T xG, T yG, T snAlp, T csAlp) #endif template -GPUhdi() void rotateZ(gpu::gpustd::array& xy, T alpha) +GPUhdi() void rotateZ(std::array& xy, T alpha) { // transforms vector in tracking frame alpha to global frame T sin, cos; diff --git a/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h b/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h index e9931b89ecd4a..06d4fba51bd54 100644 --- a/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h +++ b/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h @@ -170,14 +170,14 @@ class TrackITSExt : public TrackITS using TrackITS::TrackITS; // inherit base constructors GPUh() TrackITSExt(o2::track::TrackParCov&& parCov, short ncl, float chi2, - o2::track::TrackParCov&& outer, o2::gpu::gpustd::array cls) + o2::track::TrackParCov&& outer, std::array cls) : TrackITS(parCov, chi2, outer), mIndex{cls} { setNumberOfClusters(ncl); } 
GPUh() TrackITSExt(o2::track::TrackParCov& parCov, short ncl, float chi2, std::uint32_t rof, - o2::track::TrackParCov& outer, o2::gpu::gpustd::array cls) + o2::track::TrackParCov& outer, std::array cls) : TrackITS(parCov, chi2, outer), mIndex{cls} { setNumberOfClusters(ncl); @@ -205,13 +205,13 @@ class TrackITSExt : public TrackITS mIndex[layer] = idx; } - GPUh() o2::gpu::gpustd::array& getClusterIndexes() + GPUh() std::array& getClusterIndexes() { return mIndex; } private: - o2::gpu::gpustd::array mIndex = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; ///< Indices of associated clusters + std::array mIndex = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; ///< Indices of associated clusters ClassDefNV(TrackITSExt, 2); }; } // namespace its diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h index 922470f8992f5..6eb41b798e101 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h @@ -14,10 +14,10 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" -#include "GPUCommonArray.h" #ifndef GPUCA_GPUCODE_DEVICE #include +#include #endif /// \author ruben.shahoyan@cern.ch @@ -67,7 +67,7 @@ class DCA private: float mY = 0.f; float mZ = 0.f; - gpu::gpustd::array mCov; ///< s2y, syz, s2z + std::array mCov; ///< s2y, syz, s2z ClassDefNV(DCA, 1); }; diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index bfd56eb8f024f..f240e34861eeb 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -29,7 +29,6 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" #include 
"GPUCommonMath.h" -#include "GPUCommonArray.h" #include "GPUROOTCartesianFwd.h" #ifndef GPUCA_GPUCODE_DEVICE @@ -39,6 +38,7 @@ #include #include #include +#include #endif #ifndef GPUCA_ALIGPUCODE // Used only by functions that are hidden on the GPU @@ -128,9 +128,9 @@ class TrackParametrization public: using value_t = value_T; - using dim2_t = gpu::gpustd::array; - using dim3_t = gpu::gpustd::array; - using params_t = gpu::gpustd::array; + using dim2_t = std::array; + using dim3_t = std::array; + using params_t = std::array; struct yzerr_t { // 2 measurement with error dim2_t yz; @@ -209,7 +209,7 @@ class TrackParametrization GPUd() math_utils::Point3D getXYZGlo() const; GPUd() void getXYZGlo(dim3_t& xyz) const; GPUd() bool getPxPyPzGlo(dim3_t& pxyz) const; - GPUd() bool getPosDirGlo(gpu::gpustd::array& posdirp) const; + GPUd() bool getPosDirGlo(std::array& posdirp) const; // methods for track params estimate at other point GPUd() bool getYZAt(value_t xk, value_t b, value_t& y, value_t& z) const; diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h index cc783298e14cd..cd9d1517a81b1 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h @@ -38,14 +38,14 @@ class TrackParametrizationWithError : public TrackParametrization static_assert(std::is_floating_point_v); #endif - using covMat_t = gpu::gpustd::array; + using covMat_t = std::array; using MatrixDSym5 = o2::math_utils::SMatrix>; using MatrixD5 = o2::math_utils::SMatrix>; GPUhd() TrackParametrizationWithError(); GPUd() TrackParametrizationWithError(value_t x, value_t alpha, const params_t& par, const covMat_t& cov, int charge = 1, const PID pid = PID::Pion); GPUd() TrackParametrizationWithError(const dim3_t& xyz, const 
dim3_t& pxpypz, - const gpu::gpustd::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); + const std::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); GPUhdDefault() TrackParametrizationWithError(const TrackParametrizationWithError& src) = default; GPUdDefault() TrackParametrizationWithError(TrackParametrizationWithError&& src) = default; @@ -57,7 +57,7 @@ class TrackParametrizationWithError : public TrackParametrization using TrackParametrization::set; GPUd() void set(value_t x, value_t alpha, const params_t& par, const covMat_t& cov, int charge = 1, const PID pid = PID::Pion); GPUd() void set(value_t x, value_t alpha, const value_t* par, const value_t* cov, int charge = 1, const PID pid = PID::Pion); - GPUd() void set(const dim3_t& xyz, const dim3_t& pxpypz, const gpu::gpustd::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); + GPUd() void set(const dim3_t& xyz, const dim3_t& pxpypz, const std::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); GPUd() const covMat_t& getCov() const; GPUd() value_t getSigmaY2() const; GPUd() value_t getSigmaZY() const; @@ -77,7 +77,7 @@ class TrackParametrizationWithError : public TrackParametrization GPUd() value_t getCovarElem(int i, int j) const; GPUd() value_t getDiagError2(int i) const; - GPUd() bool getCovXYZPxPyPzGlo(gpu::gpustd::array& c) const; + GPUd() bool getCovXYZPxPyPzGlo(std::array& c) const; GPUd() void print() const; GPUd() void printHexadecimal(); diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h index 0ee0ca4461ab0..8a79130d64eda 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h @@ -18,9 +18,9 @@ #define INCLUDE_RECONSTRUCTIONDATAFORMATS_TRACKUTILS_H_ #include "GPUCommonRtypes.h" -#include 
"GPUCommonArray.h" #ifndef GPUCA_GPUCODE_DEVICE +#include #include #endif @@ -39,11 +39,11 @@ template GPUd() value_T BetheBlochSolidOpt(value_T bg); template -GPUd() void g3helx3(value_T qfield, value_T step, gpu::gpustd::array& vect); +GPUd() void g3helx3(value_T qfield, value_T step, std::array& vect); //____________________________________________________ template -GPUd() void g3helx3(value_T qfield, value_T step, gpu::gpustd::array& vect) +GPUd() void g3helx3(value_T qfield, value_T step, std::array& vect) { /****************************************************************** * * diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h index d14bc7ac786c8..fc89f162a0727 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h @@ -14,7 +14,6 @@ #include "GPUCommonDef.h" #include "GPUCommonMath.h" -#include "GPUCommonArray.h" #include #include "CommonDataFormat/TimeStamp.h" @@ -22,6 +21,7 @@ #include #include #include +#include #endif namespace o2 @@ -42,7 +42,7 @@ class VertexBase static constexpr int kNCov = 6; GPUhdDefault() VertexBase() = default; GPUhdDefault() ~VertexBase() = default; - GPUhd() VertexBase(const math_utils::Point3D& pos, const gpu::gpustd::array& cov) : mPos(pos), mCov(cov) + GPUhd() VertexBase(const math_utils::Point3D& pos, const std::array& cov) : mPos(pos), mCov(cov) { } @@ -65,7 +65,7 @@ class VertexBase GPUd() float getSigmaY() const { return gpu::CAMath::Sqrt(getSigmaY2()); } GPUd() float getSigmaZ() const { return gpu::CAMath::Sqrt(getSigmaZ2()); } - GPUd() const gpu::gpustd::array& getCov() const { return mCov; } + GPUd() const std::array& getCov() const { return mCov; } GPUd() math_utils::Point3D getXYZ() const { return mPos; } GPUd() math_utils::Point3D& getXYZ() { return mPos; } @@ -101,14 +101,14 @@ class VertexBase setSigmaXZ(sxz); 
setSigmaYZ(syz); } - GPUd() void setCov(const gpu::gpustd::array& cov) { mCov = cov; } + GPUd() void setCov(const std::array& cov) { mCov = cov; } bool operator==(const VertexBase& other) const; bool operator!=(const VertexBase& other) const { return !(*this == other); } protected: math_utils::Point3D mPos{0., 0., 0.}; ///< cartesian position - gpu::gpustd::array mCov{}; ///< errors, see CovElems enum + std::array mCov{}; ///< errors, see CovElems enum ClassDefNV(VertexBase, 1); }; @@ -130,7 +130,7 @@ class Vertex : public VertexBase GPUhdDefault() Vertex() = default; GPUhdDefault() ~Vertex() = default; - GPUhd() Vertex(const math_utils::Point3D& pos, const gpu::gpustd::array& cov, ushort nCont, float chi2) + GPUhd() Vertex(const math_utils::Point3D& pos, const std::array& cov, ushort nCont, float chi2) : VertexBase(pos, cov), mChi2(chi2), mNContributors(nCont) { } diff --git a/DataFormats/Reconstruction/src/TrackParametrization.cxx b/DataFormats/Reconstruction/src/TrackParametrization.cxx index 4b68ea425bfbd..1bdf9b55a60a0 100644 --- a/DataFormats/Reconstruction/src/TrackParametrization.cxx +++ b/DataFormats/Reconstruction/src/TrackParametrization.cxx @@ -130,7 +130,7 @@ GPUd() bool TrackParametrization::getPxPyPzGlo(dim3_t& pxyz) const //____________________________________________________ template -GPUd() bool TrackParametrization::getPosDirGlo(gpu::gpustd::array& posdirp) const +GPUd() bool TrackParametrization::getPosDirGlo(std::array& posdirp) const { // fill vector with lab x,y,z,px/p,py/p,pz/p,p,sinAlpha,cosAlpha value_t ptI = getPtInv(); @@ -231,7 +231,7 @@ GPUd() bool TrackParametrization::propagateParamTo(value_t xk, const di step *= gpu::CAMath::Sqrt(1.f + getTgl() * getTgl()); // // get the track x,y,z,px/p,py/p,pz/p,p,sinAlpha,cosAlpha in the Global System - gpu::gpustd::array vecLab{0.f}; + std::array vecLab{0.f}; if (!getPosDirGlo(vecLab)) { return false; } @@ -250,7 +250,7 @@ GPUd() bool TrackParametrization::propagateParamTo(value_t xk, const di 
costet = b[2] / bb; sintet = bt / bb; } - gpu::gpustd::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], + std::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], -sinphi * vecLab[0] + cosphi * vecLab[1], sintet * cosphi * vecLab[0] + sintet * sinphi * vecLab[1] + costet * vecLab[2], costet * cosphi * vecLab[3] + costet * sinphi * vecLab[4] - sintet * vecLab[5], diff --git a/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx b/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx index 81963adf79938..aee24238f1247 100644 --- a/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx +++ b/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx @@ -259,7 +259,7 @@ GPUd() bool TrackParametrizationWithError::propagateToDCA(const o2::dat //______________________________________________________________ template GPUd() TrackParametrizationWithError::TrackParametrizationWithError(const dim3_t& xyz, const dim3_t& pxpypz, - const gpu::gpustd::array& cv, int charge, bool sectorAlpha, const PID pid) + const std::array& cv, int charge, bool sectorAlpha, const PID pid) { // construct track param and covariance from kinematics and lab errors set(xyz, pxpypz, cv, charge, sectorAlpha, pid); @@ -268,7 +268,7 @@ GPUd() TrackParametrizationWithError::TrackParametrizationWithError(con //______________________________________________________________ template GPUd() void TrackParametrizationWithError::set(const dim3_t& xyz, const dim3_t& pxpypz, - const gpu::gpustd::array& cv, int charge, bool sectorAlpha, const PID pid) + const std::array& cv, int charge, bool sectorAlpha, const PID pid) { // set track param and covariance from kinematics and lab errors @@ -475,7 +475,7 @@ GPUd() bool TrackParametrizationWithError::propagateTo(value_t xk, cons step *= gpu::CAMath::Sqrt(1.f + this->getTgl() * this->getTgl()); // // get the track 
x,y,z,px/p,py/p,pz/p,p,sinAlpha,cosAlpha in the Global System - gpu::gpustd::array vecLab{0.f}; + std::array vecLab{0.f}; if (!this->getPosDirGlo(vecLab)) { return false; } @@ -542,7 +542,7 @@ GPUd() bool TrackParametrizationWithError::propagateTo(value_t xk, cons costet = b[2] / bb; sintet = bt / bb; } - gpu::gpustd::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], + std::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], -sinphi * vecLab[0] + cosphi * vecLab[1], sintet * cosphi * vecLab[0] + sintet * sinphi * vecLab[1] + costet * vecLab[2], costet * cosphi * vecLab[3] + costet * sinphi * vecLab[4] - sintet * vecLab[5], @@ -1115,7 +1115,7 @@ GPUd() bool TrackParametrizationWithError::correctForMaterial(value_t x //______________________________________________________________ template -GPUd() bool TrackParametrizationWithError::getCovXYZPxPyPzGlo(gpu::gpustd::array& cv) const +GPUd() bool TrackParametrizationWithError::getCovXYZPxPyPzGlo(std::array& cv) const { //--------------------------------------------------------------------- // This function returns the global covariance matrix of the track params diff --git a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx index 2b133770357fc..8fb2db9c3c9a6 100644 --- a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx +++ b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx @@ -2615,7 +2615,7 @@ AODProducerWorkflowDPL::TrackQA AODProducerWorkflowDPL::processBarrelTrackQA(int o2::track::TrackParametrization tpcTMP = tpcOrig; /// get backup of the track const o2::base::Propagator::MatCorrType mMatType = o2::base::Propagator::MatCorrType::USEMatCorrLUT; /// should be parameterized const o2::dataformats::VertexBase v = mVtx.getMeanVertex(collisionID < 0 ? 
0.f : data.getPrimaryVertex(collisionID).getZ()); - o2::gpu::gpustd::array dcaInfo{-999., -999.}; + std::array dcaInfo{-999., -999.}; if (prop->propagateToDCABxByBz({v.getX(), v.getY(), v.getZ()}, tpcTMP, 2.f, mMatType, &dcaInfo)) { trackQAHolder.tpcdcaR = 100. * dcaInfo[0] / sqrt(1. + trackPar.getQ2Pt() * trackPar.getQ2Pt()); trackQAHolder.tpcdcaZ = 100. * dcaInfo[1] / sqrt(1. + trackPar.getQ2Pt() * trackPar.getQ2Pt()); diff --git a/Detectors/Align/src/AlignableDetectorTPC.cxx b/Detectors/Align/src/AlignableDetectorTPC.cxx index f66d9e3f3ab95..b3d2102559974 100644 --- a/Detectors/Align/src/AlignableDetectorTPC.cxx +++ b/Detectors/Align/src/AlignableDetectorTPC.cxx @@ -214,8 +214,8 @@ int AlignableDetectorTPC::processPoints(GIndex gid, int npntCut, bool inv) auto* sectSensor = (AlignableSensorTPC*)getSensor(currentSector); const auto* sysE = sectSensor->getAddError(); // additional syst error - gpu::gpustd::array p = {y, z}; - gpu::gpustd::array c = {0, 0, 0}; + std::array p = {y, z}; + std::array c = {0, 0, 0}; mController->getTPCParam()->GetClusterErrors2(sector, currentRow, z, trkParam.getSnp(), trkParam.getTgl(), -1.f, 0.f, 0.f, c[0], c[2]); // TODO: Note this disables occupancy / charge components of the error estimation mController->getTPCParam()->UpdateClusterError2ByState(clusterState, c[0], c[2]); int nrComb = std::abs(row - currentRow) + 1; diff --git a/Detectors/Base/include/DetectorsBase/Propagator.h b/Detectors/Base/include/DetectorsBase/Propagator.h index a9e2ce6e0383d..dbdef47e4edc0 100644 --- a/Detectors/Base/include/DetectorsBase/Propagator.h +++ b/Detectors/Base/include/DetectorsBase/Propagator.h @@ -17,7 +17,6 @@ #define ALICEO2_BASE_PROPAGATOR_ #include "GPUCommonRtypes.h" -#include "GPUCommonArray.h" #include "CommonConstants/PhysicsConstants.h" #include "ReconstructionDataFormats/Track.h" #include "ReconstructionDataFormats/DCA.h" @@ -25,6 +24,7 @@ #include "DetectorsBase/MatLayerCylSet.h" #ifndef GPUCA_GPUCODE +#include #include #endif @@ 
-111,12 +111,12 @@ class PropagatorImpl GPUd() bool propagateToDCA(const o2::math_utils::Point3D& vtx, o2::track::TrackParametrization& track, value_type bZ, value_type maxStep = MAX_STEP, MatCorrType matCorr = MatCorrType::USEMatCorrLUT, - gpu::gpustd::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, + std::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, int signCorr = 0, value_type maxD = 999.f) const; GPUd() bool propagateToDCABxByBz(const o2::math_utils::Point3D& vtx, o2::track::TrackParametrization& track, value_type maxStep = MAX_STEP, MatCorrType matCorr = MatCorrType::USEMatCorrLUT, - gpu::gpustd::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, + std::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, int signCorr = 0, value_type maxD = 999.f) const; PropagatorImpl(PropagatorImpl const&) = delete; diff --git a/Detectors/Base/src/Propagator.cxx b/Detectors/Base/src/Propagator.cxx index 754c0c14e6f60..1c44cea65c69c 100644 --- a/Detectors/Base/src/Propagator.cxx +++ b/Detectors/Base/src/Propagator.cxx @@ -170,7 +170,7 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackParCov_t& track, va signCorr = -dir; // sign of eloss correction is not imposed } - gpu::gpustd::array b{}; + std::array b{}; while (math_utils::detail::abs(dx) > Epsilon) { auto step = math_utils::detail::min(math_utils::detail::abs(dx), maxStep); if (dir < 0) { @@ -239,7 +239,7 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackPar_t& track, value signCorr = -dir; // sign of eloss correction is not imposed } - gpu::gpustd::array b{}; + std::array b{}; while (math_utils::detail::abs(dx) > Epsilon) { auto step = math_utils::detail::min(math_utils::detail::abs(dx), maxStep); if (dir < 0) { @@ -553,7 +553,7 @@ GPUd() bool PropagatorImpl::propagateToDCABxByBz(const o2::dataformats: template GPUd() bool PropagatorImpl::propagateToDCA(const math_utils::Point3D& vtx, TrackPar_t& track, value_type bZ, value_type maxStep, 
PropagatorImpl::MatCorrType matCorr, - gpu::gpustd::array* dca, track::TrackLTIntegral* tofInfo, + std::array* dca, track::TrackLTIntegral* tofInfo, int signCorr, value_type maxD) const { // propagate track to DCA to the vertex @@ -601,7 +601,7 @@ GPUd() bool PropagatorImpl::propagateToDCA(const math_utils::Point3D GPUd() bool PropagatorImpl::propagateToDCABxByBz(const math_utils::Point3D& vtx, TrackPar_t& track, value_type maxStep, PropagatorImpl::MatCorrType matCorr, - gpu::gpustd::array* dca, track::TrackLTIntegral* tofInfo, + std::array* dca, track::TrackLTIntegral* tofInfo, int signCorr, value_type maxD) const { // propagate track to DCA to the vertex diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx index bca1ec1e85001..494603641cde5 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx @@ -593,7 +593,7 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) LOGP(info, "--------------- getDCAClusterTrackMC"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); auto bz = o2::base::Propagator::Instance()->getNominalBz(); @@ -833,7 +833,7 @@ void EfficiencyStudy::countDuplicatedAfterCuts() LOGP(info, "--------------- countDuplicatedAfterCuts"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -1019,7 +1019,7 @@ void EfficiencyStudy::studyDCAcutsMC() // if not, keep it as a fake match -> increase the fake match counter // the 
efficiency of each one will be match counter / total of the duplicated clusters o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -1346,7 +1346,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -2176,7 +2176,7 @@ void EfficiencyStudy::getEfficiency(bool isMC) LOGP(info, "getEfficiency()"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -2860,4 +2860,4 @@ DataProcessorSpec getEfficiencyStudy(mask_t srcTracksMask, mask_t srcClustersMas Options{}}; } -} // namespace o2::its::study \ No newline at end of file +} // namespace o2::its::study diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx b/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx index 5ca1bf2bd5c8f..c0aaabddaca1b 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx @@ -358,7 +358,7 @@ void ImpactParameterStudy::process(o2::globaltracking::RecoContainer& recoData) auto trueID = trueVec_globID_contr[it]; const o2::track::TrackParCov& trc = recoData.getTrackParam(trueID); auto pt = trc.getPt(); - o2::gpu::gpustd::array dcaInfo{-999., -999.}; + std::array dcaInfo{-999., -999.}; // LOGP(info, " ---> 
Bz={}", o2::base::Propagator::Instance()->getNominalBz()); o2::track::TrackPar trcTmp{trc}; if (o2::base::Propagator::Instance()->propagateToDCABxByBz({Pvtx_refitted.getX(), Pvtx_refitted.getY(), Pvtx_refitted.getZ()}, trcTmp, 2.f, matCorr, &dcaInfo)) { diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h index 6ae042d081688..059b1cdc29082 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h @@ -22,8 +22,6 @@ #include "ITStrackingGPU/VertexerTraitsGPU.h" #include "ITStrackingGPU/TracerGPU.h" -#include "GPUCommonArray.h" - namespace o2::its::gpu { #ifdef GPUCA_GPUCODE // GPUg() global kernels must only when compiled by GPU compiler @@ -56,4 +54,4 @@ void trackletFinderHandler(const Cluster* clustersNextLayer, // 0 2 const float phiCut, const size_t maxTrackletsPerCluster = 1e2); } // namespace o2::its::gpu -#endif \ No newline at end of file +#endif diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu index 3aab0624ef556..acbd77585df37 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu @@ -564,7 +564,7 @@ GPUg() void computeVertexKernel( histZ[iBin] = 0; } if (sumWZ > minContributors || vertIndex == 0) { - new (vertices + vertIndex) Vertex{o2::math_utils::Point3D(beamPosition[0], beamPosition[1], wZ / sumWZ), o2::gpu::gpustd::array{ex, 0, ey, 0, 0, ez}, static_cast(sumWZ), 0}; + new (vertices + vertIndex) Vertex{o2::math_utils::Point3D(beamPosition[0], beamPosition[1], wZ / sumWZ), std::array{ex, 0, ey, 0, 0, ez}, static_cast(sumWZ), 0}; } else { new (vertices + vertIndex) Vertex{}; } @@ -577,4 +577,4 @@ GPUg() void computeVertexKernel( */ } // namespace gpu } // namespace its -} // namespace o2 \ No 
newline at end of file +} // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h index 0f136edfebfb3..2bf1316470316 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h @@ -16,15 +16,14 @@ #ifndef TRACKINGITSU_INCLUDE_CACLUSTER_H_ #define TRACKINGITSU_INCLUDE_CACLUSTER_H_ -#ifndef GPUCA_GPUCODE_DEVICE -#include -#endif - #include "GPUCommonRtypes.h" -#include "GPUCommonArray.h" #include "ITStracking/Definitions.h" #include "ITStracking/MathUtils.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif + namespace o2 { namespace its @@ -61,15 +60,15 @@ GPUhdi() void Cluster::print() const struct TrackingFrameInfo { TrackingFrameInfo() = default; - TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, o2::gpu::gpustd::array&& posTF, o2::gpu::gpustd::array&& covTF); + TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, std::array&& posTF, std::array&& covTF); float xCoordinate; float yCoordinate; float zCoordinate; float xTrackingFrame; float alphaTrackingFrame; - o2::gpu::gpustd::array positionTrackingFrame = {-1., -1.}; - o2::gpu::gpustd::array covarianceTrackingFrame = {999., 999., 999.}; + std::array positionTrackingFrame = {-1., -1.}; + std::array covarianceTrackingFrame = {999., 999., 999.}; GPUdi() void print() const { #if !defined(GPUCA_GPUCODE_DEVICE) || (!defined(__OPENCL__) && defined(GPUCA_GPU_DEBUG_PRINT)) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h index da02149fbc432..ec075b0f10d04 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h @@ -16,17 +16,17 @@ #ifndef TRACKINGITSU_INCLUDE_CONSTANTS_H_ #define TRACKINGITSU_INCLUDE_CONSTANTS_H_ -#ifndef GPUCA_GPUCODE_DEVICE 
-#include -#include -#endif - #include "ITStracking/Definitions.h" #include "CommonConstants/MathConstants.h" #include "GPUCommonMath.h" #include "GPUCommonDef.h" -#include "GPUCommonArray.h" + +#ifndef GPUCA_GPUCODE_DEVICE +#include +#include +#include +#endif namespace o2 { @@ -54,9 +54,9 @@ constexpr int ClustersPerCell{3}; constexpr int UnusedIndex{-1}; constexpr float Resolution{0.0005f}; -GPUhdi() constexpr o2::gpu::gpustd::array VertexerHistogramVolume() +GPUhdi() constexpr std::array VertexerHistogramVolume() { - return o2::gpu::gpustd::array{{1.98, 1.98, 40.f}}; + return std::array{{1.98, 1.98, 40.f}}; } } // namespace its @@ -66,24 +66,24 @@ constexpr int LayersNumber{7}; constexpr int TrackletsPerRoad{LayersNumber - 1}; constexpr int CellsPerRoad{LayersNumber - 2}; -GPUhdi() constexpr o2::gpu::gpustd::array LayersZCoordinate() +GPUhdi() constexpr std::array LayersZCoordinate() { constexpr double s = 1.; // safety margin - return o2::gpu::gpustd::array{16.333f + s, 16.333f + s, 16.333f + s, 42.140f + s, 42.140f + s, 73.745f + s, 73.745f + s}; + return std::array{16.333f + s, 16.333f + s, 16.333f + s, 42.140f + s, 42.140f + s, 73.745f + s, 73.745f + s}; } -GPUhdi() constexpr o2::gpu::gpustd::array LayersRCoordinate() +GPUhdi() constexpr std::array LayersRCoordinate() { - return o2::gpu::gpustd::array{{2.33959f, 3.14076f, 3.91924f, 19.6213f, 24.5597f, 34.388f, 39.3329f}}; + return std::array{{2.33959f, 3.14076f, 3.91924f, 19.6213f, 24.5597f, 34.388f, 39.3329f}}; } constexpr int ZBins{256}; constexpr int PhiBins{128}; constexpr float InversePhiBinSize{PhiBins / constants::math::TwoPi}; -GPUhdi() constexpr o2::gpu::gpustd::array InverseZBinSize() +GPUhdi() constexpr std::array InverseZBinSize() { constexpr auto zSize = LayersZCoordinate(); - return o2::gpu::gpustd::array{0.5f * ZBins / (zSize[0]), 0.5f * ZBins / (zSize[1]), 0.5f * ZBins / (zSize[2]), + return std::array{0.5f * ZBins / (zSize[0]), 0.5f * ZBins / (zSize[1]), 0.5f * ZBins / (zSize[2]), 0.5f * 
ZBins / (zSize[3]), 0.5f * ZBins / (zSize[4]), 0.5f * ZBins / (zSize[5]), 0.5f * ZBins / (zSize[6])}; } diff --git a/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx b/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx index 630ad9acf59d2..1557c636e2345 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx @@ -17,7 +17,9 @@ #include "ITStracking/MathUtils.h" #include "ITStracking/IndexTableUtils.h" -#include "GPUCommonArray.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif namespace o2 { @@ -92,8 +94,8 @@ bool Cluster::operator==(const Cluster& rhs) const this->indexTableBinIndex == rhs.indexTableBinIndex; } -TrackingFrameInfo::TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, o2::gpu::gpustd::array&& posTF, - o2::gpu::gpustd::array&& covTF) +TrackingFrameInfo::TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, std::array&& posTF, + std::array&& covTF) : xCoordinate{x}, yCoordinate{y}, zCoordinate{z}, xTrackingFrame{xTF}, alphaTrackingFrame{alpha}, positionTrackingFrame{posTF}, covarianceTrackingFrame{covTF} { // Nothing to do diff --git a/Detectors/TPC/qc/src/Tracks.cxx b/Detectors/TPC/qc/src/Tracks.cxx index 8e6f0d702df1b..5f29e80c89d2e 100644 --- a/Detectors/TPC/qc/src/Tracks.cxx +++ b/Detectors/TPC/qc/src/Tracks.cxx @@ -13,6 +13,7 @@ #include #include +#include // root includes #include "TFile.h" @@ -21,7 +22,6 @@ // o2 includes #include "DataFormatsTPC/TrackTPC.h" #include "DataFormatsTPC/dEdxInfo.h" -#include "GPUCommonArray.h" #include "DetectorsBase/Propagator.h" #include "TPCQC/Tracks.h" #include "TPCQC/Helpers.h" @@ -179,7 +179,7 @@ bool Tracks::processTrack(const o2::tpc::TrackTPC& track) if (propagator->getMatLUT() && propagator->hasMagFieldSet()) { // ---| fill DCA histos |--- - o2::gpu::gpustd::array dca; + std::array dca; o2::track::TrackPar propTrack(track); if (propagator->propagateToDCABxByBz(mPositionOfPV, propTrack, 2.f, 
o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca)) { const auto phi = o2::math_utils::to02PiGen(track.getPhi()); @@ -348,4 +348,4 @@ void Tracks::dumpToFile(std::string_view filename) arr.Write(arr.GetName(), TObject::kSingleKey); } f->Close(); -} \ No newline at end of file +} diff --git a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx index 1329dea236b1f..33b9039298264 100644 --- a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx +++ b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx @@ -178,7 +178,7 @@ bool MIPTrackFilterDevice::acceptDCA(const TrackTPC& track) } auto propagator = o2::base::Propagator::Instance(); - o2::gpu::gpustd::array dca; + std::array dca; const o2::math_utils::Point3D refPoint{0, 0, 0}; o2::track::TrackPar propTrack(track); const auto ok = propagator->propagateToDCABxByBz(refPoint, propTrack, 2., o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca); diff --git a/Detectors/TPC/workflow/src/TPCRefitter.cxx b/Detectors/TPC/workflow/src/TPCRefitter.cxx index 3ebe32d12ddb8..b2e41c8e808da 100644 --- a/Detectors/TPC/workflow/src/TPCRefitter.cxx +++ b/Detectors/TPC/workflow/src/TPCRefitter.cxx @@ -421,7 +421,7 @@ void TPCRefitterSpec::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) bool TPCRefitterSpec::getDCAs(const o2::track::TrackPar& track, float& dcar, float& dcaz) { auto propagator = o2::base::Propagator::Instance(); - o2::gpu::gpustd::array dca; + std::array dca; const o2::math_utils::Point3D refPoint{0, 0, 0}; o2::track::TrackPar propTrack(track); const auto ok = propagator->propagateToDCABxByBz(refPoint, propTrack, 2., o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca); diff --git a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx index 4d20654d07c83..a9f1e7d71da8e 100644 --- a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx +++ b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx @@ -1143,7 +1143,7 @@ class 
TPCTimeSeries : public Task auto propagator = o2::base::Propagator::Instance(); // propagate track to DCA - o2::gpu::gpustd::array dca; + std::array dca; const o2::math_utils::Point3D refPoint{0, 0, 0}; // coarse propagation @@ -1252,7 +1252,7 @@ class TPCTimeSeries : public Task // make propagation for ITS-TPC Track // check if the track was assigned to ITS track - o2::gpu::gpustd::array dcaITSTPC{0, 0}; + std::array dcaITSTPC{0, 0}; float deltaP0 = -999; float deltaP1 = -999; float deltaP2 = -999; @@ -1270,7 +1270,7 @@ class TPCTimeSeries : public Task // store TPC only DCAs // propagate to vertex in case the track belongs to vertex const bool contributeToVertex = (idxITSTPC.back() != -1); - o2::gpu::gpustd::array dcaITSTPCTmp{-1, -1}; + std::array dcaITSTPCTmp{-1, -1}; if (contributeToVertex) { if (propagator->propagateToDCA(vertex.getXYZ(), trackITSTPCTmp, propagator->getNominalBz(), mFineStep, mMatType, &dcaITSTPCTmp)) { @@ -1279,7 +1279,7 @@ class TPCTimeSeries : public Task } // propagate TPC track to vertex - o2::gpu::gpustd::array dcaTPCTmp{-1, -1}; + std::array dcaTPCTmp{-1, -1}; if (propagator->propagateToDCA(vertex.getXYZ(), track, propagator->getNominalBz(), mFineStep, mMatType, &dcaTPCTmp)) { dcaTPCAtVertex = dcaTPCTmp[0]; } @@ -1401,7 +1401,7 @@ class TPCTimeSeries : public Task const bool contributeToVertex = (idxITSTPC.back() != -1); if (hasITSTPC && contributeToVertex) { o2::track::TrackParCov trackITSTPCTmp = tracksITSTPC[idxITSTPC.front()]; - o2::gpu::gpustd::array dcaITSTPCTmp{-1, -1}; + std::array dcaITSTPCTmp{-1, -1}; if (propagator->propagateToDCA(vertex.getXYZ(), trackITSTPCTmp, propagator->getNominalBz(), mFineStep, mMatType, &dcaITSTPCTmp)) { o2::track::TrackParCov trackTPC = tracksTPC[iTrk]; if (trackTPC.rotate(trackITSTPCTmp.getAlpha()) && propagator->propagateTo(trackTPC, trackITSTPCTmp.getX(), false, mMaxSnp, mFineStep, mMatType)) { diff --git a/GPU/Common/GPUCommonArray.h b/GPU/Common/GPUCommonArray.h index 
c9babbf5548b4..e83ca8c4a69fc 100644 --- a/GPU/Common/GPUCommonArray.h +++ b/GPU/Common/GPUCommonArray.h @@ -15,12 +15,14 @@ #ifndef GPUCOMMONARRAY_H #define GPUCOMMONARRAY_H -#ifndef GPUCA_GPUCODE_DEVICE +#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__) // TODO: Get rid of GPUCommonArray once OpenCL supports +#ifndef GPUCA_GPUCODE_COMPILEKERNELS #include #endif +#else #include "GPUCommonDef.h" -namespace o2::gpu::gpustd +namespace std { #ifdef GPUCA_GPUCODE_DEVICE template @@ -43,5 +45,7 @@ GPUd() array(T, E...)->array; template using array = std::array; #endif -} // namespace o2::gpu::gpustd -#endif \ No newline at end of file +} // namespace std +#endif + +#endif // GPUCOMMONARRAY_H diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index f837019c11875..a51a4ac50683f 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -17,12 +17,11 @@ #include "GPUCommonDef.h" -#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__) +#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__) // TODO: Get rid of GPUCommonTypeTraits once OpenCL supports #ifndef GPUCA_GPUCODE_COMPILEKERNELS #include #endif #else -// We just reimplement some type traits in std for the GPU // TODO: Check if meanwhile we can get rid of GPUCommonTypeTraits and GPUCommonArray, and just use the std headers. 
namespace std { template diff --git a/GPU/GPUTracking/Base/GPUStdSystemHeaders.h b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h index 6598085d309c7..08f9be7d98380 100644 --- a/GPU/GPUTracking/Base/GPUStdSystemHeaders.h +++ b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h @@ -21,5 +21,6 @@ #include #include #include +#include #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index 3f58c0fea75e9..ea74c43703597 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -72,6 +72,7 @@ typedef signed char int8_t; #include "GPUCommonDef.h" #include "GPUCommonTypeTraits.h" // TODO: Once possible in OpenCL, should use GPUStdSystemHeaders.h here +#include "GPUCommonArray.h" // TODO: Same #include "GPUConstantMem.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index b74c1bb6a4534..7c7ce8d07ffb9 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -387,8 +387,8 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov TrackParCovChi2 = 0.f; } CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", "", trk.getAlpha(), x, trk.getParams()[0], trk.getParams()[1], trk.getParams()[4], trk.getParams()[4], trk.getParams()[2], trk.getParams()[2], trk.getParams()[0] - y, trk.getParams()[1] - z, sqrtf(trk.getCov()[0]), sqrtf(trk.getCov()[2]), sqrtf(trk.getCov()[5]), sqrtf(trk.getCov()[14]), trk.getCov()[10])); - gpu::gpustd::array p = {y, z}; - gpu::gpustd::array c = {0, 0, 0}; + std::array p = {y, z}; + std::array c = {0, 0, 0}; GPUTPCGMPropagator::GetErr2(c[0], c[2], *mPparam, getPar(trk)[2], getPar(trk)[3], z, x, y, currentRow, 
clusterState, sector, time, invAvgCharge, invCharge, false); TrackParCovChi2 += trk.getPredictedChi2(p, c); if (!trk.update(p, c)) { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index f6b8bea29822a..d26d46495069b 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -59,8 +59,8 @@ class propagatorInterface GPUdi() bool update(const float p[2], const float cov[3]) { if (mParam) { - gpustd::array pTmp = {p[0], p[1]}; - gpustd::array covTmp = {cov[0], cov[1], cov[2]}; + std::array pTmp = {p[0], p[1]}; + std::array covTmp = {cov[0], cov[1], cov[2]}; return mParam->update(pTmp, covTmp); } else { return false; @@ -69,8 +69,8 @@ class propagatorInterface GPUdi() float getPredictedChi2(const float p[2], const float cov[3]) { if (mParam) { - gpustd::array pTmp = {p[0], p[1]}; - gpustd::array covTmp = {cov[0], cov[1], cov[2]}; + std::array pTmp = {p[0], p[1]}; + std::array covTmp = {cov[0], cov[1], cov[2]}; return mParam->getPredictedChi2(pTmp, covTmp); } else { return 99999.f; From a850e9eb3e6a634a1e87a70170c05ad6d8bce3af Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 22:18:30 +0200 Subject: [PATCH 0033/1426] GPU Common: Workaround for removing gpustd::array, temporary alias for O2Physics --- .../ReconstructionDataFormats/TrackParametrization.h | 1 + GPU/Common/GPUCommonArray.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index f240e34861eeb..1d0a5f1a9f1fd 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -29,6 +29,7 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" #include "GPUCommonMath.h" +#include 
"GPUCommonArray.h" #include "GPUROOTCartesianFwd.h" #ifndef GPUCA_GPUCODE_DEVICE diff --git a/GPU/Common/GPUCommonArray.h b/GPU/Common/GPUCommonArray.h index e83ca8c4a69fc..fa86d7bb4a021 100644 --- a/GPU/Common/GPUCommonArray.h +++ b/GPU/Common/GPUCommonArray.h @@ -48,4 +48,10 @@ using array = std::array; } // namespace std #endif +namespace o2::gpu::gpustd +{ +template +using array = ::std::array; // temporary alias, to remove dependent types +} // o2::gpu::gpustd + #endif // GPUCOMMONARRAY_H From b3c66e8a48c77a932a914a5adcb3562ebef3487a Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 16 May 2025 09:53:57 +0200 Subject: [PATCH 0034/1426] DPL: Drop obsolete API (#14279) The new plugin based mechanism does not need the bulk insertion anymore. --- .../Core/include/Framework/TableBuilder.h | 106 +----------------- .../Core/test/benchmark_TableBuilder.cxx | 33 ------ Framework/Core/test/test_TableBuilder.cxx | 34 ------ 3 files changed, 4 insertions(+), 169 deletions(-) diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 8d7601cefc634..0b35d5be083e4 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -42,12 +42,6 @@ class Table; class Array; } // namespace arrow -template -struct BulkInfo { - const T ptr; - size_t size; -}; - extern template class arrow::NumericBuilder; extern template class arrow::NumericBuilder; extern template class arrow::NumericBuilder; @@ -200,34 +194,6 @@ struct BuilderUtils { } } - template - static arrow::Status bulkAppend(HolderType& holder, size_t bulkSize, const PTR ptr) - { - return holder.builder->AppendValues(ptr, bulkSize, nullptr); - } - - template - static arrow::Status bulkAppendChunked(HolderType& holder, BulkInfo info) - { - // Appending nullptr is a no-op. 
- if (info.ptr == nullptr) { - return arrow::Status::OK(); - } - if constexpr (std::is_same_v>) { - if (appendToList>(holder.builder, info.ptr, info.size).ok() == false) { - throw runtime_error("Unable to append to column"); - } else { - return arrow::Status::OK(); - } - } else { - if (holder.builder->AppendValues(info.ptr, info.size, nullptr).ok() == false) { - throw runtime_error("Unable to append to column"); - } else { - return arrow::Status::OK(); - } - } - } - template static arrow::Status append(HolderType& holder, std::pair ip) { @@ -518,14 +484,6 @@ struct TableBuilderHelpers { return {BuilderTraits::make_datatype()...}; } - template - static std::vector> makeFields(std::array const& names) - { - char const* const* names_ptr = names.data(); - return { - std::make_shared(*names_ptr++, BuilderMaker::make_datatype(), true, nullptr)...}; - } - /// Invokes the append method for each entry in the tuple template static bool append(std::tuple& holders, VALUES&& values) @@ -542,19 +500,6 @@ struct TableBuilderHelpers { (BuilderUtils::unsafeAppend(std::get(holders), std::get(values)), ...); } - template - static bool bulkAppend(std::tuple& holders, size_t bulkSize, PTRS ptrs) - { - return (BuilderUtils::bulkAppend(std::get(holders), bulkSize, std::get(ptrs)).ok() && ...); - } - - /// Return true if all columns are done. 
- template - static bool bulkAppendChunked(std::tuple& holders, INFOS infos) - { - return (BuilderUtils::bulkAppendChunked(std::get(holders), std::get(infos)).ok() && ...); - } - /// Invokes the append method for each entry in the tuple template static bool finalize(std::vector>& arrays, std::tuple& holders) @@ -575,15 +520,9 @@ constexpr auto tuple_to_pack(std::tuple&&) return framework::pack{}; } -template -concept BulkInsertable = (std::integral> && !std::same_as>); - template struct InsertionTrait { - static consteval DirectInsertion policy() - requires(!BulkInsertable); - static consteval CachedInsertion policy() - requires(BulkInsertable); + static consteval DirectInsertion policy(); using Policy = decltype(policy()); }; @@ -658,7 +597,9 @@ class TableBuilder template auto makeBuilders(std::array const& columnNames, size_t nRows) { - mSchema = std::make_shared(TableBuilderHelpers::makeFields(columnNames)); + char const* const* names_ptr = columnNames.data(); + mSchema = std::make_shared( + std::vector>({std::make_shared(*names_ptr++, BuilderMaker::make_datatype(), true, nullptr)...})); mHolders = makeHolders(mMemoryPool, nRows); mFinalizer = [](std::vector>& arrays, void* holders) -> bool { @@ -768,45 +709,6 @@ class TableBuilder }(typename T::table_t::persistent_columns_t{}); } - template - auto preallocatedPersist(std::array const& columnNames, int nRows) - { - constexpr size_t nColumns = NCOLUMNS; - validate(); - mArrays.resize(nColumns); - makeBuilders(columnNames, nRows); - - // Callback used to fill the builders - return [holders = mHolders](unsigned int /*slot*/, typename BuilderMaker::FillType... 
args) -> void { - TableBuilderHelpers::unsafeAppend(*(HoldersTupleIndexed*)holders, std::forward_as_tuple(args...)); - }; - } - - template - auto bulkPersist(std::array const& columnNames, size_t nRows) - { - validate(); - // Should not be called more than once - mArrays.resize(NCOLUMNS); - makeBuilders(columnNames, nRows); - - return [holders = mHolders](unsigned int /*slot*/, size_t batchSize, typename BuilderMaker::FillType const*... args) -> void { - TableBuilderHelpers::bulkAppend(*(HoldersTupleIndexed*)holders, batchSize, std::forward_as_tuple(args...)); - }; - } - - template - auto bulkPersistChunked(std::array const& columnNames, size_t nRows) - { - validate(); - mArrays.resize(NCOLUMNS); - makeBuilders(columnNames, nRows); - - return [holders = mHolders](unsigned int /*slot*/, BulkInfo::STLValueType const*>... args) -> bool { - return TableBuilderHelpers::bulkAppendChunked(*(HoldersTupleIndexed*)holders, std::forward_as_tuple(args...)); - }; - } - /// Reserve method to expand the columns as needed. 
template auto reserveArrays(std::tuple& holders, int s) diff --git a/Framework/Core/test/benchmark_TableBuilder.cxx b/Framework/Core/test/benchmark_TableBuilder.cxx index 59d1450e895bd..5b9dee866c8a3 100644 --- a/Framework/Core/test/benchmark_TableBuilder.cxx +++ b/Framework/Core/test/benchmark_TableBuilder.cxx @@ -62,39 +62,6 @@ static void BM_TableBuilderScalarReserved(benchmark::State& state) BENCHMARK(BM_TableBuilderScalarReserved)->Arg(1 << 21); BENCHMARK(BM_TableBuilderScalarReserved)->Range(8, 8 << 16); -static void BM_TableBuilderScalarPresized(benchmark::State& state) -{ - using namespace o2::framework; - for (auto _ : state) { - TableBuilder builder; - auto rowWriter = builder.preallocatedPersist({"x"}, state.range(0)); - for (auto i = 0; i < state.range(0); ++i) { - rowWriter(0, 0.f); - } - auto table = builder.finalize(); - } -} - -BENCHMARK(BM_TableBuilderScalarPresized)->Arg(1 << 20); -BENCHMARK(BM_TableBuilderScalarPresized)->Range(8, 8 << 16); - -static void BM_TableBuilderScalarBulk(benchmark::State& state) -{ - using namespace o2::framework; - auto chunkSize = state.range(0) / 256; - std::vector buffer(chunkSize, 0.); // We assume data is chunked in blocks 256th of the total size - for (auto _ : state) { - TableBuilder builder; - auto bulkWriter = builder.bulkPersist({"x"}, state.range(0)); - for (auto i = 0; i < state.range(0) / chunkSize; ++i) { - bulkWriter(0, chunkSize, buffer.data()); - } - auto table = builder.finalize(); - } -} - -BENCHMARK(BM_TableBuilderScalarBulk)->Range(256, 1 << 20); - static void BM_TableBuilderSimple(benchmark::State& state) { using namespace o2::framework; diff --git a/Framework/Core/test/test_TableBuilder.cxx b/Framework/Core/test/test_TableBuilder.cxx index b08fee5ad4e6a..00cbbbc59b725 100644 --- a/Framework/Core/test/test_TableBuilder.cxx +++ b/Framework/Core/test/test_TableBuilder.cxx @@ -162,30 +162,6 @@ TEST_CASE("TestTableBuilderStruct") } } -TEST_CASE("TestTableBuilderBulk") -{ - using namespace 
o2::framework; - TableBuilder builder; - auto bulkWriter = builder.bulkPersist({"x", "y"}, 10); - int x[] = {0, 1, 2, 3, 4, 5, 6, 7}; - int y[] = {0, 1, 2, 3, 4, 5, 6, 7}; - - bulkWriter(0, 8, x, y); - - auto table = builder.finalize(); - REQUIRE(table->num_columns() == 2); - REQUIRE(table->num_rows() == 8); - REQUIRE(table->schema()->field(0)->name() == "x"); - REQUIRE(table->schema()->field(1)->name() == "y"); - REQUIRE(table->schema()->field(0)->type()->id() == arrow::int32()->id()); - REQUIRE(table->schema()->field(1)->type()->id() == arrow::int32()->id()); - - for (int64_t i = 0; i < 8; ++i) { - auto p = std::dynamic_pointer_cast>(table->column(0)->chunk(0)); - REQUIRE(p->Value(i) == i); - } -} - TEST_CASE("TestTableBuilderMore") { using namespace o2::framework; @@ -288,13 +264,3 @@ TEST_CASE("TestColumnCount") int count2 = TableBuilder::countColumns(); REQUIRE(count2 == 3); } - -TEST_CASE("TestMakeFields") -{ - auto fields = TableBuilderHelpers::makeFields({"i", "f"}); - REQUIRE(fields.size() == 2); - REQUIRE(fields[0]->name() == "i"); - REQUIRE(fields[1]->name() == "f"); - REQUIRE(fields[0]->type()->name() == "int32"); - REQUIRE(fields[1]->type()->name() == "float"); -} From d89ef683194da14326ccc2915810cf402d6921d7 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 15 May 2025 23:29:48 +0200 Subject: [PATCH 0035/1426] Promote ITS/MFT wrong orbit/ROF count messages to critical --- Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx | 2 +- Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx b/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx index a9ed2748ec004..187a1bc114ee7 100644 --- a/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx +++ b/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx @@ -125,7 +125,7 @@ bool RUDecodeData::checkLinkInSync(int icab, const o2::InteractionRecord ir) 
link->statistics.errorCounts[GBTLinkDecodingStat::ErrOldROF]++; linkHBFToDump[(uint64_t(link->subSpec) << 32) + link->hbfEntry] = link->irHBF.orbit; if (link->needToPrintError(link->statistics.errorCounts[GBTLinkDecodingStat::ErrOldROF]) && !ROFRampUpStage) { - LOGP(error, "{} (cable {}) has IR={} for current majority IR={} -> {}", link->describe(), + LOGP(critical, "{} (cable {}) has IR={} for current majority IR={} -> {}", link->describe(), cableHWID[icab], link->ir.asString(), ir.asString(), link->statistics.ErrNames[GBTLinkDecodingStat::ErrOldROF]); } #endif diff --git a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx index 7042cb7433ac5..da1af34376ff1 100644 --- a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx +++ b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx @@ -202,7 +202,7 @@ void STFDecoder::run(ProcessingContext& pc) if ((expectedTFSize != nTriggersProcessed) && mROFErrRepIntervalMS > 0 && mTFCounter > 1 && nTriggersProcessed > 0) { long currTS = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); if (currTS - lastErrReportTS > mROFErrRepIntervalMS) { - LOGP(error, "Inconsistent number of ROF per TF. From parameters: {} from readout: {} (muting further reporting for {} ms)", expectedTFSize, nTriggersProcessed, mROFErrRepIntervalMS); + LOGP(critical, "Inconsistent number of ROF per TF. 
From parameters: {} from readout: {} (muting further reporting for {} ms)", expectedTFSize, nTriggersProcessed, mROFErrRepIntervalMS); lastErrReportTS = currTS; } } From 5b5f1d96e60af52e9bf70796b23e1a4f5f0c544c Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Fri, 16 May 2025 16:09:42 +0200 Subject: [PATCH 0036/1426] Include/adapt QED for special runs (#14242) * Include QED for special runs --- Generators/include/Generators/QEDGenParam.h | 11 ++++++----- Generators/share/external/QEDepem.C | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Generators/include/Generators/QEDGenParam.h b/Generators/include/Generators/QEDGenParam.h index 1c78b14cfc516..0a79f113fc4ac 100644 --- a/Generators/include/Generators/QEDGenParam.h +++ b/Generators/include/Generators/QEDGenParam.h @@ -24,11 +24,12 @@ namespace o2 namespace eventgen { struct QEDGenParam : public o2::conf::ConfigurableParamHelper { - - float yMin = -6.f; ///< min Y - float yMax = 6.f; ///< max Y - float ptMin = 0.4e-3; ///< min pT - float ptMax = 10.f; ///< min pT + float yMin = -6.f; ///< min Y + float yMax = 6.f; ///< max Y + float ptMin = 0.4e-3; ///< min pT + float ptMax = 10.f; ///< min pT + float cmEnergy = 5360.f; ///< center of mass energy per nucleon pair in GeV + float Z = 82.f; ///< atomic number of the projectile/target (only symmetric systems are compatible for now) // float xSectionQED = -1; ///< estimated QED x-section in barns float xSectionHad = 8.; ///< reference hadronic x-section for the same system diff --git a/Generators/share/external/QEDepem.C b/Generators/share/external/QEDepem.C index 1e464ec69be00..d9103d1476df0 100644 --- a/Generators/share/external/QEDepem.C +++ b/Generators/share/external/QEDepem.C @@ -35,6 +35,8 @@ o2::eventgen::GeneratorTGenerator* QEDepem() genBg->SetPtRange(qedParam.ptMin, qedParam.ptMax); // Set pt limits (GeV) for e+-: 1MeV corresponds to max R=13.3mm at 5kGaus genBg->SetOrigin(diamond.position[0], diamond.position[1], diamond.position[2]); 
// vertex position in space genBg->SetSigma(diamond.width[0], diamond.width[1], diamond.width[2]); // vertex sigma + genBg->SetCMEnergy(qedParam.cmEnergy); // center of mass energy per nucleon pair in GeV + genBg->SetZ(qedParam.Z); // atomic number of the projectile/target (only symmetric systems are compatible for now) genBg->SetTimeOrigin(0.); // vertex position in time initialized = genBg->Init(); if (!initialized) { From 64315adcaddfc72a231bc8c15427b03d6f239e87 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 16 May 2025 15:43:44 +0200 Subject: [PATCH 0037/1426] GPU: Check that we do not optimize RTC when we have 2 different GPUReconstruction instances --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 9 ++++++++- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 3ef995b9f9561..c76bf11c3e25d 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -347,15 +347,22 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings->nTPCClustererLanes = GPUCA_NSECTORS; } + if (GetProcessingSettings().doublePipeline) { + mProcessingSettings->rtctech.allowOptimizedSlaveReconstruction = true; + } if (GetProcessingSettings().doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || GetProcessingSettings().memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { GPUError("Must use double pipeline mode only with exactly one chain that must support it"); return 1; } - if (mMaster == nullptr && GetProcessingSettings().doublePipeline) { mPipelineContext.reset(new GPUReconstructionPipelineContext); } + if (mMaster && GetProcessingSettings().rtc.enable && (GetProcessingSettings().rtc.optConstexpr || GetProcessingSettings().rtc.optSpecialCode) && 
!GetProcessingSettings().rtctech.allowOptimizedSlaveReconstruction) { + GPUError("Not allowed to create optimized RTC code with more than one GPUReconstruction instances"); + return 1; + } + mDeviceMemorySize = mHostMemorySize = 0; for (uint32_t i = 0; i < mChains.size(); i++) { if (mChains[i]->EarlyConfigure()) { diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9400a429fca81..b9be1db881816 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -228,6 +228,7 @@ AddOption(runTest, int32_t, 0, "", 0, "Do not run the actual benchmark, but just AddOption(cacheMutex, bool, true, "", 0, "Use a file lock to serialize access to the cache folder") AddOption(ignoreCacheValid, bool, false, "", 0, "If set, allows to use RTC cached code files even if they are not valid for the current source code / parameters") AddOption(printLaunchBounds, bool, false, "", 0, "Print launch bounds used for RTC code as debugging option") +AddOption(allowOptimizedSlaveReconstruction, bool, false, "", 0, "Allow RTC with slave GPUReconstruction instances with optConstexpr and optSpecialcode") AddOption(cacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored") AddOption(prependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string") AddOption(overrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") // Part of cmdLine, so checked against the cache From dc60e3535f333042ef6dcb665cf4c3cfb5d95cfa Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 16 May 2025 13:22:19 +0200 Subject: [PATCH 0038/1426] By default process K0s only in the sync. 
svertexer To revert to the full mode, define SECVTXK0ONLY=0 --- .../include/DetectorsVertexing/SVertexHypothesis.h | 4 ++-- prodtests/full-system-test/dpl-workflow.sh | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h b/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h index 1450e0c15e98c..c3fd74aa7eeff 100644 --- a/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h +++ b/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h @@ -60,7 +60,7 @@ class SVertexHypothesis bool check(float p2Pos, float p2Neg, float p2V0, float ptV0) const { // check if given mass and pt is matching to hypothesis - return check(calcMass(p2Pos, p2Neg, p2V0), ptV0); + return mPars[SigmaM] > 0 && check(calcMass(p2Pos, p2Neg, p2V0), ptV0); } bool check(float mass, float pt) const { // check if given mass and pt is matching to hypothesis @@ -151,7 +151,7 @@ class SVertex3Hypothesis bool check(float p2Pos, float p2Neg, float p2Bach, float p2Tot, float ptV0) const { // check if given mass and pt is matching to hypothesis - return check(calcMass(p2Pos, p2Neg, p2Bach, p2Tot), ptV0); + return mPars[SigmaM] > 0 && check(calcMass(p2Pos, p2Neg, p2Bach, p2Tot), ptV0); } bool check(float mass, float pt) const diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index b51594115154c..4e6cbbebe7db7 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -94,6 +94,7 @@ TPC_CORR_OPT= TPC_CORR_KEY= INTERACTION_TAG_CONFIG_KEY= EVE_OPT=" --jsons-folder $EDJSONS_DIR" +: ${SECVTXK0ONLY:=} : ${EVE_CONFIG:=} : ${STRTRACKING:=} : ${ITSEXTRAERR:=} @@ -591,7 +592,14 @@ has_detector_reco ITS && has_detector_gpu ITS TPC && [[ -z "$DISABLE_ROOT_OUTPUT has_detector_matching PRIMVTX && [[ ! 
-z "$VERTEXING_SOURCES" ]] && [[ $GLOBAL_READER_NEEDS_PV != 1 ]] && add_W o2-primary-vertexing-workflow "$DISABLE_MC $DISABLE_ROOT_INPUT $DISABLE_ROOT_OUTPUT $PVERTEX_CONFIG --pipeline $(get_N primary-vertexing MATCH REST 1 PRIMVTX),$(get_N pvertex-track-matching MATCH REST 1 PRIMVTXMATCH)" "${PVERTEXING_CONFIG_KEY};${INTERACTION_TAG_CONFIG_KEY};" if [[ $BEAMTYPE != "cosmic" ]] && has_detectors_reco ITS && has_detector_matching SECVTX && [[ ! -z "$SVERTEXING_SOURCES" ]]; then - [[ $GLOBAL_READER_NEEDS_SV != 1 ]] && add_W o2-secondary-vertexing-workflow "$DISABLE_MC $STRTRACKING $DISABLE_ROOT_INPUT $DISABLE_ROOT_OUTPUT $TPC_CORR_OPT --vertexing-sources $SVERTEXING_SOURCES --threads $SVERTEX_THREADS --pipeline $(get_N secondary-vertexing MATCH REST $SVERTEX_THREADS SECVTX)" "$TPC_CORR_KEY" + : ${REDUCESV_OPT:=} + : ${REDUCESV_CONF:=} + if [[ $SYNCMODE == 1 ]] && [[ $SECVTXK0ONLY != 0 ]] ; then + : ${STRTRACKING:=" --disable-strangeness-tracker "} + : ${REDUCESV_OPT:=" --disable-cascade-finder --disable-3body-finder "} + : ${REDUCESV_CONF:="svertexer.pidCutsPhoton[0]=-1;svertexer.pidCutsLambda[0]=-1;svertexer.pidCutsHTriton[0]=-1;svertexer.pidCutsHhydrog4[0]=-1;"} + fi + [[ $GLOBAL_READER_NEEDS_SV != 1 ]] && add_W o2-secondary-vertexing-workflow "$DISABLE_MC $STRTRACKING $REDUCESV_OPT $DISABLE_ROOT_INPUT $DISABLE_ROOT_OUTPUT $TPC_CORR_OPT --vertexing-sources $SVERTEXING_SOURCES --threads $SVERTEX_THREADS --pipeline $(get_N secondary-vertexing MATCH REST $SVERTEX_THREADS SECVTX)" "$TPC_CORR_KEY;$REDUCESV_CONF" SECTVTX_ON="1" else SECTVTX_ON="0" From 966097b751dced62af09972912f8711dac66b7e4 Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 16 May 2025 17:37:22 +0200 Subject: [PATCH 0039/1426] Possibility to define AlignParam as Local Delta The constructors got extra argument convertLocalToGlobal (by default true) which tells that if the provided parameters correspond to the local delta (i.e. argument global == false), then it should be converted to global delta. 
Otherwise, the delta being local or global is fully defined by the value of the global argument. AlignParam::applyToGeometry() accounts for the persistent type of the delta and avoids the extra global -> local conversion if mIsGlobal is false. --- .../DetectorsCommonDataFormats/AlignParam.h | 17 +++++-- .../Detectors/Common/src/AlignParam.cxx | 46 +++++++++++-------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h b/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h index c4e702c6ae27e..a93a37c1e36ab 100644 --- a/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h +++ b/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h @@ -37,9 +37,12 @@ class AlignParam AlignParam(const char* symname, int algID, // volume symbolic name and its alignable ID double x, double y, double z, // delta translation double psi, double theta, double phi, // delta rotation - bool global = true); // global (preferable) or local delta definition + bool global = true, // global (preferable) or local delta definition + bool convertLocalToGlobal = true); // if local is provided, convert it to global - AlignParam(const char* symname, int algID, TGeoMatrix& m, bool global = true); + AlignParam(const char* symname, int algID, TGeoMatrix& m, + bool global = true, // global (preferable) or local delta definition + bool convertLocalToGlobal = true); // if local is provided, convert it to global /// return symbolic name of the volume const std::string& getSymName() const { return mSymName; } @@ -70,6 +73,9 @@ class AlignParam void setAlignableID(int id) { mAlignableID = id; } /// ================ methods for direct setting of delta params + /// set parameters + void setParams(double x, double y, double z, double psi, double theta, double phi); + /// set parameters of global delta void setGlobalParams(double x, double y, double 
z, double psi, double theta, double phi); @@ -114,6 +120,9 @@ class AlignParam int rectify(double zero = 1e-13); + bool isGlobal() const { return mIsGlobal; } + void setIsGlobal(bool v) { mIsGlobal = v; } + protected: bool matrixToAngles(const double* rot, double& psi, double& theta, double& phi) const; void anglesToMatrix(double psi, double theta, double phi, double* rot) const; @@ -123,8 +132,8 @@ class AlignParam private: std::string mSymName{}; + bool mIsGlobal = true; /// is this global delta? int mAlignableID = -1; /// alignable ID (set for sensors only) - double mX = 0.; ///< X translation of global delta double mY = 0.; ///< Y translation of global delta double mZ = 0.; ///< Z translation of global delta @@ -133,7 +142,7 @@ class AlignParam double mTheta = 0.; ///< "roll" : Euler angle of rotation around Y axis after 1st rotation (radians) double mPhi = 0.; ///< "yaw" : Euler angle of rotation around Z axis (radians) - ClassDefNV(AlignParam, 1); + ClassDefNV(AlignParam, 2); }; } // namespace detectors diff --git a/DataFormats/Detectors/Common/src/AlignParam.cxx b/DataFormats/Detectors/Common/src/AlignParam.cxx index 90f2a349607a1..f20cf3dac4971 100644 --- a/DataFormats/Detectors/Common/src/AlignParam.cxx +++ b/DataFormats/Detectors/Common/src/AlignParam.cxx @@ -26,8 +26,9 @@ using namespace o2::detectors; AlignParam::AlignParam(const char* symname, int algID, // volume symbolic name and its alignable ID double x, double y, double z, // delta translation double psi, double theta, double phi, // delta rotation - bool global) // global (preferable) or local delta definition - : mSymName(symname), mAlignableID(algID) + bool global, // global (preferable) or local delta definition + bool convertLocalToGlobal) // if local is provided, convert it to global + : mSymName(symname), mIsGlobal(global || convertLocalToGlobal), mAlignableID(algID) { /// standard constructor with 3 translation + 3 rotation parameters /// If the user explicitly sets the global variable to 
false then the @@ -35,23 +36,22 @@ AlignParam::AlignParam(const char* symname, int algID, // volume symbolic /// This requires to have a gGeoMenager active instance, otherwise the /// constructor will fail (no object created) - if (global) { - setGlobalParams(x, y, z, psi, theta, phi); - } else { + setParams(x, y, z, psi, theta, phi); + if (!global && convertLocalToGlobal) { setLocalParams(x, y, z, psi, theta, phi); } } //___________________________________________________ -AlignParam::AlignParam(const char* symname, int algID, TGeoMatrix& m, bool global) - : mSymName(symname), mAlignableID(algID) +AlignParam::AlignParam(const char* symname, int algID, TGeoMatrix& m, bool global, bool convertLocalToGlobal) + : mSymName(symname), mIsGlobal(global || convertLocalToGlobal), mAlignableID(algID) { setTranslation(m); if (!setRotation(m)) { const double* rot = m.GetRotationMatrix(); throw std::runtime_error(fmt::format("Failed to extract roll-pitch-yall angles from [[{},{},{}], [{},{},{}], [{},{},{}] for {}", rot[0], rot[1], rot[2], rot[3], rot[4], rot[5], rot[6], rot[7], rot[8], symname)); } - if (!global && !setLocalParams(mX, mY, mZ, mPsi, mTheta, mPhi)) { + if (!global && convertLocalToGlobal && !setLocalParams(mX, mY, mZ, mPsi, mTheta, mPhi)) { throw std::runtime_error(fmt::format("Alignment creation for {} failed: geomManager is absent", symname)); } } @@ -223,6 +223,10 @@ bool AlignParam::createLocalMatrix(TGeoHMatrix& m) const // In case that the TGeo was not initialized or not closed, // returns false and the object parameters are not set. // + m = createMatrix(); + if (!mIsGlobal) { + return true; + } if (!gGeoManager || !gGeoManager->IsClosed()) { LOG(error) << "Can't get the local alignment object parameters! 
gGeoManager doesn't exist or it is still open!"; return false; @@ -247,7 +251,6 @@ bool AlignParam::createLocalMatrix(TGeoHMatrix& m) const LOG(error) << "Volume name or path " << symname << " is not valid!"; return false; } - m = createMatrix(); TGeoHMatrix gprime, gprimeinv; gprime = *node->GetMatrix(); gprimeinv = gprime.Inverse(); @@ -302,18 +305,15 @@ bool AlignParam::applyToGeometry() const } // double threshold = 0.001; - - TGeoHMatrix gprime = *node->GetMatrix(); - TGeoHMatrix align = createMatrix(); - gprime.MultiplyLeft(&align); - TGeoHMatrix* ginv = new TGeoHMatrix; // TGeoPhysicalNode takes and manages raw pointer, need naked new! - TGeoHMatrix* g = node->GetMatrix(node->GetLevel() - 1); - *ginv = g->Inverse(); - *ginv *= gprime; - + TGeoHMatrix* align = new TGeoHMatrix(createMatrix()); + if (mIsGlobal) { + align->Multiply(node->GetMatrix()); + TGeoHMatrix* g = node->GetMatrix(node->GetLevel() - 1); + align->MultiplyLeft(node->GetMatrix(node->GetLevel() - 1)->Inverse()); + } LOG(debug) << "Aligning volume " << symname; - node->Align(ginv); + node->Align(align); return true; } @@ -359,6 +359,14 @@ void AlignParam::setGlobalParams(double x, double y, double z, double psi, doubl setRotation(psi, theta, phi); } +//_____________________________________________________________________________ +void AlignParam::setParams(double x, double y, double z, double psi, double theta, double phi) +{ + /// set parameters of global delta + setTranslation(x, y, z); + setRotation(psi, theta, phi); +} + //_____________________________________________________________________________ void AlignParam::setRotation(double psi, double theta, double phi) { From a6af3174a7011e8a0620b71a14fee134275f3815 Mon Sep 17 00:00:00 2001 From: swenzel Date: Fri, 16 May 2025 22:13:05 +0200 Subject: [PATCH 0040/1426] TPC init: load more params from CCDB for GEMAmpl --- Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff 
--git a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx index a04f73a62fbf8..b111d8f372967 100644 --- a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx +++ b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx @@ -309,11 +309,16 @@ void initTPC(long timestamp) auto& cdb = o2::tpc::CDBInterface::instance(); cdb.setUseDefaults(); - // IMPORTANT: load ParameterGEM from CCDB + // IMPORTANT: load ParameterGEM, ParameterGas and CalPadGainFull from CCDB to correctly init GEMAmplification auto& ccdbManager = o2::ccdb::BasicCCDBManager::instance(); ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::ParGEM), timestamp); - LOGP(info, "initTPC: TPC GEM param updated for time {}", timestamp); + LOGP(info, "initTPC: TPC GEM param, Gas param + CalPadGainFull updated for time {}", timestamp); + ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::CalPadGainFull), timestamp); + ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::ParGas), timestamp); + o2::tpc::ParameterGEM::Instance().printKeyValues(true, true); + o2::tpc::ParameterGas::Instance().printKeyValues(true, true); + // by invoking this constructor we make sure that a common file will be created // in future we should take this from OCDB and just forward per message const static auto& ampl = o2::tpc::GEMAmplification::instance(); From 88baea9a23ba9f20083204b9ae46a63eaa19038b Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 19 May 2025 11:06:23 +0200 Subject: [PATCH 0041/1426] DPL Analysis: `as(cfg)` for adjusting the types of configurables in expressions (#14276) --- .../Core/include/Framework/Expressions.h | 22 +++++++++++++------ Framework/Core/test/test_Expressions.cxx | 9 ++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index 9e00388ee5df8..18c930700a91d 100644 --- 
a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -12,13 +12,11 @@ #define O2_FRAMEWORK_EXPRESSIONS_H_ #include "Framework/BasicOps.h" -#include "Framework/CompilerBuiltins.h" #include "Framework/Pack.h" #include "Framework/Configurable.h" #include "Framework/Variant.h" #include "Framework/InitContext.h" #include "Framework/ConfigParamRegistry.h" -#include "Framework/RuntimeError.h" #include #include #include @@ -143,13 +141,17 @@ struct OpNode { /// A placeholder node for simple type configurable struct PlaceholderNode : LiteralNode { template + requires(variant_trait_v::type> != VariantType::Unknown) PlaceholderNode(Configurable const& v) : LiteralNode{v.value}, name{v.name} { - if constexpr (variant_trait_v::type> != VariantType::Unknown) { - retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{context.options().get(name)}; }; - } else { - unknownParameterUsed(name.c_str()); - } + retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{context.options().get(name)}; }; + } + + template + requires((std::convertible_to) && (variant_trait_v::type> != VariantType::Unknown)) + PlaceholderNode(Configurable const& v, AT*) : LiteralNode{static_cast(v.value)}, name{v.name} + { + retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{static_cast(context.options().get(name))}; }; } PlaceholderNode(PlaceholderNode const& other) = default; @@ -163,6 +165,12 @@ struct PlaceholderNode : LiteralNode { LiteralNode::var_t (*retrieve)(InitContext&, char const*); }; +template +PlaceholderNode as(Configurable const& v) +{ + return PlaceholderNode(v, (AT*)nullptr); +} + /// A placeholder node for parameters taken from an array struct ParameterNode : LiteralNode { ParameterNode(int index_ = -1) diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 2296b5dcbfbc4..6faa2fc352232 100644 --- 
a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -146,6 +146,15 @@ TEST_CASE("TestTreeParsing") REQUIRE(ptfilterspecs2[0].left == (DatumSpec{std::string{"fPt"}, typeid(o2::aod::track::Pt).hash_code(), atype::FLOAT})); REQUIRE(ptfilterspecs2[0].right == (DatumSpec{LiteralNode::var_t{1.0f}, atype::FLOAT})); REQUIRE(ptfilterspecs2[0].result == (DatumSpec{0u, atype::BOOL})); + + Configurable cvalue{"cvalue", 1, "test value"}; + Filter testFilter = o2::aod::track::tpcNClsShared < as(cvalue); + REQUIRE(testFilter.node->self.index() == 2); + REQUIRE(testFilter.node->left->self.index() == 1); + REQUIRE(testFilter.node->right->self.index() == 3); + REQUIRE(std::get(testFilter.node->right->self).name == "cvalue"); + auto testSpecs = createOperations(testFilter); + REQUIRE(testSpecs[0].right == (DatumSpec{LiteralNode::var_t{(uint8_t)1}, atype::UINT8})); } TEST_CASE("TestGandivaTreeCreation") From 4d20c8d9087a28bbd16984c36abef7b58b1f2802 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 19 May 2025 11:06:56 +0200 Subject: [PATCH 0042/1426] DPL Analysis: improve/fix join error by not relying on metadata (#14249) --- Framework/Core/include/Framework/ASoA.h | 10 ++++++---- .../Core/include/Framework/AnalysisManagers.h | 8 ++++---- Framework/Core/include/Framework/AnalysisTask.h | 4 ++-- Framework/Core/include/Framework/TableBuilder.h | 7 ++++--- Framework/Core/src/ASoA.cxx | 7 ++----- Framework/Core/test/test_ASoA.cxx | 15 +++++++++++++++ 6 files changed, 33 insertions(+), 18 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index e098cd89f6d5d..c13cfe912c814 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1245,7 +1245,7 @@ struct TableIterator : IP, C... 
{ }; struct ArrowHelpers { - static std::shared_ptr joinTables(std::vector>&& tables); + static std::shared_ptr joinTables(std::vector>&& tables, std::span labels); static std::shared_ptr concatTables(std::vector>&& tables); }; @@ -1683,6 +1683,7 @@ class Table using table_t = self_t; static constexpr const auto originals = computeOriginals(); + static constexpr const auto originalLabels = [] refs, size_t... Is>(std::index_sequence) { return std::array{o2::aod::label()...}; }.template operator()(std::make_index_sequence()); template bindings> requires(ref.origin_hash == "CONC"_h) @@ -1931,7 +1932,7 @@ class Table Table(std::vector>&& tables, uint64_t offset = 0) requires(ref.origin_hash != "CONC"_h) - : Table(ArrowHelpers::joinTables(std::move(tables)), offset) + : Table(ArrowHelpers::joinTables(std::move(tables), std::span{originalLabels}), offset) { } @@ -3213,7 +3214,7 @@ struct JoinFull : Table, D, o2::aod::Hash<"JOIN"_h>, Ts. bindInternalIndicesTo(this); } JoinFull(std::vector>&& tables, uint64_t offset = 0) - : base{ArrowHelpers::joinTables(std::move(tables)), offset} + : base{ArrowHelpers::joinTables(std::move(tables), std::span{base::originalLabels}), offset} { bindInternalIndicesTo(this); } @@ -3223,6 +3224,7 @@ struct JoinFull : Table, D, o2::aod::Hash<"JOIN"_h>, Ts. using self_t = JoinFull; using table_t = base; static constexpr const auto originals = base::originals; + static constexpr const auto originalLabels = base::originalLabels; using columns_t = typename table_t::columns_t; using persistent_columns_t = typename table_t::persistent_columns_t; using iterator = table_t::template iterator_template; @@ -3293,7 +3295,7 @@ using Join = JoinFull, Ts...>; template constexpr auto join(Ts const&... 
t) { - return Join(ArrowHelpers::joinTables({t.asArrowTable()...})); + return Join(ArrowHelpers::joinTables({t.asArrowTable()...}, std::span{Join::base::originalLabels})); } template diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 330eaf01f0be4..b76d88ea5ee66 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -282,14 +282,14 @@ template bool prepareOutput(ProcessingContext& context, T& spawns) { using metadata = o2::aod::MetadataTrait>::metadata; - auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context)); + auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context), std::span{metadata::base_table_t::originalLabels}); if (originalTable->schema()->fields().empty() == true) { using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } spawns.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label(), spawns.projector)); - spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable})); + spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } @@ -304,14 +304,14 @@ template bool prepareOutput(ProcessingContext& context, T& defines) { using metadata = o2::aod::MetadataTrait>::metadata; - auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context)); + auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context), std::span{metadata::base_table_t::originalLabels}); if (originalTable->schema()->fields().empty() == true) { using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } defines.extension = std::make_shared(o2::framework::spawner>(originalTable, 
o2::aod::label(), defines.projectors.data(), defines.projector)); - defines.table = std::make_shared(soa::ArrowHelpers::joinTables({defines.extension->asArrowTable(), originalTable})); + defines.table = std::make_shared(soa::ArrowHelpers::joinTables({defines.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index c7f3da1948c62..257a5358463c6 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -201,9 +201,9 @@ struct AnalysisDataProcessorBuilder { std::shared_ptr table = nullptr; auto joiner = [&record] refs, size_t... Is>(std::index_sequence) { return std::vector{extractTableFromRecord(record)...}; }; if constexpr (soa::is_iterator) { - table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence())); + table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence()), std::span{T::parent_t::originalLabels}); } else { - table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence())); + table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence()), std::span{T::originalLabels}); } expressions::updateFilterInfo(info, table); if constexpr (!o2::soa::is_smallgroups>) { diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 0b35d5be083e4..e2d12789ef922 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -771,7 +771,7 @@ template auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector) { using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - auto fullTable = 
soa::ArrowHelpers::joinTables(std::move(tables)); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } @@ -798,7 +798,7 @@ template auto spawner(std::vector>&& tables, const char* name, std::shared_ptr& projector) { using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables)); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } @@ -834,7 +834,8 @@ auto spawner(std::shared_ptr const& fullTable, const char* name, s template auto spawner(framework::pack columns, std::vector>&& tables, const char* name, std::shared_ptr& projector) { - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables)); + std::array labels{"original"}; + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{labels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, framework::pack{}); } diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index 810398747de88..bd6ca551d24ec 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -64,7 +64,7 @@ SelectionVector sliceSelection(gsl::span const& mSelectedRows, in return slicedSelection; } -std::shared_ptr ArrowHelpers::joinTables(std::vector>&& tables) +std::shared_ptr ArrowHelpers::joinTables(std::vector>&& tables, std::span labels) { if (tables.size() == 1) { return tables[0]; @@ -72,10 +72,7 @@ std::shared_ptr ArrowHelpers::joinTables(std::vectornum_rows() != tables[i + 1]->num_rows()) { throw o2::framework::runtime_error_f("Tables %s and %s have different sizes (%d vs %d) and cannot be joined!", - 
tables[i]->schema()->metadata()->Get("label").ValueOrDie().c_str(), - tables[i + 1]->schema()->metadata()->Get("label").ValueOrDie().c_str(), - tables[i]->num_rows(), - tables[i + 1]->num_rows()); + labels[i], labels[i + 1], tables[i]->num_rows(), tables[i + 1]->num_rows()); } } std::vector> fields; diff --git a/Framework/Core/test/test_ASoA.cxx b/Framework/Core/test/test_ASoA.cxx index 6d9ee16fec84d..80519aebc9ee7 100644 --- a/Framework/Core/test/test_ASoA.cxx +++ b/Framework/Core/test/test_ASoA.cxx @@ -31,6 +31,7 @@ namespace test DECLARE_SOA_COLUMN(X, x, int); DECLARE_SOA_COLUMN(Y, y, int); DECLARE_SOA_COLUMN(Z, z, int); +DECLARE_SOA_COLUMN(W, w, int); DECLARE_SOA_DYNAMIC_COLUMN(Sum, sum, [](int x, int y) { return x + y; }); DECLARE_SOA_EXPRESSION_COLUMN(ESum, esum, int, test::x + test::y); } // namespace test @@ -268,9 +269,17 @@ TEST_CASE("TestJoinedTables") rowWriterZ(0, 8); auto tableZ = builderZ.finalize(); + TableBuilder builderW; + auto rowWriterW = builderW.persist({"fW"}); + rowWriterW(0, 8); + rowWriterW(0, 8); + rowWriterW(0, 8); + auto tableW = builderW.finalize(); + using TestX = InPlaceTable<"A0"_h, o2::aod::test::X>; using TestY = InPlaceTable<"A1"_h, o2::aod::test::Y>; using TestZ = InPlaceTable<"A2"_h, o2::aod::test::Z>; + using TestW = InPlaceTable<"A3"_h, o2::aod::test::W>; using Test = Join; REQUIRE(Test::contains()); @@ -303,6 +312,12 @@ TEST_CASE("TestJoinedTables") for (auto& test : tests4) { REQUIRE(15 == test.x() + test.y() + test.z()); } + + try { + auto testF = join(TestZ{tableZ}, TestW{tableW}); + } catch (RuntimeErrorRef ref) { + REQUIRE(std::string{error_from_ref(ref).what} == "Tables TEST and TEST have different sizes (8 vs 3) and cannot be joined!"); + } } TEST_CASE("TestConcatTables") From 35e208b13fb4c30c38ff4b3d05535fb370b53c0a Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 19 May 2025 11:07:42 +0200 Subject: [PATCH 0043/1426] DPL Analysis: prevent slice cache from updating unnecessarily (#14257) * Cache setup now only 
happens after init when process configurables' values are final * Add inline constrained functions to avoid using "overloaded" * add error messages for unexpected situations --- Framework/Core/include/Framework/ASoA.h | 12 +-- .../Core/include/Framework/AnalysisManagers.h | 14 ++- .../Core/include/Framework/AnalysisTask.h | 102 ++++++++++++------ .../Framework/ArrowTableSlicingCache.h | 45 +++++--- .../Core/include/Framework/GroupSlicer.h | 2 +- Framework/Core/src/ASoA.cxx | 2 +- Framework/Core/src/ArrowSupport.cxx | 19 ++-- Framework/Core/src/ArrowTableSlicingCache.cxx | 60 +++++++---- Framework/Core/test/test_GroupSlicer.cxx | 4 +- 9 files changed, 167 insertions(+), 93 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index c13cfe912c814..4ed8e830fe137 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1400,10 +1400,10 @@ namespace o2::framework struct PreslicePolicyBase { const std::string binding; - StringPair bindingKey; + Entry bindingKey; bool isMissing() const; - StringPair const& getBindingKey() const; + Entry const& getBindingKey() const; }; struct PreslicePolicySorted : public PreslicePolicyBase { @@ -1428,7 +1428,7 @@ struct PresliceBase : public Policy { const std::string binding; PresliceBase(expressions::BindingNode index_) - : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, std::make_pair(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} + : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, Entry(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} { } @@ -1508,7 +1508,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + 
missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; @@ -1545,7 +1545,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } auto selection = container.getSliceFor(value); @@ -1574,7 +1574,7 @@ auto doFilteredSliceBy(T const* table, o2::framework::PresliceBase().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index b76d88ea5ee66..1d894b2b67948 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -534,39 +534,43 @@ static void setGroupedCombination(C& comb, TG& grouping, std::tuple& asso /// Preslice handling template requires(!is_preslice) -bool registerCache(T&, std::vector&, std::vector&) +bool registerCache(T&, Cache&, Cache&) { return false; } template requires std::same_as -bool registerCache(T& preslice, std::vector& bsks, std::vector&) +bool registerCache(T& preslice, Cache& bsks, Cache&) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsks.end()) { bsks.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + 
locate->enabled = true; } return true; } template requires std::same_as -bool registerCache(T& preslice, std::vector&, std::vector& bsksU) +bool registerCache(T& preslice, Cache&, Cache& bsksU) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsksU.end()) { bsksU.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + locate->enabled = true; } return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index 257a5358463c6..b53929f326712 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -65,21 +65,18 @@ concept is_enumeration = is_enumeration_v>; // the contents of an AnalysisTask... 
namespace { struct AnalysisDataProcessorBuilder { - template - static void addGroupingCandidates(std::vector& bk, std::vector& bku) + template + static void addGroupingCandidates(Cache& bk, Cache& bku, bool enabled) { - [&bk, &bku](framework::pack) mutable { - std::string key; - if constexpr (soa::is_iterator>) { - key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); - } - ([&bk, &bku, &key]() mutable { + [&bk, &bku, enabled](framework::pack) mutable { + auto key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); + ([&bk, &bku, &key, enabled]() mutable { if constexpr (soa::relatedByIndex, std::decay_t>()) { auto binding = soa::getLabelFromTypeForKey>(key); if constexpr (o2::soa::is_smallgroups>) { - framework::updatePairList(bku, binding, key); + framework::updatePairList(bku, binding, key, enabled); } else { - framework::updatePairList(bk, binding, key); + framework::updatePairList(bk, binding, key, enabled); } } }(), @@ -145,34 +142,72 @@ struct AnalysisDataProcessorBuilder { } /// helper to parse the process arguments + template + inline static bool requestInputsFromArgs(T&, std::string const&, std::vector&, std::vector&) + { + return false; + } + template + inline static bool requestInputsFromArgs(T& pc, std::string const& name, std::vector& inputs, std::vector& eis) + { + AnalysisDataProcessorBuilder::inputsFromArgs(pc.process, (name + "/" + pc.name).c_str(), pc.value, inputs, eis); + return true; + } + template + inline static bool requestCacheFromArgs(T&, Cache&, Cache&) + { + return false; + } + template + inline static bool requestCacheFromArgs(T& pc, Cache& bk, Cache& bku) + { + AnalysisDataProcessorBuilder::cacheFromArgs(pc.process, pc.value, bk, bku); + return true; + } /// 1. 
enumeration (must be the only argument) template - static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&) //, Cache&, Cache&) { std::vector inputMetadata; // FIXME: for the moment we do not support begin, end and step. DataSpecUtils::updateInputList(inputs, InputSpec{"enumeration", "DPL", "ENUM", 0, Lifetime::Enumeration, inputMetadata}); } - /// 2. grouping case - 1st argument is an iterator + /// 2. 1st argument is an iterator template - static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector& bk, std::vector& bku) + static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos) //, Cache& bk, Cache& bku) requires(std::is_lvalue_reference_v && (std::is_lvalue_reference_v && ...)) { - addGroupingCandidates(bk, bku); constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions::parent_t, Args...>(hash, name, value, inputs, eInfos); } /// 3. generic case template - static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos) //, Cache&, Cache&) requires(std::is_lvalue_reference_v && ...) { constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions(hash, name, value, inputs, eInfos); } + /// 1. enumeration (no grouping) + template + static void cacheFromArgs(R (C::*)(A), bool, Cache&, Cache&) + { + } + /// 2. 
iterator (the only grouping case) + template + static void cacheFromArgs(R (C::*)(A, Args...), bool value, Cache& bk, Cache& bku) + { + addGroupingCandidates(bk, bku, value); + } + /// 3. generic case (no grouping) + template + static void cacheFromArgs(R (C::*)(A, Args...), bool, Cache&, Cache&) + { + } + template static auto extractTableFromRecord(InputRecord& record) { @@ -480,8 +515,6 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) std::vector inputs; std::vector options; std::vector expressionInfos; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; /// make sure options and configurables are set before expression infos are created homogeneous_apply_refs([&options, &hash](auto& element) { return analysis_task_parsers::appendOption(options, element); }, *task.get()); @@ -490,23 +523,15 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) /// parse process functions defined by corresponding configurables if constexpr (requires { &T::process; }) { - AnalysisDataProcessorBuilder::inputsFromArgs(&T::process, "default", true, inputs, expressionInfos, bindingsKeys, bindingsKeysUnsorted); + AnalysisDataProcessorBuilder::inputsFromArgs(&T::process, "default", true, inputs, expressionInfos); } homogeneous_apply_refs( - overloaded{ - [name = name_str, &expressionInfos, &inputs, &bindingsKeys, &bindingsKeysUnsorted](framework::is_process_configurable auto& x) mutable { - // this pushes (argumentIndex,processHash,schemaPtr,nullptr) into expressionInfos for arguments that are Filtered/filtered_iterators - AnalysisDataProcessorBuilder::inputsFromArgs(x.process, (name + "/" + x.name).c_str(), x.value, inputs, expressionInfos, bindingsKeys, bindingsKeysUnsorted); - return true; - }, - [](auto&) { - return false; - }}, + [name = name_str, &expressionInfos, &inputs](auto& x) mutable { + // this pushes (argumentIndex, processHash, schemaPtr, nullptr) into expressionInfos for arguments that are 
Filtered/filtered_iterators + return AnalysisDataProcessorBuilder::requestInputsFromArgs(x, name, inputs, expressionInfos); + }, *task.get()); - // add preslice declarations to slicing cache definition - homogeneous_apply_refs([&bindingsKeys, &bindingsKeysUnsorted](auto& element) { return analysis_task_parsers::registerCache(element, bindingsKeys, bindingsKeysUnsorted); }, *task.get()); - // request base tables for spawnable extended tables and indices to be built // this checks for duplications homogeneous_apply_refs([&inputs](auto& element) { @@ -526,7 +551,12 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) requiredServices.insert(requiredServices.end(), arrowServices.begin(), arrowServices.end()); homogeneous_apply_refs([&requiredServices](auto& element) { return analysis_task_parsers::addService(requiredServices, element); }, *task.get()); - auto algo = AlgorithmSpec::InitCallback{[task = task, expressionInfos, bindingsKeys, bindingsKeysUnsorted](InitContext& ic) mutable { + auto algo = AlgorithmSpec::InitCallback{[task = task, expressionInfos](InitContext& ic) mutable { + Cache bindingsKeys; + Cache bindingsKeysUnsorted; + // add preslice declarations to slicing cache definition + homogeneous_apply_refs([&bindingsKeys, &bindingsKeysUnsorted](auto& element) { return analysis_task_parsers::registerCache(element, bindingsKeys, bindingsKeysUnsorted); }, *task.get()); + homogeneous_apply_refs([&ic](auto&& element) { return analysis_task_parsers::prepareOption(ic, element); }, *task.get()); homogeneous_apply_refs([&ic](auto&& element) { return analysis_task_parsers::prepareService(ic, element); }, *task.get()); @@ -556,6 +586,16 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) task->init(ic); } + /// parse process functions to enable requested grouping caches - note that at this state process configurables have their final values + if constexpr (requires { &T::process; }) { + AnalysisDataProcessorBuilder::cacheFromArgs(&T::process, true, bindingsKeys, bindingsKeysUnsorted); + } + homogeneous_apply_refs( + [&bindingsKeys, &bindingsKeysUnsorted](auto& x) mutable { + return AnalysisDataProcessorBuilder::requestCacheFromArgs(x, bindingsKeys, bindingsKeysUnsorted); + }, + *task.get()); + ic.services().get().setCaches(std::move(bindingsKeys)); ic.services().get().setCachesUnsorted(std::move(bindingsKeysUnsorted)); // initialize global caches diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index 2edc23a63ce76..292a67023fc5e 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -34,51 +34,64 @@ struct SliceInfoUnsortedPtr { gsl::span getSliceFor(int value) const; }; -using StringPair = std::pair; +struct Entry { + std::string binding; + std::string key; + bool enabled; + + Entry(std::string b, std::string k, bool e = true) + : binding{b}, + key{k}, + enabled{e} + { + } +}; + +using Cache = std::vector; -void updatePairList(std::vector& list, std::string const& binding, std::string const& key); +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled); struct ArrowTableSlicingCacheDef { constexpr static ServiceKind service_kind = ServiceKind::Global; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; + Cache bindingsKeys; + Cache bindingsKeysUnsorted; - void setCaches(std::vector&& bsks); - void setCachesUnsorted(std::vector&& bsks); + void setCaches(Cache&& bsks); + void setCachesUnsorted(Cache&& bsks); }; struct ArrowTableSlicingCache { constexpr static ServiceKind service_kind = ServiceKind::Stream; - std::vector 
bindingsKeys; + Cache bindingsKeys; std::vector>> values; std::vector>> counts; - std::vector bindingsKeysUnsorted; + Cache bindingsKeysUnsorted; std::vector> valuesUnsorted; std::vector groups; - ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted = {}); + ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted = {}); // set caching information externally - void setCaches(std::vector&& bsks, std::vector&& bsksUnsorted = {}); + void setCaches(Cache&& bsks, Cache&& bsksUnsorted = {}); // update slicing info cache entry (assumes it is already present) arrow::Status updateCacheEntry(int pos, std::shared_ptr const& table); arrow::Status updateCacheEntryUnsorted(int pos, std::shared_ptr const& table); // helper to locate cache position - std::pair getCachePos(StringPair const& bindingKey) const; - int getCachePosSortedFor(StringPair const& bindingKey) const; - int getCachePosUnsortedFor(StringPair const& bindingKey) const; + std::pair getCachePos(Entry const& bindingKey) const; + int getCachePosSortedFor(Entry const& bindingKey) const; + int getCachePosUnsortedFor(Entry const& bindingKey) const; // get slice from cache for a given value - SliceInfoPtr getCacheFor(StringPair const& bindingKey) const; - SliceInfoUnsortedPtr getCacheUnsortedFor(StringPair const& bindingKey) const; + SliceInfoPtr getCacheFor(Entry const& bindingKey) const; + SliceInfoUnsortedPtr getCacheUnsortedFor(Entry const& bindingKey) const; SliceInfoPtr getCacheForPos(int pos) const; SliceInfoUnsortedPtr getCacheUnsortedForPos(int pos) const; - static void validateOrder(StringPair const& bindingKey, std::shared_ptr const& input); + static void validateOrder(Entry const& bindingKey, std::shared_ptr const& input); }; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/GroupSlicer.h b/Framework/Core/include/Framework/GroupSlicer.h index 64b1d863c59e6..b8436314b057e 100644 --- a/Framework/Core/include/Framework/GroupSlicer.h +++ 
b/Framework/Core/include/Framework/GroupSlicer.h @@ -55,7 +55,7 @@ struct GroupSlicer { { constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); auto binding = o2::soa::getLabelFromTypeForKey>(mIndexColumnName); - auto bk = std::make_pair(binding, mIndexColumnName); + auto bk = Entry(binding, mIndexColumnName); if constexpr (!o2::soa::is_smallgroups>) { if (table.size() == 0) { return; diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index bd6ca551d24ec..3a681ee931a2b 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -194,7 +194,7 @@ bool PreslicePolicyBase::isMissing() const return binding == "[MISSING]"; } -StringPair const& PreslicePolicyBase::getBindingKey() const +Entry const& PreslicePolicyBase::getBindingKey() const { return bindingKey; } diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 12a4c7131e828..3b13e30581f70 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -567,26 +567,27 @@ o2::framework::ServiceSpec ArrowSupport::arrowTableSlicingCacheSpec() .name = "arrow-slicing-cache", .uniqueId = CommonServices::simpleServiceId(), .init = [](ServiceRegistryRef services, DeviceState&, fair::mq::ProgOptions&) { return ServiceHandle{TypeIdHelpers::uniqueId(), - new ArrowTableSlicingCache(std::vector>{services.get().bindingsKeys}, std::vector{services.get().bindingsKeysUnsorted}), + new ArrowTableSlicingCache(Cache{services.get().bindingsKeys}, + Cache{services.get().bindingsKeysUnsorted}), ServiceKind::Stream, typeid(ArrowTableSlicingCache).name()}; }, .configure = CommonServices::noConfiguration(), .preProcessing = [](ProcessingContext& pc, void* service_ptr) { auto* service = static_cast(service_ptr); auto& caches = service->bindingsKeys; - for (auto i = 0; i < caches.size(); ++i) { - if (pc.inputs().getPos(caches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntry(i, 
pc.inputs().get(caches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < caches.size(); ++i) { + if (caches[i].enabled && pc.inputs().getPos(caches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].first.c_str(), caches[i].second.c_str()); + throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].binding.c_str(), caches[i].key.c_str()); } } } auto& unsortedCaches = service->bindingsKeysUnsorted; - for (auto i = 0; i < unsortedCaches.size(); ++i) { - if (pc.inputs().getPos(unsortedCaches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < unsortedCaches.size(); ++i) { + if (unsortedCaches[i].enabled && pc.inputs().getPos(unsortedCaches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].first.c_str(), unsortedCaches[i].second.c_str()); + throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].binding.c_str(), unsortedCaches[i].key.c_str()); } } } }, diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 4b31f96e32fba..e001e293c4733 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -19,10 +19,13 @@ namespace o2::framework { -void updatePairList(std::vector& list, std::string const& binding, std::string const& key) +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled = true) { - if (std::find_if(list.begin(), list.end(), [&binding, 
&key](auto const& entry) { return (entry.first == binding) && (entry.second == key); }) == list.end()) { - list.emplace_back(binding, key); + auto locate = std::find_if(list.begin(), list.end(), [&binding, &key](auto const& entry) { return (entry.binding == binding) && (entry.key == key); }); + if (locate == list.end()) { + list.emplace_back(binding, key, enabled); + } else if (!locate->enabled && enabled) { + locate->enabled = true; } } @@ -65,17 +68,17 @@ gsl::span SliceInfoUnsortedPtr::getSliceFor(int value) const return {(*groups)[value].data(), (*groups)[value].size()}; } -void ArrowTableSlicingCacheDef::setCaches(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCaches(Cache&& bsks) { bindingsKeys = bsks; } -void ArrowTableSlicingCacheDef::setCachesUnsorted(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCachesUnsorted(Cache&& bsks) { bindingsKeysUnsorted = bsks; } -ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted) +ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted) : bindingsKeys{bsks}, bindingsKeysUnsorted{bsksUnsorted} { @@ -86,7 +89,7 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, s groups.resize(bindingsKeysUnsorted.size()); } -void ArrowTableSlicingCache::setCaches(std::vector&& bsks, std::vector&& bsksUnsorted) +void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted) { bindingsKeys = bsks; bindingsKeysUnsorted = bsksUnsorted; @@ -107,11 +110,15 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< counts[pos].reset(); return arrow::Status::OK(); } + auto& [b, k, e] = bindingsKeys[pos]; + if (!e) { + throw runtime_error_f("Disabled cache %s/%s update requested", b.c_str(), k.c_str()); + } validateOrder(bindingsKeys[pos], table); arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(value_counts, - 
arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].second)}, + arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].key)}, &options)); auto pair = static_cast(value_counts.array()); values[pos].reset(); @@ -128,7 +135,10 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (table->num_rows() == 0) { return arrow::Status::OK(); } - auto& [b, k] = bindingsKeysUnsorted[pos]; + auto& [b, k, e] = bindingsKeysUnsorted[pos]; + if (!e) { + throw runtime_error_f("Disabled unsorted cache %s/%s update requested", b.c_str(), k.c_str()); + } auto column = table->GetColumnByName(k); auto row = 0; for (auto iChunk = 0; iChunk < column->num_chunks(); ++iChunk) { @@ -139,7 +149,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (std::find(valuesUnsorted[pos].begin(), valuesUnsorted[pos].end(), v) == valuesUnsorted[pos].end()) { valuesUnsorted[pos].push_back(v); } - if (groups[pos].size() <= v) { + if ((int)groups[pos].size() <= v) { groups[pos].resize(v + 1); } (groups[pos])[v].push_back(row); @@ -151,7 +161,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st return arrow::Status::OK(); } -std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindingKey) const +std::pair ArrowTableSlicingCache::getCachePos(const Entry& bindingKey) const { auto pos = getCachePosSortedFor(bindingKey); if (pos != -1) { @@ -161,41 +171,47 @@ std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindi if (pos != -1) { return {pos, false}; } - throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); } -int ArrowTableSlicingCache::getCachePosSortedFor(StringPair const& bindingKey) const +int 
ArrowTableSlicingCache::getCachePosSortedFor(Entry const& bindingKey) const { - auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate != bindingsKeys.end()) { return std::distance(bindingsKeys.begin(), locate); } return -1; } -int ArrowTableSlicingCache::getCachePosUnsortedFor(StringPair const& bindingKey) const +int ArrowTableSlicingCache::getCachePosUnsortedFor(Entry const& bindingKey) const { - auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate_unsorted != bindingsKeysUnsorted.end()) { return std::distance(bindingsKeysUnsorted.begin(), locate_unsorted); } return -1; } -SliceInfoPtr ArrowTableSlicingCache::getCacheFor(StringPair const& bindingKey) const +SliceInfoPtr ArrowTableSlicingCache::getCacheFor(Entry const& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (!s) { - throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + } + if (!bindingsKeys[p].enabled) { + throw runtime_error_f("Disabled cache %s/%s is requested", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheForPos(p); } -SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const StringPair& bindingKey) const +SliceInfoUnsortedPtr 
ArrowTableSlicingCache::getCacheUnsortedFor(const Entry& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (s) { - throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + } + if (!bindingsKeysUnsorted[p].enabled) { + throw runtime_error_f("Disabled unsorted cache %s/%s is requested", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheUnsortedForPos(p); @@ -224,9 +240,9 @@ SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedForPos(int pos) con }; } -void ArrowTableSlicingCache::validateOrder(StringPair const& bindingKey, const std::shared_ptr& input) +void ArrowTableSlicingCache::validateOrder(Entry const& bindingKey, const std::shared_ptr& input) { - auto const& [target, key] = bindingKey; + auto const& [target, key, enabled] = bindingKey; auto column = input->GetColumnByName(key); auto array0 = static_cast>(column->chunk(0)->data()); int32_t prev = 0; diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 161939141e790..091c21eeae229 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -683,7 +683,7 @@ TEST_CASE("ArrowDirectSlicing") std::vector slices; std::vector offsts; - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); auto s = cache.updateCacheEntry(0, {evtTable}); auto lcache = cache.getCacheFor(bk); @@ -741,7 +741,7 @@ TEST_CASE("TestSlicingException") } auto evtTable = builderE.finalize(); - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); try { From 0c5140edf08d83042e2b8362eb152db6e01e3177 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Mon, 19 May 2025 
12:48:54 +0200 Subject: [PATCH 0044/1426] NN clustering: VRAM memory leak fix + (u)int -> (u)int32_t (#14272) * VRAM memory leak fix + (u)int -> (u)int32_t * Please consider the following formatting changes * Fixing my own debug messages * Making shared pointer for releasing * Bug-fix * Adding Davids patch --------- Co-authored-by: ALICE Action Bot --- Common/ML/include/ML/OrtInterface.h | 14 +-- Common/ML/src/OrtInterface.cxx | 42 ++++--- .../Global/GPUChainTrackingClusterizer.cxx | 33 ++++-- .../GPUTPCNNClusterizerHost.cxx | 23 ++-- .../GPUTPCNNClusterizerHost.h | 2 +- .../GPUTPCNNClusterizerKernels.cxx | 108 +++++++++--------- .../GPUTPCNNClusterizerKernels.h | 7 +- 7 files changed, 128 insertions(+), 101 deletions(-) diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index b4f40f3f5c694..7224645425856 100644 --- a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -45,14 +45,10 @@ class OrtModel public: // Constructors & destructors - OrtModel() = default; - OrtModel(std::unordered_map optionsMap) { init(optionsMap); } - void init(std::unordered_map optionsMap) - { - initOptions(optionsMap); - initEnvironment(); - } - virtual ~OrtModel() = default; + OrtModel(); + OrtModel(std::unordered_map optionsMap); + void init(std::unordered_map optionsMap); + virtual ~OrtModel(); // General purpose void initOptions(std::unordered_map optionsMap); @@ -113,7 +109,7 @@ class OrtModel private: // ORT variables -> need to be hidden as pImpl struct OrtVariables; - OrtVariables* mPImplOrt; + std::unique_ptr mPImplOrt; // Input & Output specifications of the loaded network std::vector mInputNamesChar, mOutputNamesChar; diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index df7f0a2deba82..8f31761489997 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -27,11 +27,20 @@ namespace o2 namespace ml { +OrtModel::OrtModel() = default; 
+OrtModel::OrtModel(std::unordered_map optionsMap) { init(optionsMap); } +OrtModel::~OrtModel() = default; +void OrtModel::init(std::unordered_map optionsMap) +{ + initOptions(optionsMap); + initEnvironment(); +} + struct OrtModel::OrtVariables { // The actual implementation is hidden in the .cxx file // ORT runtime objects Ort::RunOptions runOptions; - std::shared_ptr env = nullptr; - std::shared_ptr session = nullptr; ///< ONNX session + std::unique_ptr env = nullptr; + std::unique_ptr session = nullptr; ///< ONNX session Ort::SessionOptions sessionOptions; Ort::AllocatorWithDefaultOptions allocator; Ort::MemoryInfo memoryInfo = Ort::MemoryInfo("Cpu", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault); @@ -41,7 +50,7 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .c // General purpose void OrtModel::initOptions(std::unordered_map optionsMap) { - mPImplOrt = new OrtVariables(); + mPImplOrt = std::make_unique(); // Load from options map if (!optionsMap.contains("model-path")) { @@ -101,7 +110,7 @@ void OrtModel::initOptions(std::unordered_map optionsM void OrtModel::initEnvironment() { - mPImplOrt->env = std::make_shared( + mPImplOrt->env = std::make_unique( OrtLoggingLevel(mLoggingLevel), (mEnvName.empty() ? 
"ORT" : mEnvName.c_str()), // Integrate ORT logging into Fairlogger @@ -129,7 +138,7 @@ void OrtModel::initSession() if (mAllocateDeviceMemory) { memoryOnDevice(mDeviceId); } - mPImplOrt->session = std::make_shared(*mPImplOrt->env, mModelPath.c_str(), mPImplOrt->sessionOptions); + mPImplOrt->session = std::make_unique(*mPImplOrt->env, mModelPath.c_str(), mPImplOrt->sessionOptions); mPImplOrt->ioBinding = std::make_unique(*mPImplOrt->session); setIO(); @@ -147,12 +156,12 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex) (mPImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time (mPImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time // Arena memory shrinkage comes at performance cost - /// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; - // (mPImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 + // For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; + (mPImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See 
kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 std::string dev_mem_str = ""; if (mDeviceType == "ROCM") { - dev_mem_str = "Hip"; + dev_mem_str = "HipPinned"; } if (mDeviceType == "CUDA") { dev_mem_str = "Cuda"; @@ -166,7 +175,7 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex) void OrtModel::resetSession() { - mPImplOrt->session = std::make_shared(*(mPImplOrt->env), mModelPath.c_str(), mPImplOrt->sessionOptions); + mPImplOrt->session = std::make_unique(*(mPImplOrt->env), mModelPath.c_str(), mPImplOrt->sessionOptions); } // Getters @@ -252,7 +261,7 @@ void OrtModel::setIO() void OrtModel::setEnv(Ort::Env* env) { - mPImplOrt->env = std::shared_ptr(env); + mPImplOrt->env.reset(env); } // Inference @@ -308,6 +317,14 @@ void OrtModel::inference(I* input, int64_t input_size, O* output) (mPImplOrt->ioBinding)->BindOutput(mOutputNames[0].c_str(), outputTensor); (mPImplOrt->session)->Run(mPImplOrt->runOptions, *mPImplOrt->ioBinding); + // mPImplOrt->session->Run( + // mPImplOrt->runOptions, + // mInputNamesChar.data(), + // &inputTensor, + // mInputNamesChar.size(), + // mOutputNamesChar.data(), + // &outputTensor, + // mOutputNamesChar.size()); } template void OrtModel::inference(OrtDataType::Float16_t*, int64_t, OrtDataType::Float16_t*); @@ -427,10 +444,7 @@ template std::vector OrtModel::inferencesession->EndProfiling(); - // } - LOG(info) << "(ORT) Size of mPImplOrt: " << sizeof(*mPImplOrt) << " bytes"; + mPImplOrt.reset(); } // private diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 2cdd1bb76bf00..6c4e60a6025e1 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -659,7 +659,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) 
// But environment must be valid, so we init the model environment first and use it here afterwards. // Either this is done in one environment with lane == 0 or by recreating the allocator using recreateMemoryAllocator. // TODO: Volatile allocation works for reserving, but not yet for allocations when binding the input tensor - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); + // if (lane == 0) { + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); + // } // recreateMemoryAllocator = true; (nnApplications[lane].mModelClass).initSession(); } @@ -671,7 +673,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } // (nnApplications[lane].mModelReg1).setEnv((nnApplications[lane].mModelClass).getEnv()); (nnApplications[lane].mModelReg1).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator); + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator); (nnApplications[lane].mModelReg1).initSession(); } if (nnApplications[lane].mModelsUsed[2]) { @@ -680,8 +682,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (nnApplications[lane].mModelReg2.getIntraOpNumThreads() > maxThreads) { nnApplications[lane].mModelReg2.setIntraOpNumThreads(maxThreads); } + // (nnApplications[lane].mModelReg2).setEnv((nnApplications[lane].mModelClass).getEnv()); (nnApplications[lane].mModelReg2).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 
recreateMemoryAllocator); + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); (nnApplications[lane].mModelReg2).initSession(); } if (nn_settings.nnClusterizerVerbosity < 3) { @@ -707,8 +710,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (doGPU) { WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); } - LOG(info) << "Size of nnApplications[lane]: " << sizeof(nnApplications[0]) << " bytes"; - LOG(info) << "Size of nnApplications: " << sizeof(GPUTPCNNClusterizerHost) * GetProcessingSettings().nTPCClustererLanes << " bytes"; } #endif @@ -976,6 +977,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN; GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane]; + // // bool recreateMemoryAllocator = false; + // if (lane == 0) { + // (nnApplications[lane].mModelClass).initEnvironment(); + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 0); + // } + // // recreateMemoryAllocator = true; + // (nnApplications[lane].mModelClass).initSession(); + // (nnApplications[lane].mModelReg1).initSession(); + int withMC = (doGPU && propagateMCLabels); if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { @@ -1188,12 +1198,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { - // if (GetProcessingSettings().nn.applyNNclusterizer) { - // GPUTPCNNClusterizerHost& nnApplication = 
nnApplications[i]; - // nnApplication.mModelClass.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.mModelReg1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.mModelReg2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // } + if (GetProcessingSettings().nn.applyNNclusterizer) { + LOG(info) << "(ORT) Environment releasing..."; + GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; + nnApplication.mModelClass.release(true); + nnApplication.mModelReg1.release(true); + nnApplication.mModelReg2.release(true); + } if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index ca2deec60601c..90f1d6e27246f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -136,8 +136,8 @@ struct MockedOrtAllocator : OrtAllocator { std::atomic memory_inuse{0}; std::atomic num_allocations{0}; std::atomic num_reserve_allocations{0}; - OrtMemoryInfo* memory_info; - GPUReconstruction* rec; + OrtMemoryInfo* mMemoryInfoInternal; + GPUReconstruction* mRecInternal; }; MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info) @@ -147,37 +147,36 @@ MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast(this_)->Free(p); }; OrtAllocator::Info = [](const OrtAllocator* this_) { return static_cast(this_)->Info(); }; OrtAllocator::Reserve = [](OrtAllocator* this_, size_t size) { return static_cast(this_)->Reserve(size); }; - rec = r; - memory_info = info; + mRecInternal = r; + mMemoryInfoInternal = info; } MockedOrtAllocator::~MockedOrtAllocator() { - // Ort::GetApi().ReleaseMemoryInfo(memory_info); + // Ort::GetApi().ReleaseMemoryInfo(mMemoryInfoInternal); (void)0; // 
Suppress warning for empty destructor } void* MockedOrtAllocator::Alloc(size_t size) { - // LOG(info) << "(ORT) Allocating volatile memory of size " << size << " bytes"; - return rec->AllocateVolatileDeviceMemory(size); + LOG(info) << "(ORT) Allocating direct memory of size " << size << " bytes"; + return mRecInternal->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK); } void* MockedOrtAllocator::Reserve(size_t size) { - // LOG(info) << "(ORT) Reserving volatile memory of size " << size << " bytes"; - return rec->AllocateVolatileDeviceMemory(size); + LOG(info) << "(ORT) Reserving direct memory of size " << size << " bytes"; + return mRecInternal->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK); } void MockedOrtAllocator::Free(void* p) { // LOG(info) << "(ORT) Freeing volatile memory " << p; - rec->ReturnVolatileDeviceMemory(); } const OrtMemoryInfo* MockedOrtAllocator::Info() const { - return memory_info; + return mMemoryInfoInternal; } size_t MockedOrtAllocator::NumAllocations() const @@ -197,7 +196,7 @@ void MockedOrtAllocator::LeakCheck() } } -void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) +void GPUTPCNNClusterizerHost::directOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) { mMockedAlloc = std::make_shared(rec, (OrtMemoryInfo*)(*memInfo)); if (recreate) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h index e659753f21d7d..4334c3418eb09 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h @@ -53,7 +53,7 @@ class GPUTPCNNClusterizerHost void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&); // ONNX - void volatileOrtAllocator(Ort::Env*, Ort::MemoryInfo*, 
GPUReconstruction*, bool = false); + void directOrtAllocator(Ort::Env*, Ort::MemoryInfo*, GPUReconstruction*, bool = false); MockedOrtAllocator* getMockedAllocator(); const OrtMemoryInfo* getMockedMemoryInfo(); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 47bc5e8da80ca..8ca61602ab4e9 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -35,9 +35,9 @@ using namespace o2::gpu::tpccf; // Defining individual thread functions for data filling, determining the class label and running the CF clusterizer template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; if (clustererNN.mOutputDataClass[glo_idx] == 0) { // default clusterizer should not be called in batched mode due to mess-up with thread indices @@ -51,29 +51,29 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t 
dtype, int8_t onlyMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; - uint write_idx = glo_idx * clustererNN.mNnClusterizerElementSize; // Potential optimization: Either choose mNnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId + uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize; // Potential optimization: Either choose mNnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); - CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; - int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))]; + int32_t row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors float central_charge = static_cast(chargeMap[peak].unpack()); - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); + int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); #ifndef GPUCA_GPUCODE GPUCA_UNROLL(U(), U()); #endif - for (int r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) { + for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) { bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); - int pad_offset = is_row_boundary ? 
0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); - for (int p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) { + int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); + for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) { bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); - for (int t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) { + for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) { if (!is_boundary) { CfChargePos tmp_pos(row + r, pad + p, time + t); if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization @@ -111,21 +111,21 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; - uint base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize); - uint transient_index = glo_idx % clustererNN.mNnClusterizerElementSize; + uint32_t base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize); 
+ uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerElementSize); CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); - CfChargePos peak = clusterer.mPfilteredPeakPositions[base_idx + batchStart]; - int row = static_cast(peak.row()), pad = static_cast(peak.pad()); + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))]; + int32_t row = static_cast(peak.row()), pad = static_cast(peak.pad()); if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) { - uint top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize; + uint32_t top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize; for (uint16_t i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; CfChargePos tmp_pos = peak.delta(d); @@ -142,8 +142,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(pad) / GPUTPCGeometry::NPads(row); } } else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) { - int time = static_cast(peak.time()); - int r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow; + int32_t time = static_cast(peak.time()); + int32_t r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow; bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); if (is_row_boundary) { if (dtype == 0) { @@ -152,15 +152,16 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(clustererNN.mNnClusterizerBoundaryFillValue); } } else { - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); - int pad_offset = 
GPUTPCNNClusterizerKernels::padOffset(row, row + r); - int rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1)); - int p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset; - bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); + int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); + int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r); + int32_t rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1)); + int32_t p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset; + int32_t t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime; + + bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow) && (t < 0 || t >= TPC_MAX_FRAGMENT_LEN_GPU); if (!is_boundary) { float central_charge = static_cast(chargeMap[peak].unpack()); - int t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime; CfChargePos tmp_pos(row + r, pad + p, time + t); if (dtype == 0) { clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); @@ -179,9 +180,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void 
GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); if (dtype == 0) { processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int)((processors.tpcNNClusterer[sector].mModelProbabilities_16[glo_idx]).ToFloat() > processors.tpcNNClusterer[sector].mNnClassThreshold); } else if (dtype == 1) { @@ -190,14 +191,14 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart) { auto& clustererNN = processors.tpcNNClusterer[sector]; - uint glo_idx = get_global_id(0); - uint elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes; + uint32_t glo_idx = get_global_id(0); + uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes; float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty] - uint class_label = 0; - for (uint pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) { + uint32_t class_label = 0; + for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) { if (pIdx == elem_iterator) { if (dtype == 0) { current_max_prob = static_cast(clustererNN.mModelProbabilities_16[pIdx]); @@ -212,7 +213,7 @@ GPUdii() void 
GPUTPCNNClusterizerKernels::Thread 1) { clustererNN.mClusterFlags[2 * glo_idx] = 1; @@ -221,25 +222,30 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; + uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters; + uint32_t full_glo_idx = glo_idx + batchStart; + if (full_glo_idx >= maxClusterNum) { + return; + } + int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)]; float central_charge = static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; - uint full_glo_idx = glo_idx + batchStart; - int model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes; // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.mNnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size(); - if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes == -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) { + if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes != -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) { ClusterAccumulator pc; @@ -291,7 +297,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))]; float central_charge = static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; - uint full_glo_idx = glo_idx + batchStart; - int model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes; + uint32_t full_glo_idx = glo_idx + batchStart; + uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes; if (clustererNN.mOutputDataClass[full_glo_idx] > 0) { @@ -384,7 +390,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread 62 ? global_shift : 0); } -GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift) +GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t global_shift) { if (pad < 0 || row < 0) { // Faster short-circuit return true; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index dc7f537c6c1e8..dac2bf9554849 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -73,11 +73,12 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate private: static GPUd() void fillInputData(int32_t, int32_t, int32_t, int32_t, processorType&, uint8_t, int8_t, uint); static GPUd() void publishClustersReg1(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); + static GPUd() uint32_t sortIntoBuckets(GPUTPCClusterFinder&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t); static GPUd() void publishClustersReg2(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); - static GPUd() int padOffset(int, int); - static GPUd() int rowOffset(int, int); - static GPUd() bool isBoundary(int, int, int); + static GPUd() int32_t padOffset(int32_t, int32_t); + static GPUd() int32_t rowOffset(int32_t, int32_t); + static GPUd() bool isBoundary(int32_t, int32_t, int32_t); }; } // namespace o2::gpu From e7d217af61b67a12b508451c2f2ea925fda92f6e Mon Sep 17 00:00:00 2001 From: 
Marvin Hemmer <53471402+mhemmer-cern@users.noreply.github.com> Date: Mon, 19 May 2025 14:29:21 +0200 Subject: [PATCH 0045/1426] [EMCAL-688] EMCAL: Add fCross to the AnalysisCluster (#14282) - Add fCross the exoticity parameter to the AnalysisCluster so we can easily access it later in the emcalCorrectionTask in O2Physics --- .../EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h | 8 ++++++-- .../EMCAL/base/include/EMCALBase/ClusterFactory.h | 5 +++-- Detectors/EMCAL/base/src/ClusterFactory.cxx | 12 ++++++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h b/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h index 758e0a1fa0b47..e19fd17dea2ce 100644 --- a/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h +++ b/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h @@ -182,6 +182,9 @@ class AnalysisCluster float getCoreEnergy() const { return mCoreEnergy; } void setCoreEnergy(float energy) { mCoreEnergy = energy; } + float getFCross() const { return mFCross; } + void setFCross(float fCross) { mFCross = fCross; } + /// /// Returns TLorentzVector with momentum of the cluster. 
Only valid for clusters /// identified as photons or pi0 (overlapped gamma) produced on the vertex @@ -223,12 +226,13 @@ class AnalysisCluster float mTime = 0.; ///< Time of the digit/cell with maximal energy deposition bool mIsExotic = false; //!::buildCluster(int clusterIn float exoticTime = mInputsContainer[inputIndMax].getTimeStamp(); + float fCross = 0.; + try { - clusterAnalysis.setIsExotic(isExoticCell(towerId, inputEnergyMax, exoticTime)); + clusterAnalysis.setIsExotic(isExoticCell(towerId, inputEnergyMax, exoticTime, fCross)); + clusterAnalysis.setFCross(fCross); } catch (UninitLookUpTableException& e) { LOG(error) << e.what(); } @@ -253,7 +256,7 @@ void ClusterFactory::evalLocalPosition(gsl::span inputsInd clRmsXYZ[i] += (w * xyzi[i] * xyzi[i]); } } // w > 0 - } // dig loop + } // dig loop // cout << " wtot " << wtot << endl; @@ -600,7 +603,7 @@ std::tuple ClusterFactory::getMaximalEnergyI /// Look to cell neighbourhood and reject if it seems exotic //____________________________________________________________________________ template -bool ClusterFactory::isExoticCell(short towerId, float ecell, float const exoticTime) const +bool ClusterFactory::isExoticCell(short towerId, float ecell, float const exoticTime, float& fCross) const { if (ecell < mExoticCellMinAmplitude) { return false; // do not reject low energy cells @@ -612,8 +615,9 @@ bool ClusterFactory::isExoticCell(short towerId, float ecell, float c } float eCross = getECross(towerId, ecell, exoticTime); + fCross = 1.f - eCross / ecell; - if (1 - eCross / ecell > mExoticCellFraction) { + if (fCross > mExoticCellFraction) { LOG(debug) << "EXOTIC CELL id " << towerId << ", eCell " << ecell << ", eCross " << eCross << ", 1-eCross/eCell " << 1 - eCross / ecell; return true; } From 762cef75f0f7ce4c23c81e78efb439c72bdc7970 Mon Sep 17 00:00:00 2001 From: Marvin Hemmer <53471402+mhemmer-cern@users.noreply.github.com> Date: Mon, 19 May 2025 14:30:02 +0200 Subject: [PATCH 0046/1426] [EMCAL-689] 
EMCAL/Geometry: Add function to load alignment matrix from CCDB (#14237) - Add function `SetMisalMatrixFromCcdb` to set the misalignment matrices for the EMCal via the CCDB. This way they are not loaded from the GeoManager. The function expects a path inside the ccdb, which by default is set to `"Users/m/mhemmer/EMCAL/Config/GeometryAligned"` and the timestamp. Currently in the CCDB we only have the old Run 2 alignment object. Once we have the new alignment objects we can upload them and via the correct time stamp load them fittingly. --- .../EMCAL/base/include/EMCALBase/Geometry.h | 9 +++++- Detectors/EMCAL/base/src/Geometry.cxx | 31 +++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/Detectors/EMCAL/base/include/EMCALBase/Geometry.h b/Detectors/EMCAL/base/include/EMCALBase/Geometry.h index 04dcaa3b802de..4d4a947de88ca 100644 --- a/Detectors/EMCAL/base/include/EMCALBase/Geometry.h +++ b/Detectors/EMCAL/base/include/EMCALBase/Geometry.h @@ -22,7 +22,9 @@ #include #include #include +#include +#include "CCDB/BasicCCDBManager.h" #include "DataFormatsEMCAL/Constants.h" #include "EMCALBase/GeometryBase.h" #include "MathUtils/Cartesian.h" @@ -57,7 +59,7 @@ class Geometry /// | EMCAL_COMPLETE12SMV1_DCAL | Full EMCAL, 10 DCAL Supermodules (not used in practice) | /// | EMCAL_COMPLETE12SMV1_DCAL_8SM | Full EMCAL, 8 DCAL Supermodules (run2) | /// | EMCAL_COMPLETE12SMV1_DCAL_DEV | Full EMCAL, DCAL development geometry (not used) | - Geometry(const std::string_view name, const std::string_view mcname = "", const std::string_view mctitle = ""); + explicit Geometry(const std::string_view name, const std::string_view mcname = "", const std::string_view mctitle = ""); /// \brief Copy constructor. 
Geometry(const Geometry& geom); @@ -564,6 +566,11 @@ class Geometry /// void SetMisalMatrix(const TGeoHMatrix* m, Int_t smod) const; + /// + /// Method to set shift-rotational matrixes from CCDB + /// + void SetMisalMatrixFromCcdb(const char* path = "Users/m/mhemmer/EMCAL/Config/GeometryAligned", int timestamp = 10000) const; + /// /// Transform clusters cell position into global with alternative method, taking into account the depth calculation. /// Input are: diff --git a/Detectors/EMCAL/base/src/Geometry.cxx b/Detectors/EMCAL/base/src/Geometry.cxx index 920dc24823e83..6eff6c161f2a1 100644 --- a/Detectors/EMCAL/base/src/Geometry.cxx +++ b/Detectors/EMCAL/base/src/Geometry.cxx @@ -8,16 +8,21 @@ // In applying this license CERN does not waive the privileges and immunities // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. +#include "EMCALBase/Geometry.h" + +#include + #include +#include +#include +#include +#include #include #include #include #include -#include - -#include "EMCALBase/Geometry.h" #include "EMCALBase/ShishKebabTrd1Module.h" #include @@ -1557,6 +1562,7 @@ const TGeoHMatrix* Geometry::GetMatrixForSuperModule(Int_t smod) const if (!SMODULEMATRIX[smod]) { if (gGeoManager) { + LOG(info) << "Loading EMCAL misalignment matrix for SM " << smod << " from GeoManager."; SetMisalMatrix(GetMatrixForSuperModuleFromGeoManager(smod), smod); } else { LOG(fatal) << "Cannot find EMCAL misalignment matrices! 
Recover them either: \n" @@ -1762,6 +1768,25 @@ void Geometry::SetMisalMatrix(const TGeoHMatrix* m, Int_t smod) const } } +void Geometry::SetMisalMatrixFromCcdb(const char* path, int timestamp) const +{ + LOG(info) << "Using CCDB to obtain EMCal alignment."; + o2::ccdb::CcdbApi api; + map metadata; // can be empty + api.init("http://alice-ccdb.cern.ch"); + TObjArray* matrices = api.retrieveFromTFileAny(path, metadata, timestamp); + + for (int iSM = 0; iSM < mNumberOfSuperModules; ++iSM) { + TGeoHMatrix* mat = reinterpret_cast(matrices->At(iSM)); + if (mat) { + + SetMisalMatrix(mat, iSM); + } else { + LOG(info) << "Could not obtain Alignment Matrix for SM " << iSM; + } + } +} + Bool_t Geometry::IsDCALSM(Int_t iSupMod) const { if (mEMCSMSystem[iSupMod] == DCAL_STANDARD || mEMCSMSystem[iSupMod] == DCAL_EXT) { From e2f0fa3f1da3ecba8e1d446de4fd456fdd61b359 Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Mon, 19 May 2025 18:55:40 +0200 Subject: [PATCH 0047/1426] fix: suppressing excesive output from raw decoder (#14291) * fix: suppressing excesive output from raw decoder * clang * fix * fix --- .../include/DataFormatsCTP/Configuration.h | 2 +- .../Detectors/CTP/src/Configuration.cxx | 12 ++++- .../CTPReconstruction/RawDataDecoder.h | 10 ++-- .../CTP/reconstruction/src/RawDataDecoder.cxx | 32 ++++++------ .../include/CTPWorkflow/RawDecoderSpec.h | 11 ++++ Detectors/CTP/workflow/src/RawDecoderSpec.cxx | 50 +++++++++++++------ 6 files changed, 79 insertions(+), 38 deletions(-) diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h index fdd73986f1eaf..e9464089d71fc 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h @@ -160,7 +160,7 @@ class CTPConfiguration const std::vector& getCTPClasses() const { return mCTPClasses; } // Read-only interface uint64_t getInputMask(const std::string& 
name) const; int getInputIndex(const std::string& name) const; - std::string getClassNameFromIndex(int index) { return mCTPClasses[index].name; }; + std::string getClassNameFromIndex(int index); std::string getClassNameFromHWIndex(int index); const CTPClass* getCTPClassFromHWIndex(const int index) const; bool isMaskInInputs(const uint64_t& mask) const; diff --git a/DataFormats/Detectors/CTP/src/Configuration.cxx b/DataFormats/Detectors/CTP/src/Configuration.cxx index 38a49132db3d1..61e51bcb20d91 100644 --- a/DataFormats/Detectors/CTP/src/Configuration.cxx +++ b/DataFormats/Detectors/CTP/src/Configuration.cxx @@ -780,6 +780,15 @@ int CTPConfiguration::getInputIndex(const std::string& name) const LOG(info) << "input:" << name << " index:" << index; return index; } +std::string CTPConfiguration::getClassNameFromIndex(int index) +{ + if (index < (int)mCTPClasses.size()) { + return mCTPClasses[index].name; + } else { + std::string name = "Cls" + std::to_string(index); + return name; + } +}; std::string CTPConfiguration::getClassNameFromHWIndex(int index) { for (auto& cls : mCTPClasses) { @@ -924,8 +933,9 @@ uint64_t CTPConfiguration::getTriggerClassMaskWInputsNoTrgDets() const exclude += cls.name.find("EMC") != std::string::npos; exclude += cls.name.find("TRD") != std::string::npos; exclude += cls.name.find("HMP") != std::string::npos; - if (!exclude) + if (!exclude) { clsmask |= cls.classMask; + } } return clsmask; } diff --git a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h index 8ebc7e0304561..53addf32c538f 100644 --- a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h +++ b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h @@ -60,7 +60,8 @@ class RawDataDecoder std::array getClassErrorsB() { return mClassErrorsB; } std::array getClassCountersA() { return mClassCountersA; } std::array getClassCountersB() { return 
mClassCountersB; } - int getLostDueToShift() { return mLostDueToShift; } + int getLostDueToShiftCls() { return mLostDueToShiftCC; } + int getLostDueToShiftInp() { return mLostDueToShiftInps; } private: static constexpr uint32_t TF_TRIGGERTYPE_MASK = 0x800; @@ -80,8 +81,8 @@ class RawDataDecoder gbtword80_t mTVXMask = 0x4; // TVX is 3rd input gbtword80_t mVBAMask = 0x20; // VBA is 6 th input bool mVerbose = false; - uint32_t mIRRejected = 0; - uint32_t mTCRRejected = 0; + int mIRRejected = 0; + int mTCRRejected = 0; bool mPadding = true; uint32_t mTFOrbit = 0; std::vector mTFOrbits; @@ -94,7 +95,8 @@ class RawDataDecoder std::array mClassErrorsB{}; // from inputs std::array mClassCountersA{}; std::array mClassCountersB{}; // from inputs - int mLostDueToShift = 0; + int mLostDueToShiftCC = 0; + int mLostDueToShiftInps = 0; CTPConfiguration mCTPConfig; }; } // namespace ctp diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index b216f5ec54570..a062a262acf62 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -296,12 +296,12 @@ int RawDataDecoder::decodeRaw(o2::framework::InputRecord& inputs, std::vector& digitsMap, o2::pmr::vector& digits, uint32_t TFOrbit, uint64_t trgclassmask) { // int nClasswoInp = 0; // counting classes without input which should never happen + int lost = 0; std::map digitsMapShifted; auto L0shift = o2::ctp::TriggerOffsetsParam::Instance().LM_L0; auto L1shift = L0shift + o2::ctp::TriggerOffsetsParam::Instance().L0_L1; @@ -551,7 +554,7 @@ int RawDataDecoder::shiftInputs(std::map& digit if (lut == 0 || lut == 1) { // no inps or LM digitsMapShifted[dig.first] = dig.second; } else if (lut == 2) { // L0 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); if (dig.second.CTPClassMask.count()) { // LOG(error) << 
"Adding class mask without input ?"; // This is not needed as it can happen; Full checj done below - see next LOG(error) @@ -559,30 +562,30 @@ int RawDataDecoder::shiftInputs(std::map& digit digitsMapShifted[dig.first] = digi; } } else if (lut == 4) { // L1 - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); if (dig.second.CTPClassMask.count()) { CTPDigit digi = {dig.first, 0, dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } } else if (lut == 6) { // L0 and L1 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); if (dig.second.CTPClassMask.count()) { CTPDigit digi = {dig.first, 0, dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } } else if (lut == 3) { // LM and L0 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); CTPDigit digi = {dig.first, inpmask & (~L0MASKInputs), dig.second.CTPClassMask}; // if LM level do not need to add class as LM is not shifted; digitsMapShifted[dig.first] = digi; } else if (lut == 5) { // LM and L1 - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); CTPDigit digi = {dig.first, inpmask & (~L1MASKInputs), dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } else if (lut == 7) { // LM and L0 and L1 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, 
L1shift, 1, digitsMapShifted); CTPDigit digi = {dig.first, inpmaskLM, dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } else { @@ -592,7 +595,7 @@ int RawDataDecoder::shiftInputs(std::map& digit for (auto const& dig : digitsMapShifted) { digits.push_back(dig.second); } - return 0; + return lost; } // int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask, uint64_t trgclassmaskNoTrgDet) @@ -654,16 +657,13 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, mClassErrorsB[cls.getIndex()]++; ret = 256; } else { - mLostDueToShift++; + mLostDueToShiftCC++; } } } } } } - if (mLostDueToShift) { - LOG(debug) << "LOST classes because of shift:" << mLostDueToShift; - } return ret; } // diff --git a/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h b/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h index a5a1a75a0b594..3198e5c33e219 100644 --- a/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h +++ b/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h @@ -74,6 +74,17 @@ class RawDecoderSpec : public framework::Task std::deque mHistoryT; std::deque mHistoryV; RawDataDecoder mDecoder; + // Errors + int mLostDueToShiftInps = 0; + int mErrorIR = 0; + int mErrorTCR = 0; + int mIRRejected = 0; + int mTCRRejected = 0; + std::array mClsEA{}; + std::array mClsEB{}; // from inputs + std::array mClsA{}; + std::array mClsB{}; // from inputs + bool mCheckConsistency = false; }; /// \brief Creating DataProcessorSpec for the CTP diff --git a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx index 2df6bc981ce44..041e6cb472ebb 100644 --- a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx +++ b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx @@ -26,6 +26,8 @@ using namespace o2::ctp::reco_workflow; void RawDecoderSpec::init(framework::InitContext& ctx) { + mCheckConsistency = ctx.options().get("check-consistency"); + 
mDecoder.setCheckConsistency(mCheckConsistency); mDecodeinputs = ctx.options().get("ctpinputs-decoding"); mDecoder.setDecodeInps(mDecodeinputs); mNTFToIntegrate = ctx.options().get("ntf-to-average"); @@ -43,7 +45,7 @@ void RawDecoderSpec::init(framework::InitContext& ctx) mOutputLumiInfo.inp2 = inp2; mMaxInputSize = ctx.options().get("max-input-size"); mMaxInputSizeFatal = ctx.options().get("max-input-size-fatal"); - LOG(info) << "CTP reco init done. Inputs decoding here:" << mDecodeinputs << " DoLumi:" << mDoLumi << " DoDigits:" << mDoDigits << " NTF:" << mNTFToIntegrate << " Lumi inputs:" << lumiinp1 << ":" << inp1 << " " << lumiinp2 << ":" << inp2 << " Max errors:" << maxerrors << " Max input size:" << mMaxInputSize << " MaxInputSizeFatal:" << mMaxInputSizeFatal; + LOG(info) << "CTP reco init done. Inputs decoding here:" << mDecodeinputs << " DoLumi:" << mDoLumi << " DoDigits:" << mDoDigits << " NTF:" << mNTFToIntegrate << " Lumi inputs:" << lumiinp1 << ":" << inp1 << " " << lumiinp2 << ":" << inp2 << " Max errors:" << maxerrors << " Max input size:" << mMaxInputSize << " MaxInputSizeFatal:" << mMaxInputSizeFatal << " CheckConsistency:" << mCheckConsistency; // mOutputLumiInfo.printInputs(); } void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) @@ -69,22 +71,22 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) o0 = TFOrbits[i]; } std::cout << std::endl; - LOG(info) << " Lost due to the shift:" << mDecoder.getLostDueToShift(); - LOG(info) << "Number of missing TF:" << nmiss << std::endl; - if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) { - LOG(error) << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << mDecoder.getErrorTCR() << std::endl; + LOG(info) << "Number of non continous TF:" << nmiss << std::endl; + LOG(info) << "Lost in shiftInputs:" << mLostDueToShiftInps; + LOG(info) << "Lost in addDigit Inputs:" << mIRRejected << " Classes:" << mTCRRejected; + if (mErrorIR || mErrorTCR) { + LOG(error) << "# of 
IR errors:" << mErrorIR << " TCR errors:" << mErrorTCR << std::endl; } - std::array clsA = mDecoder.getClassCountersA(); - std::array clsB = mDecoder.getClassCountersB(); - std::array clsEA = mDecoder.getClassErrorsA(); - std::array clsEB = mDecoder.getClassErrorsB(); - - for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { - bool print = clsA[i] > 0 || clsB[i] > 0 || clsEA[i] > 0 || clsEB[i] > 0; - if (clsEA[i]) { - LOG(error) << " Class without inputs:"; + if (mCheckConsistency) { + LOG(info) << "Lost due to the shift Consistency Checker:" << mDecoder.getLostDueToShiftCls(); + auto ctpcfg = mDecoder.getCTPConfig(); + for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { + std::string name = ctpcfg.getClassNameFromIndex(i); + if (mClsEA[i]) { + LOG(error) << " Class without inputs:"; + } + LOG(important) << "CLASS:" << name << ":" << i << " Cls=>Inp:" << mClsA[i] << " Inp=>Cls:" << mClsB[i] << " ErrorsCls=>Inps:" << mClsEA[i] << " MissingInps=>Cls:" << mClsEB[i]; } - LOG(important) << "CLASS:" << i << " Cls=>Inp:" << clsA[i] << " Inp=>Cls:" << clsB[i] << " ErrorsCls=>Inps:" << clsEA[i] << " MissingInps=>Cls:" << clsEB[i]; } } void RawDecoderSpec::run(framework::ProcessingContext& ctx) @@ -161,6 +163,21 @@ void RawDecoderSpec::run(framework::ProcessingContext& ctx) if (mDoDigits) { LOG(info) << "[CTPRawToDigitConverter - run] Writing " << mOutputDigits.size() << " digits. 
IR rejected:" << mDecoder.getIRRejected() << " TCR rejected:" << mDecoder.getTCRRejected(); ctx.outputs().snapshot(o2::framework::Output{"CTP", "DIGITS", 0}, mOutputDigits); + mLostDueToShiftInps += mDecoder.getLostDueToShiftInp(); + mErrorIR += mDecoder.getErrorIR(); + mErrorTCR += mDecoder.getErrorTCR(); + mIRRejected += mDecoder.getIRRejected(); + mTCRRejected += mDecoder.getTCRRejected(); + auto clsEA = mDecoder.getClassErrorsA(); + auto clsEB = mDecoder.getClassErrorsB(); + auto cntCA = mDecoder.getClassCountersA(); + auto cntCB = mDecoder.getClassCountersB(); + for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { + mClsEA[i] += clsEA[i]; + mClsEB[i] += clsEB[i]; + mClsA[i] += cntCA[i]; + mClsB[i] += cntCB[i]; + } } if (mDoLumi) { uint32_t tfCountsT = 0; @@ -236,7 +253,8 @@ o2::framework::DataProcessorSpec o2::ctp::reco_workflow::getRawDecoderSpec(bool {"lumi-inp2", o2::framework::VariantType::String, "VBA", {"The second input used for online lumi. Name in capital."}}, {"use-verbose-mode", o2::framework::VariantType::Bool, false, {"Verbose logging"}}, {"max-input-size", o2::framework::VariantType::Int, 0, {"Do not process input if bigger than max size, 0 - do not check"}}, - {"max-input-size-fatal", o2::framework::VariantType::Bool, false, {"If true issue fatal error otherwise error on;y"}}, + {"max-input-size-fatal", o2::framework::VariantType::Bool, false, {"If true issue fatal error otherwise error only"}}, + {"check-consistency", o2::framework::VariantType::Bool, false, {"If true checks digits consistency using ctp config"}}, {"ctpinputs-decoding", o2::framework::VariantType::Bool, false, {"Inputs alignment: true - raw decoder - has to be compatible with CTF decoder: allowed options: 10,01,00"}}}}; } void RawDecoderSpec::updateTimeDependentParams(framework::ProcessingContext& pc) From 769ba3364776be99859990b6523814297d207aec Mon Sep 17 00:00:00 2001 From: swenzel Date: Mon, 19 May 2025 17:58:22 +0200 Subject: [PATCH 0048/1426] Fix units for 
GeneratorFromEventPool --- Generators/src/GeneratorFromFile.cxx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Generators/src/GeneratorFromFile.cxx b/Generators/src/GeneratorFromFile.cxx index e37a3886c24e1..66f7e03a4cf15 100644 --- a/Generators/src/GeneratorFromFile.cxx +++ b/Generators/src/GeneratorFromFile.cxx @@ -385,6 +385,11 @@ GeneratorFromEventPool::GeneratorFromEventPool(EventPoolGenConfig const& pars) : bool GeneratorFromEventPool::Init() { + // this simply passes tracks trough. Leave units intact. + setTimeUnit(1.); + setPositionUnit(1.); + setEnergyUnit(1.); + // initialize the event pool if (mConfig.rngseed > 0) { mRandomEngine.seed(mConfig.rngseed); @@ -588,4 +593,4 @@ std::vector GeneratorFromEventPool::setupFileUniverse(std::string c ClassImp(o2::eventgen::GeneratorFromEventPool); ClassImp(o2::eventgen::GeneratorFromFile); -ClassImp(o2::eventgen::GeneratorFromO2Kine); \ No newline at end of file +ClassImp(o2::eventgen::GeneratorFromO2Kine); From 0a9fbfa7809b174632895e1e804ab0ae42c0e2f3 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 20 May 2025 10:39:14 +0200 Subject: [PATCH 0049/1426] DPL Analysis: fix ineffective function for Builds<> (#14297) --- Framework/Core/include/Framework/AnalysisManagers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 1d894b2b67948..dfec2256875c9 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -294,7 +294,7 @@ bool prepareOutput(ProcessingContext& context, T& spawns) } template -bool prepareOuput(ProcessingContext& context, T& builds) +bool prepareOutput(ProcessingContext& context, T& builds) { using metadata = o2::aod::MetadataTrait>::metadata; return builds.template build(builds.pack(), extractOriginals(context)); From 128a030847822127c42ba6e92f606d2f87b55409 Mon Sep 17 00:00:00 2001 
From: Sergio Garcia <47090312+singiamtel@users.noreply.github.com> Date: Tue, 20 May 2025 11:19:39 +0200 Subject: [PATCH 0050/1426] Github Actions: Setup dependabot (#14292) Related: https://github.com/AliceO2Group/O2Physics/pull/10660 --- .github/dependabot.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000000..30ad6d8f005b3 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +--- +# Dependabot configuration +# Reference: https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "github-actions" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" From dbf8b73a42164b608f91ed2095f8550788672dea Mon Sep 17 00:00:00 2001 From: Daniel Battistini <60930860+danielbattistini@users.noreply.github.com> Date: Tue, 20 May 2025 18:06:53 +0200 Subject: [PATCH 0051/1426] Fix the thickness of logical volumes for kTurboStaves and kStaggered configurations of the tracker (#14268) --- .../TRK/simulation/include/TRKSimulation/TRKLayer.h | 3 +++ Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h index ef355ec36ce2f..ba894f6d7a92b 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h @@ -45,6 +45,9 @@ class TRKLayer void createLayer(TGeoVolume* motherVolume); private: + // TGeo objects outside logical volumes can cause errors. 
Only used in case of kStaggered and kTurboStaves layouts + static constexpr float mLogicalVolumeThickness = 1; + int mLayerNumber; std::string mLayerName; float mInnerRadius; diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx index e6b00f6e96425..a95418afbba25 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx @@ -120,7 +120,7 @@ TGeoVolume* TRKLayer::createStave(std::string type, double width) staveVol->AddNode(chipVol, 1, nullptr); } else if (type == "staggered") { double width = mModuleWidth * 2; // Each stave has two modules (based on the LOI design) - stave = new TGeoBBox(width / 2, mChipThickness / 2, mZ / 2); + stave = new TGeoBBox(width / 2, mLogicalVolumeThickness / 2, mZ / 2); TGeoVolume* chipVolLeft = createChip("flat", mModuleWidth); TGeoVolume* chipVolRight = createChip("flat", mModuleWidth); staveVol = new TGeoVolume(staveName.c_str(), stave, medAir); @@ -152,7 +152,11 @@ void TRKLayer::createLayer(TGeoVolume* motherVolume) chipName = o2::trk::GeometryTGeo::getTRKChipPattern() + std::to_string(mLayerNumber), sensName = Form("%s%d", GeometryTGeo::getTRKSensorPattern(), mLayerNumber); - TGeoTube* layer = new TGeoTube(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2); + double layerThickness = mChipThickness; + if (mLayout != eLayout::kCylinder) { + layerThickness = mLogicalVolumeThickness; + } + TGeoTube* layer = new TGeoTube(mInnerRadius - 0.333 * layerThickness, mInnerRadius + 0.667 * layerThickness, mZ / 2); TGeoVolume* layerVol = new TGeoVolume(mLayerName.c_str(), layer, medAir); layerVol->SetLineColor(kYellow); From 930d83707083b9916ee60c18fa1680e75f8ff5f8 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 20 May 2025 19:27:52 +0200 Subject: [PATCH 0052/1426] DPL Analysis: move spawner caches outside of the spawner function (#14281) --- .../Core/include/Framework/AnalysisHelpers.h 
| 12 ++++- .../Core/include/Framework/AnalysisManagers.h | 14 ++++- .../Core/include/Framework/TableBuilder.h | 53 +++++-------------- Framework/Core/src/AODReaderHelpers.cxx | 8 ++- Framework/Core/test/test_TableSpawner.cxx | 7 ++- 5 files changed, 45 insertions(+), 49 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 55d2490dff1bc..95be6c7e407b3 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -297,6 +297,7 @@ struct Spawns : decltype(transformBase()) { using extension_t = typename metadata::extension_table_t; using base_table_t = typename metadata::base_table_t; using expression_pack_t = typename metadata::expression_pack_t; + static constexpr size_t N = framework::pack_size(expression_pack_t{}); constexpr auto pack() { @@ -318,7 +319,13 @@ struct Spawns : decltype(transformBase()) { } std::shared_ptr table = nullptr; std::shared_ptr extension = nullptr; + std::array projectors = [](framework::pack) -> std::array + { + return {{std::move(C::Projector())...}}; + } + (expression_pack_t{}); std::shared_ptr projector = nullptr; + std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); }; template @@ -365,6 +372,7 @@ struct Defines : decltype(transformBase()) { std::array projectors; std::shared_ptr projector = nullptr; + std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); }; template @@ -828,8 +836,10 @@ template auto Extend(T const& table) { using output_t = Join, o2::aod::Hash<"JOIN/0"_h>, o2::aod::Hash<"JOIN"_h>, Cs...>>; + static std::array projectors{{std::move(Cs::Projector())...}}; static std::shared_ptr projector = nullptr; - return output_t{{o2::framework::spawner(framework::pack{}, {table.asArrowTable()}, "dynamicExtension", projector), table.asArrowTable()}, 0}; + static auto schema = 
std::make_shared(o2::soa::createFieldsFromColumns(framework::pack{})); + return output_t{{o2::framework::spawner(framework::pack{}, {table.asArrowTable()}, "dynamicExtension", projectors.data(), projector, schema), table.asArrowTable()}, 0}; } /// Template function to attach dynamic columns on-the-fly (e.g. inside diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index dfec2256875c9..2a052c0b07218 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -287,8 +287,13 @@ bool prepareOutput(ProcessingContext& context, T& spawns) using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } + using D = o2::aod::Hash; - spawns.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label(), spawns.projector)); + spawns.extension = std::make_shared(o2::framework::spawner(originalTable, + o2::aod::label(), + spawns.projectors.data(), + spawns.projector, + spawns.schema)); spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } @@ -309,8 +314,13 @@ bool prepareOutput(ProcessingContext& context, T& defines) using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } + using D = o2::aod::Hash; - defines.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label(), defines.projectors.data(), defines.projector)); + defines.extension = std::make_shared(o2::framework::spawner(originalTable, + o2::aod::label(), + defines.projectors.data(), + defines.projector, + defines.schema)); defines.table = std::make_shared(soa::ArrowHelpers::joinTables({defines.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } diff --git 
a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index e2d12789ef922..f941bf29bd8c8 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -768,80 +768,51 @@ std::shared_ptr spawnerHelper(std::shared_ptr const& /// Expression-based column generator to materialize columns template requires(soa::has_configurable_extension::metadata>) -auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector) +auto spawner(std::shared_ptr const& fullTable, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); + return spawnerHelper(fullTable, schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); } template requires(soa::has_configurable_extension::metadata>) -auto spawner(std::shared_ptr const& fullTable, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector) +auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { - using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - if (fullTable->num_rows() == 0) { - return makeEmptyTable(name, placeholders_pack_t{}); - } - static 
auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); + return spawner(fullTable, name, projectors, projector, schema); } template requires(soa::has_extension::metadata> && !soa::has_configurable_extension::metadata>) -auto spawner(std::vector>&& tables, const char* name, std::shared_ptr& projector) +auto spawner(std::shared_ptr const& fullTable, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); - - auto projectors = [](framework::pack) -> std::array - { - return {{std::move(C::Projector())...}}; - } - (expression_pack_t{}); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); + return spawnerHelper(fullTable, schema, framework::pack_size(expression_pack_t{}), projectors, name, projector); } template requires(soa::has_extension::metadata> && !soa::has_configurable_extension::metadata>) -auto spawner(std::shared_ptr const& fullTable, const char* name, std::shared_ptr& projector) +auto spawner(std::vector>&& tables, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { - using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - 
if (fullTable->num_rows() == 0) { - return makeEmptyTable(name, expression_pack_t{}); - } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); - auto projectors = [](framework::pack) -> std::array - { - return {{std::move(C::Projector())...}}; - } - (expression_pack_t{}); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); + return spawner(fullTable, name, projectors, projector, schema); } template -auto spawner(framework::pack columns, std::vector>&& tables, const char* name, std::shared_ptr& projector) +auto spawner(framework::pack, std::vector>&& tables, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { std::array labels{"original"}; auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{labels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, framework::pack{}); } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(columns)); - std::array projectors{{std::move(C::Projector())...}}; - return spawnerHelper(fullTable, new_schema, sizeof...(C), projectors.data(), name, projector); + return spawnerHelper(fullTable, schema, sizeof...(C), projectors, name, projector); } template diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/Core/src/AODReaderHelpers.cxx index c413f2520919d..4dbd2877476be 100644 --- a/Framework/Core/src/AODReaderHelpers.cxx +++ b/Framework/Core/src/AODReaderHelpers.cxx @@ -158,7 +158,13 @@ auto make_spawn(InputSpec const& input, ProcessingContext& pc) using metadata_t = o2::aod::MetadataTrait::metadata; constexpr auto sources = metadata_t::sources; static std::shared_ptr projector = nullptr; - return o2::framework::spawner(extractOriginals(pc), 
input.binding.c_str(), projector); + static std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(typename metadata_t::expression_pack_t{})); + static auto projectors = [](framework::pack) -> std::array + { + return {{std::move(C::Projector())...}}; + } + (typename metadata_t::expression_pack_t{}); + return o2::framework::spawner(extractOriginals(pc), input.binding.c_str(), projectors.data(), projector, schema); } } // namespace diff --git a/Framework/Core/test/test_TableSpawner.cxx b/Framework/Core/test/test_TableSpawner.cxx index 2291ba5f4f787..e200adf37ccb4 100644 --- a/Framework/Core/test/test_TableSpawner.cxx +++ b/Framework/Core/test/test_TableSpawner.cxx @@ -50,10 +50,9 @@ TEST_CASE("TestTableSpawner") auto t1 = b1.finalize(); Points st1{t1}; - std::shared_ptr projector = nullptr; - auto expoints_a = o2::soa::Extend(st1); - auto extension = ExPointsExtension{o2::framework::spawner>(t1, o2::aod::Hash<"ExPoints"_h>::str, projector)}; + Spawns s; + auto extension = ExPointsExtension{o2::framework::spawner>(t1, o2::aod::Hash<"ExPoints"_h>::str, s.projectors.data(), s.projector, s.schema)}; auto expoints = ExPoints{{t1, extension.asArrowTable()}, 0}; REQUIRE(expoints_a.size() == 9); @@ -81,7 +80,7 @@ TEST_CASE("TestTableSpawner") Defines excpts; excpts.projectors[0] = test::x * test::x + test::y * test::y + test::z * test::z; - auto extension_2 = ExcPointsCfgExtension{o2::framework::spawner>({t1}, o2::aod::Hash<"ExcPoints"_h>::str, excpts.projectors.data(), excpts.projector)}; + auto extension_2 = ExcPointsCfgExtension{o2::framework::spawner>({t1}, o2::aod::Hash<"ExcPoints"_h>::str, excpts.projectors.data(), excpts.projector, excpts.schema)}; auto excpoints = ExcPoints{{t1, extension_2.asArrowTable()}, 0}; rex = extension.begin(); From 23781677b66c802d8f8ea8e2dbb390b425d91bec Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 20 May 2025 23:59:09 +0200 Subject: [PATCH 0053/1426] GPU TPC NN Clusterizer: Fix compilation without ONNX --- 
GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 6c4e60a6025e1..64e6f5a31aaa7 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -1198,6 +1198,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { +#ifdef GPUCA_HAS_ONNX if (GetProcessingSettings().nn.applyNNclusterizer) { LOG(info) << "(ORT) Environment releasing..."; GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; @@ -1205,6 +1206,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) nnApplication.mModelReg1.release(true); nnApplication.mModelReg2.release(true); } +#endif if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); } From 546f79341f0407ed993b08046b97ef3f7a34e8cd Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 21 May 2025 16:06:32 +0200 Subject: [PATCH 0054/1426] DPL: fix error reporting (#14306) Any oldest possible timeframe message was accounted as error. Maybe we should simply drop the metric... 
--- Framework/Core/src/DataProcessingDevice.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index ae25d8d3a915c..c303af4858234 100644 --- a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -2107,7 +2107,7 @@ void DataProcessingDevice::handleData(ServiceRegistryRef ref, InputChannelInfo& LOGP(debug, "Got DomainInfoHeader, new oldestPossibleTimeslice {} on channel {}", oldestPossibleTimeslice, info.id.value); parts.At(headerIndex).reset(nullptr); parts.At(payloadIndex).reset(nullptr); - } + } break; case InputType::Invalid: { reportError("Invalid part found."); } break; From 981cd40881706e1cf56a2e99d2778c683835153f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 20 May 2025 16:07:57 +0200 Subject: [PATCH 0055/1426] GPU: Add possibility to dump raw data in case of error --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 + GPU/GPUTracking/Base/GPUReconstruction.h | 11 + .../Base/GPUReconstructionDebug.cxx | 188 ++++++++++++++++++ .../Base/GPUReconstructionLibrary.cxx | 2 +- GPU/GPUTracking/CMakeLists.txt | 1 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 8 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 28 ++- GPU/GPUTracking/Global/GPUChainTracking.h | 7 +- .../GPUChainTrackingDebugAndProfiling.cxx | 24 ++- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 81 ++++---- GPU/GPUTracking/Global/GPUErrorCodes.h | 1 + GPU/GPUTracking/Global/GPUErrors.cxx | 8 +- GPU/GPUTracking/Global/GPUErrors.h | 2 +- 13 files changed, 313 insertions(+), 50 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUReconstructionDebug.cxx diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index c76bf11c3e25d..a4e5d5e1189f5 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -193,6 +193,7 @@ int32_t GPUReconstruction::Init() } 
mSlaves[i]->ClearAllocatedMemory(); } + debugInit(); return 0; } @@ -469,6 +470,7 @@ int32_t GPUReconstruction::Exit() if (mInitialized) { ExitDevice(); } + debugExit(); mInitialized = false; return 0; } diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index d5c0b8e828087..e0c866fd9421b 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -239,6 +240,9 @@ class GPUReconstruction virtual void PrintKernelOccupancies() {} double GetStatKernelTime() { return mStatKernelTime; } double GetStatWallTime() { return mStatWallTime; } + void setDebugDumpCallback(std::function&& callback = std::function(nullptr)); + bool triggerDebugDump(); + std::string getDebugFolder(const std::string& prefix = ""); // empty string = no debug // Threading std::shared_ptr mThreading; @@ -407,6 +411,13 @@ class GPUReconstruction }; static std::shared_ptr sLibCUDA, sLibHIP, sLibOCL; + // Debugging + struct debugInternal; + static std::unique_ptr mDebugData; + bool mDebugEnabled = false; + void debugInit(); + void debugExit(); + static GPUReconstruction* GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg); }; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDebug.cxx b/GPU/GPUTracking/Base/GPUReconstructionDebug.cxx new file mode 100644 index 0000000000000..c1c31eedde1b2 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionDebug.cxx @@ -0,0 +1,188 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionDebug.cxx +/// \author David Rohr + +#include "GPUReconstruction.h" +#include "GPULogging.h" +#include "GPUSettings.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace o2::gpu; + +struct GPUReconstruction::debugInternal { + std::function signalCallback; + std::function debugCallback = nullptr; + std::function reinstallCallback = nullptr; + std::unordered_map oldActions; + size_t debugCount = 0; + static void globalCallback(int32_t signal, siginfo_t* info, void* ucontext) + { + GPUReconstruction::mDebugData->signalCallback(signal, info, ucontext); + } +}; + +std::unique_ptr GPUReconstruction::mDebugData; + +void GPUReconstruction::debugInit() +{ + if (GetProcessingSettings().debugOnFailure) { + static std::mutex initMutex; + { + std::lock_guard guard(initMutex); + if (mDebugData) { + GPUFatal("Error handlers for debug dumps already set, cannot set them again"); + } + mDebugData = std::make_unique(); + } + mDebugEnabled = true; + if ((GetProcessingSettings().debugOnFailure & 1) || (GetProcessingSettings().debugOnFailure & 2)) { + struct sigaction sa, oldsa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = GPUReconstruction::debugInternal::globalCallback; + sa.sa_flags = SA_SIGINFO; + uint32_t mask = GetProcessingSettings().debugOnFailureSignalMask == (uint32_t)-1 ? 
((1 << SIGINT) | (1 << SIGABRT) | (1 << SIGBUS) | (1 << SIGTERM) | (1 << SIGSEGV)) : GetProcessingSettings().debugOnFailureSignalMask; + if (mask) { + for (uint32_t i = 0; i < sizeof(mask) * 8; i++) { + if (mask & (1 << i)) { + if (sigaction(i, &sa, &oldsa)) { + GPUFatal("Error installing signal handler for error dump on signal %d", i); + } + mDebugData->oldActions.emplace(i, oldsa); + } + } + } + + mDebugData->signalCallback = [this, &oldActions = mDebugData->oldActions, myAction = std::move(sa)](int32_t signal, siginfo_t* info, void* ucontext) { + static std::mutex callbackMutex; + std::lock_guard guard(callbackMutex); + if (mDebugData->debugCallback) { + GPUInfo("Running debug callback for signal %d", signal); + mDebugData->debugCallback(); + mDebugData->debugCount++; + } + mDebugData->debugCallback = nullptr; + if (!GetProcessingSettings().debugOnFailureNoForwardSignal) { + sigaction(signal, &oldActions[signal], nullptr); + raise(signal); + mDebugData->reinstallCallback = [signal, myAction]() { sigaction(signal, &myAction, nullptr); }; + } + }; + } + } +} + +void GPUReconstruction::debugExit() +{ + if (!mDebugEnabled) { + return; + } + if (mDebugData) { + for (auto& it : mDebugData->oldActions) { + if (sigaction(it.first, &it.second, nullptr)) { + GPUFatal("Error restoring signal handler for signal %d", it.first); + } + } + } + mDebugEnabled = false; +} + +void GPUReconstruction::setDebugDumpCallback(std::function&& callback) +{ + if (mMaster) { + if (mDebugData->reinstallCallback) { + mDebugData->reinstallCallback(); + mDebugData->reinstallCallback = nullptr; + } + mMaster->setDebugDumpCallback(std::move(callback)); + } else if (mDebugEnabled && mDebugData) { + mDebugData->debugCallback = callback; + } +} + +std::string GPUReconstruction::getDebugFolder(const std::string& prefix) +{ + const std::filesystem::path target_dir = GetProcessingSettings().debugOnFailureDirectory; + + std::size_t total_size = 0; + std::size_t subfolder_count = 0; + + if 
(!std::filesystem::exists(target_dir) || !std::filesystem::is_directory(target_dir)) { + GPUError("Invalid debugOnFailureDirectory %s", GetProcessingSettings().debugOnFailureDirectory.c_str()); + return ""; + } + + for (const auto& entry : std::filesystem::directory_iterator(target_dir)) { + if (entry.is_directory()) { + subfolder_count++; + + for (const auto& subentry : std::filesystem::directory_iterator(entry.path())) { + if (subentry.is_regular_file()) { + std::error_code ec; + auto size = std::filesystem::file_size(subentry.path(), ec); + if (!ec) { + total_size += size; + } + } + } + } + } + + if ((GetProcessingSettings().debugOnFailureMaxFiles && subfolder_count >= GetProcessingSettings().debugOnFailureMaxFiles) || (GetProcessingSettings().debugOnFailureMaxSize && (total_size >> 30) >= GetProcessingSettings().debugOnFailureMaxSize)) { + GPUError("Cannot store debug dump files, target storage exceeded: %zu dumps, %zu bytes", subfolder_count, total_size); + return ""; + } + + auto currentTime = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + std::ostringstream dateTime; + dateTime << std::put_time(std::localtime(&currentTime), "%Y-%m-%d_%H-%M-%S"); + + int32_t attempt = 0; + std::string outname; + while (true) { + if (attempt++ >= 512) { + GPUError("Error creating debug dump folder"); + return ""; + } + + outname = GetProcessingSettings().debugOnFailureDirectory + "/debug_" + prefix + (prefix == "" ? 
"" : "_") + dateTime.str() + "_" + std::to_string(attempt); + std::error_code ec; + bool created = std::filesystem::create_directory(outname, ec); + if (!ec && created) { + break; + } + } + + GPUInfo("Debug dump to %s", outname.c_str()); + return outname; +} + +bool GPUReconstruction::triggerDebugDump() +{ + if (mMaster) { + return mMaster->triggerDebugDump(); + } else if (mDebugEnabled && mDebugData && mDebugData->debugCallback) { + GPUInfo("Running triggered debug callback"); + mDebugData->debugCallback(); + mDebugData->debugCount++; + mDebugData->debugCallback = nullptr; + return true; + } + return false; +} diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index 89517c612403b..64184dd724acd 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUReconstruction.cxx +/// \file GPUReconstructionLibrary.cxx /// \author David Rohr #ifdef _WIN32 diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 52848692e7516..1b108bc74190d 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -95,6 +95,7 @@ set(SRCS_NO_CINT set(SRCS_NO_H SectorTracker/GPUTPCTrackerDump.cxx Merger/GPUTPCGMMergerDump.cxx Base/GPUReconstructionLibrary.cxx + Base/GPUReconstructionDebug.cxx Global/GPUChainTrackingClusterizer.cxx Global/GPUChainTrackingTransformation.cxx Global/GPUChainTrackingTRD.cxx diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index b9be1db881816..12f40cda4c398 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -360,6 +360,14 @@ AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options") AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing") AddOption(tpcFreeAllocatedMemoryAfterProcessing, bool, false, "", 0, "Clean all memory allocated by TPC when TPC processing done, only data written to external output resources will remain") +AddOption(debugOnFailure, int32_t, 0, "", 0, "Dump raw data in case an error occured, bit 1 enables all dumps, otherwise bitmask for: 2 = signal, 3 = GPUErrorCode", def(1)) +AddOption(debugOnFailureSignalMask, uint32_t, (uint32_t)-1, "", 0, "Mask of signals that trigger debug / dump") +AddOption(debugOnFailureErrorMask, uint64_t, (uint64_t)-1, "", 0, "Mask of GPUCA_ERRORS that trigger debug / dump") +AddOption(debugOnFailureNoForwardSignal, bool, false, "", 0, "Do not forward signal to original signal handler") +AddOption(debugOnFailureMaxN, uint32_t, 1, "", 0, "Max number of times to run the debug / dump") 
+AddOption(debugOnFailureMaxFiles, uint32_t, 0, "", 0, "Max number of files to have in the target folder") +AddOption(debugOnFailureMaxSize, uint32_t, 0, "", 0, "Max size of existing dumps in the target folder in GB") +AddOption(debugOnFailureDirectory, std::string, ".", "", 0, "Target folder for debug / dump") AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) AddSubConfig(GPUSettingsProcessingRTCtechnical, rtctech) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index c1c3e368ce90c..db84050772312 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -705,10 +705,14 @@ int32_t GPUChainTracking::RunChain() } mRec->getGeneralStepTimer(GeneralStep::Prepare).Stop(); - PrepareDebugOutput(); + PrepareKernelDebugOutput(); SynchronizeStream(0); // Synchronize all init copies that might be ongoing + if (GetProcessingSettings().debugOnFailure) { + mRec->setDebugDumpCallback([this]() { DoDebugRawDump(); }); + } + if (mIOPtrs.tpcCompressedClusters) { if (runRecoStep(RecoStep::TPCDecompression, &GPUChainTracking::RunTPCDecompression)) { return 1; @@ -775,7 +779,7 @@ int32_t GPUChainTracking::RunChain() } int32_t retVal = 0; - if (CheckErrorCodes(false, false, mRec->getErrorCodeOutput())) { + if (CheckErrorCodes(false, false, mRec->getErrorCodeOutput())) { // TODO: Eventually, we should use GPUReconstruction::CheckErrorCodes retVal = 3; if (!GetProcessingSettings().ignoreNonFatalGPUErrors) { return retVal; @@ -815,7 +819,7 @@ int32_t GPUChainTracking::RunChainFinalize() PrintOutputStat(); } - PrintDebugOutput(); + PrintKernelDebugOutput(); // PrintMemoryRelations(); @@ -884,6 +888,7 @@ int32_t GPUChainTracking::FinalizePipelinedProcessing() int32_t GPUChainTracking::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, std::vector>* fillErrors) { int32_t retVal = 0; + bool hasDebugError = false; for 
(int32_t i = 0; i < 1 + (!cpuOnly && mRec->IsGPU()); i++) { if (i) { const auto& threadContext = GetThreadContext(); @@ -925,9 +930,26 @@ int32_t GPUChainTracking::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, st fillErrors->emplace_back(std::array{pErrors[4 * j], pErrors[4 * j + 1], pErrors[4 * j + 2], pErrors[4 * j + 3]}); } } + if ((GetProcessingSettings().debugOnFailure & 1) || (GetProcessingSettings().debugOnFailure & 4)) { + if (GetProcessingSettings().debugOnFailureErrorMask == (uint64_t)-1) { + hasDebugError = true; + } else { + uint32_t nErrors = processors()->errorCodes.getNErrors(); + const uint32_t* pErrors = processors()->errorCodes.getErrorPtr(); + for (uint32_t j = 0; j < nErrors; j++) { + if (GetProcessingSettings().debugOnFailureErrorMask & (1 << pErrors[4 * j])) { + hasDebugError = true; + break; + } + } + } + } } } ClearErrorCodes(cpuOnly); + if (hasDebugError) { + mRec->triggerDebugDump(); + } return retVal; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 2a2996895dbcf..7d4adcd70af7f 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -134,7 +134,7 @@ class GPUChainTracking : public GPUChain void ClearIOPointers(); void AllocateIOMemory(); using GPUChain::DumpData; - void DumpData(const char* filename); + void DumpData(const char* filename, const GPUTrackingInOutPointers* ioPtrs = nullptr); using GPUChain::ReadData; int32_t ReadData(const char* filename); void DumpSettings(const char* dir = "") override; @@ -231,11 +231,12 @@ class GPUChainTracking : public GPUChain int32_t DoProfile(); void PrintMemoryRelations(); void PrintMemoryStatistics() override; - void PrepareDebugOutput(); - void PrintDebugOutput(); + void PrepareKernelDebugOutput(); + void PrintKernelDebugOutput(); void PrintOutputStat(); static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters); static void 
DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat* cls); + void DoDebugRawDump(); bool ValidateSteps(); bool ValidateSettings(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index f72943e6bcd5a..e9721ec9d12bf 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -185,7 +185,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); } -void GPUChainTracking::PrepareDebugOutput() +void GPUChainTracking::PrepareKernelDebugOutput() { #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT const auto& threadContext = GetThreadContext(); @@ -198,7 +198,7 @@ void GPUChainTracking::PrepareDebugOutput() #endif } -void GPUChainTracking::PrintDebugOutput() +void GPUChainTracking::PrintKernelDebugOutput() { #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT const auto& threadContext = GetThreadContext(); @@ -390,3 +390,23 @@ void GPUChainTracking::DebugSortCompressedClusters(o2::tpc::CompressedClustersFl sortMultiple(c.nAttachedClustersReduced, getReducedOffset, getN1, c.rowDiffA, c.sliceLegDiffA, c.padResA, c.timeResA); sortMultiple(c.nTracks, getIndex, get1, c.qPtA, c.rowA, c.sliceA, c.timeA, c.padA, c.nTrackClusters); // NOTE: This must be last, since nTrackClusters is used for handling the arrays above! 
} + +void GPUChainTracking::DoDebugRawDump() +{ + std::string dirName = mRec->getDebugFolder("tpc_raw"); + if (dirName == "") { + return; + } + GPUTrackingInOutPointers ioPtrs; + if (mIOPtrs.tpcZS) { + ioPtrs.tpcZS = mIOPtrs.tpcZS; + } else if (mIOPtrs.tpcPackedDigits) { + ioPtrs.tpcPackedDigits = mIOPtrs.tpcPackedDigits; + } else if (mIOPtrs.clustersNative) { + ioPtrs.clustersNative = mIOPtrs.clustersNative; + } + + GPUInfo("Doing debug raw dump"); + mRec->DumpSettings((dirName + "/").c_str()); + DumpData((dirName + "/event.0.dump").c_str(), &ioPtrs); +} diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 035e257ca7952..5a141cd08eb65 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -63,33 +63,36 @@ GPUChainTracking::InOutMemory::~InOutMemory() = default; GPUChainTracking::InOutMemory::InOutMemory(GPUChainTracking::InOutMemory&&) = default; GPUChainTracking::InOutMemory& GPUChainTracking::InOutMemory::operator=(GPUChainTracking::InOutMemory&&) = default; // NOLINT: False positive in clang-tidy -void GPUChainTracking::DumpData(const char* filename) +void GPUChainTracking::DumpData(const char* filename, const GPUTrackingInOutPointers* ioPtrs) { FILE* fp = fopen(filename, "w+b"); if (fp == nullptr) { return; } + if (ioPtrs == nullptr) { + ioPtrs = &mIOPtrs; + } fwrite(DUMP_HEADER, 1, DUMP_HEADER_SIZE, fp); fwrite(&GPUReconstruction::geometryType, sizeof(GPUReconstruction::geometryType), 1, fp); - DumpData(fp, mIOPtrs.clusterData, mIOPtrs.nClusterData, InOutPointerType::CLUSTER_DATA); - DumpData(fp, mIOPtrs.rawClusters, mIOPtrs.nRawClusters, InOutPointerType::RAW_CLUSTERS); - if (mIOPtrs.clustersNative) { - if (DumpData(fp, &mIOPtrs.clustersNative->clustersLinear, &mIOPtrs.clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE)) { - fwrite(&mIOPtrs.clustersNative->nClusters[0][0], sizeof(mIOPtrs.clustersNative->nClusters[0][0]), 
NSECTORS * GPUCA_ROW_COUNT, fp); - if (mIOPtrs.clustersNative->clustersMCTruth) { - const auto& buffer = mIOPtrs.clustersNative->clustersMCTruth->getBuffer(); + DumpData(fp, ioPtrs->clusterData, ioPtrs->nClusterData, InOutPointerType::CLUSTER_DATA); + DumpData(fp, ioPtrs->rawClusters, ioPtrs->nRawClusters, InOutPointerType::RAW_CLUSTERS); + if (ioPtrs->clustersNative) { + if (DumpData(fp, &ioPtrs->clustersNative->clustersLinear, &ioPtrs->clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE)) { + fwrite(&ioPtrs->clustersNative->nClusters[0][0], sizeof(ioPtrs->clustersNative->nClusters[0][0]), NSECTORS * GPUCA_ROW_COUNT, fp); + if (ioPtrs->clustersNative->clustersMCTruth) { + const auto& buffer = ioPtrs->clustersNative->clustersMCTruth->getBuffer(); std::pair tmp = {buffer.data(), buffer.size()}; DumpData(fp, &tmp.first, &tmp.second, InOutPointerType::CLUSTER_NATIVE_MC); } } } - if (mIOPtrs.tpcPackedDigits) { - if (DumpData(fp, mIOPtrs.tpcPackedDigits->tpcDigits, mIOPtrs.tpcPackedDigits->nTPCDigits, InOutPointerType::TPC_DIGIT) && mIOPtrs.tpcPackedDigits->tpcDigitsMC) { + if (ioPtrs->tpcPackedDigits) { + if (DumpData(fp, ioPtrs->tpcPackedDigits->tpcDigits, ioPtrs->tpcPackedDigits->nTPCDigits, InOutPointerType::TPC_DIGIT) && ioPtrs->tpcPackedDigits->tpcDigitsMC) { const char* ptrs[NSECTORS]; size_t sizes[NSECTORS]; for (uint32_t i = 0; i < NSECTORS; i++) { - if (mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]) { - const auto& buffer = mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]->getBuffer(); + if (ioPtrs->tpcPackedDigits->tpcDigitsMC->v[i]) { + const auto& buffer = ioPtrs->tpcPackedDigits->tpcDigitsMC->v[i]->getBuffer(); ptrs[i] = buffer.data(); sizes[i] = buffer.size(); } else { @@ -100,12 +103,12 @@ void GPUChainTracking::DumpData(const char* filename) DumpData(fp, ptrs, sizes, InOutPointerType::TPC_DIGIT_MC); } } - if (mIOPtrs.tpcZS) { + if (ioPtrs->tpcZS) { size_t total = 0; for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < 
GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { - total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < ioPtrs->tpcZS->sector[i].count[j]; k++) { + total += ioPtrs->tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -115,10 +118,10 @@ void GPUChainTracking::DumpData(const char* filename) total = 0; for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { - memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], mIOPtrs.tpcZS->sector[i].zsPtr[j][k], mIOPtrs.tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); - counts.count[i][j] += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; - total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < ioPtrs->tpcZS->sector[i].count[j]; k++) { + memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], ioPtrs->tpcZS->sector[i].zsPtr[j][k], ioPtrs->tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); + counts.count[i][j] += ioPtrs->tpcZS->sector[i].nZSPtr[j][k]; + total += ioPtrs->tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -127,33 +130,33 @@ void GPUChainTracking::DumpData(const char* filename) fwrite(&counts, sizeof(counts), 1, fp); } } - if (mIOPtrs.tpcCompressedClusters) { - if (mIOPtrs.tpcCompressedClusters->ptrForward) { + if (ioPtrs->tpcCompressedClusters) { + if (ioPtrs->tpcCompressedClusters->ptrForward) { throw std::runtime_error("Cannot dump non-flat compressed clusters"); } - char* ptr = (char*)mIOPtrs.tpcCompressedClusters; - size_t size = mIOPtrs.tpcCompressedClusters->totalDataSize; + char* ptr = (char*)ioPtrs->tpcCompressedClusters; + size_t size = ioPtrs->tpcCompressedClusters->totalDataSize; DumpData(fp, &ptr, &size, InOutPointerType::TPC_COMPRESSED_CL); } - if (mIOPtrs.settingsTF) { + if (ioPtrs->settingsTF) { uint32_t n = 1; - DumpData(fp, &mIOPtrs.settingsTF, &n, InOutPointerType::TF_SETTINGS); + DumpData(fp, 
&ioPtrs->settingsTF, &n, InOutPointerType::TF_SETTINGS); } - DumpData(fp, mIOPtrs.sectorTracks, mIOPtrs.nSectorTracks, InOutPointerType::SECTOR_OUT_TRACK); - DumpData(fp, mIOPtrs.sectorClusters, mIOPtrs.nSectorClusters, InOutPointerType::SECTOR_OUT_CLUSTER); - DumpData(fp, &mIOPtrs.mcLabelsTPC, &mIOPtrs.nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC); - DumpData(fp, &mIOPtrs.mcInfosTPC, &mIOPtrs.nMCInfosTPC, InOutPointerType::MC_INFO_TPC); - DumpData(fp, &mIOPtrs.mcInfosTPCCol, &mIOPtrs.nMCInfosTPCCol, InOutPointerType::MC_INFO_TPC); - DumpData(fp, &mIOPtrs.mergedTracks, &mIOPtrs.nMergedTracks, InOutPointerType::MERGED_TRACK); - DumpData(fp, &mIOPtrs.mergedTrackHits, &mIOPtrs.nMergedTrackHits, InOutPointerType::MERGED_TRACK_HIT); - DumpData(fp, &mIOPtrs.trdTracks, &mIOPtrs.nTRDTracks, InOutPointerType::TRD_TRACK); - DumpData(fp, &mIOPtrs.trdTracklets, &mIOPtrs.nTRDTracklets, InOutPointerType::TRD_TRACKLET); - if (mIOPtrs.trdSpacePoints) { - DumpData(fp, &mIOPtrs.trdSpacePoints, &mIOPtrs.nTRDTracklets, InOutPointerType::TRD_SPACEPOINT); + DumpData(fp, ioPtrs->sectorTracks, ioPtrs->nSectorTracks, InOutPointerType::SECTOR_OUT_TRACK); + DumpData(fp, ioPtrs->sectorClusters, ioPtrs->nSectorClusters, InOutPointerType::SECTOR_OUT_CLUSTER); + DumpData(fp, &ioPtrs->mcLabelsTPC, &ioPtrs->nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC); + DumpData(fp, &ioPtrs->mcInfosTPC, &ioPtrs->nMCInfosTPC, InOutPointerType::MC_INFO_TPC); + DumpData(fp, &ioPtrs->mcInfosTPCCol, &ioPtrs->nMCInfosTPCCol, InOutPointerType::MC_INFO_TPC); + DumpData(fp, &ioPtrs->mergedTracks, &ioPtrs->nMergedTracks, InOutPointerType::MERGED_TRACK); + DumpData(fp, &ioPtrs->mergedTrackHits, &ioPtrs->nMergedTrackHits, InOutPointerType::MERGED_TRACK_HIT); + DumpData(fp, &ioPtrs->trdTracks, &ioPtrs->nTRDTracks, InOutPointerType::TRD_TRACK); + DumpData(fp, &ioPtrs->trdTracklets, &ioPtrs->nTRDTracklets, InOutPointerType::TRD_TRACKLET); + if (ioPtrs->trdSpacePoints) { + DumpData(fp, &ioPtrs->trdSpacePoints, 
&ioPtrs->nTRDTracklets, InOutPointerType::TRD_SPACEPOINT); } - DumpData(fp, &mIOPtrs.trdTriggerTimes, &mIOPtrs.nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); - DumpData(fp, &mIOPtrs.trdTrackletIdxFirst, &mIOPtrs.nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); - DumpData(fp, &mIOPtrs.trdTrigRecMask, &mIOPtrs.nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); + DumpData(fp, &ioPtrs->trdTriggerTimes, &ioPtrs->nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); + DumpData(fp, &ioPtrs->trdTrackletIdxFirst, &ioPtrs->nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); + DumpData(fp, &ioPtrs->trdTrigRecMask, &ioPtrs->nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); fclose(fp); } diff --git a/GPU/GPUTracking/Global/GPUErrorCodes.h b/GPU/GPUTracking/Global/GPUErrorCodes.h index f35f5fc81a382..8fec23be00a09 100644 --- a/GPU/GPUTracking/Global/GPUErrorCodes.h +++ b/GPU/GPUTracking/Global/GPUErrorCodes.h @@ -47,5 +47,6 @@ GPUCA_ERROR_CODE(26, ERROR_TPCZS_INVALID_ROW, SectorRow) GPUCA_ERROR_CODE(27, ERROR_TPCZS_INVALID_NADC, SectorCRU, SamplesInPage, SamplesWritten) // Invalid number of ADC samples in header, existing samples were decoded GPUCA_ERROR_CODE(28, ERROR_TPCZS_INCOMPLETE_HBF, SectorCRU, PacketCount, NextPacketCount) // Part of HBF is missing, decoding incomplete GPUCA_ERROR_CODE(29, ERROR_TPCZS_INVALID_OFFSET, SectorEndpoint, Value, Expected) // Raw page is skipped since it contains invalid payload offset +GPUCA_ERROR_CODE(29, MAX_GPUCA_ERROR_NUMBER) // #define GPUCA_CHECK_TPCZS_CORRUPTION diff --git a/GPU/GPUTracking/Global/GPUErrors.cxx b/GPU/GPUTracking/Global/GPUErrors.cxx index 7f3ed1d8206d9..e9d5a74c6567a 100644 --- a/GPU/GPUTracking/Global/GPUErrors.cxx +++ b/GPU/GPUTracking/Global/GPUErrors.cxx @@ -54,12 +54,17 @@ static std::unordered_map errorNames = { #undef GPUCA_ERROR_CODE }; -void GPUErrors::printErrors(bool silent) +bool GPUErrors::printErrors(bool silent, uint64_t mask) { + bool retVal = false; for
(uint32_t i = 0; i < std::min(*mErrors, GPUCA_MAX_ERRORS); i++) { uint32_t errorCode = mErrors[4 * i + 1]; const auto& it = errorNames.find(errorCode); const char* errorName = it == errorNames.end() ? "INVALID ERROR CODE" : it->second; + static_assert(MAX_GPUCA_ERROR_NUMBER < sizeof(mask) * 8); // bit number MAX_GPUCA_ERROR_NUMBER itself must fit into the mask + if (errorCode < sizeof(mask) * 8 && (mask & ((uint64_t)1 << errorCode))) { // errorCode comes from the error buffer and may be invalid: shift in 64 bit and never past the mask width + retVal = true; + } if (silent && i) { GPUWarning("GPU Error Code (%u:%u) %s : %u / %u / %u", i, errorCode, errorName, mErrors[4 * i + 2], mErrors[4 * i + 3], mErrors[4 * i + 4]); } else if (silent) { @@ -75,6 +80,7 @@ void GPUErrors::printErrors(bool silent) GPUError("Additional errors occured (codes not stored)"); } } + return retVal; } uint32_t GPUErrors::getNErrors() const diff --git a/GPU/GPUTracking/Global/GPUErrors.h b/GPU/GPUTracking/Global/GPUErrors.h index cd86390bc1b01..1cbc4a019601d 100644 --- a/GPU/GPUTracking/Global/GPUErrors.h +++ b/GPU/GPUTracking/Global/GPUErrors.h @@ -33,7 +33,7 @@ class GPUErrors GPUd() bool hasError() { return *mErrors > 0; } void setMemory(GPUglobalref() uint32_t* m) { mErrors = m; } void clear(); - void printErrors(bool silent = false); + bool printErrors(bool silent = false, uint64_t mask = 0); uint32_t getNErrors() const; const uint32_t* getErrorPtr() const; static uint32_t getMaxErrors(); From bb048efab7be5df04ad93a974abef167f4c6e88a Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 21 May 2025 19:41:52 +0200 Subject: [PATCH 0056/1426] DPL Analysis: add `clamp` expression node to constrain a result of an expresison between two values (#14305) --- Framework/Core/include/Framework/Expressions.h | 8 ++++++++ Framework/Core/test/test_Expressions.cxx | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index 18c930700a91d..9d6c3cfb7c66e 100644 --- a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -546,6 +546,14 @@ inline Node updateParameters(Node
const& pexp, int bins, std::vector const& p return result; } +/// clamping functional +template +inline Node clamp(Node&& expr, T low, T hi) +{ + auto copy = expr; + return ifnode(Node{copy} < LiteralNode{low}, LiteralNode{low}, ifnode(Node{copy} > LiteralNode{hi}, LiteralNode{hi}, Node{copy})); +} + /// A struct, containing the root of the expression tree struct Filter { Filter() = default; diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 6faa2fc352232..e8cf43e03e11d 100644 --- a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -290,6 +290,13 @@ TEST_CASE("TestConditionalExpressions") auto gandiva_condition2 = makeCondition(gandiva_tree2); auto gandiva_filter2 = createFilter(schema2, gandiva_condition2); REQUIRE(gandiva_tree2->ToString() == "bool greater_than((float) fSigned1Pt, (const float) 0 raw(0)) && if (bool less_than(float absf((float) fEta), (const float) 1 raw(3f800000)) && if (bool less_than((float) fPt, (const float) 1 raw(3f800000))) { bool greater_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) } else { bool less_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) }) { bool greater_than(float absf((float) fX), (const float) 1 raw(3f800000)) } else { bool greater_than(float absf((float) fY), (const float) 1 raw(3f800000)) }"); + + // clamp + Projector clp = clamp(o2::aod::track::pt, 1.0f, 10.f); + auto clpspecs = createOperations(clp); + auto schemaclp = std::make_shared(std::vector{o2::aod::track::Pt::asArrowField()}); + auto gandiva_tree_clp = createExpressionTree(clpspecs, schemaclp); + REQUIRE(gandiva_tree_clp->ToString() == "if (bool less_than((float) fPt, (const float) 1 raw(3f800000))) { (const float) 1 raw(3f800000) } else { if (bool greater_than((float) fPt, (const float) 10 raw(41200000))) { (const float) 10 raw(41200000) } else { (float) fPt } }"); } TEST_CASE("TestBinnedExpressions") From 35ca22b3bde5014b40ebe1d823c6dc88a313ddbf Mon Sep 
17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 21 May 2025 19:54:16 +0200 Subject: [PATCH 0057/1426] DPL: fix merging of pipelined devices (#14307) Sometimes we are just too smart. Multiple messages with the same signature are coalesced in the same input if they are processed at the same time. This explains why the sleep was improving behavior: it merely staggers arrival, so that the optimisation cannot happen anymore. --- .../AnalysisSupport/src/AODWriterHelpers.cxx | 254 +++++++++--------- 1 file changed, 129 insertions(+), 125 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx index 2b1b4f880d1ee..40d2189ea96d0 100644 --- a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx @@ -269,145 +269,149 @@ AlgorithmSpec AODWriterHelpers::getOutputObjHistWriter(ConfigContext const& ctx) callbacks.set(endofdatacb); return [inputObjects, objmap, tskmap](ProcessingContext& pc) mutable -> void { - auto const& ref = pc.inputs().get("x"); - if (!ref.header) { - LOG(error) << "Header not found"; - return; - } - auto datah = o2::header::get(ref.header); - if (!datah) { - LOG(error) << "No data header in stack"; - return; - } + auto mergePart = [&inputObjects, &objmap, &tskmap](DataRef const& ref) { + if (!ref.header) { + LOG(error) << "Header not found"; + return; + } + auto datah = o2::header::get(ref.header); + if (!datah) { + LOG(error) << "No data header in stack"; + return; + } - if (!ref.payload) { - LOGP(error, "Payload not found for {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); - return; - } + if (!ref.payload) { + LOGP(error, "Payload not found for {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } - auto objh = o2::header::get(ref.header); - if (!objh) { - LOGP(error, "No output object header in stack 
of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); - return; - } + auto objh = o2::header::get(ref.header); + if (!objh) { + LOGP(error, "No output object header in stack of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } - InputObject obj; - FairInputTBuffer tm(const_cast(ref.payload), static_cast(datah->payloadSize)); - tm.InitMap(); - obj.kind = tm.ReadClass(); - tm.SetBufferOffset(0); - tm.ResetMap(); - if (obj.kind == nullptr) { - LOGP(error, "Cannot read class info from buffer of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); - return; - } + InputObject obj; + FairInputTBuffer tm(const_cast(ref.payload), static_cast(datah->payloadSize)); + tm.InitMap(); + obj.kind = tm.ReadClass(); + tm.SetBufferOffset(0); + tm.ResetMap(); + if (obj.kind == nullptr) { + LOGP(error, "Cannot read class info from buffer of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } - auto policy = objh->mPolicy; - auto sourceType = objh->mSourceType; - auto hash = objh->mTaskHash; + auto policy = objh->mPolicy; + auto sourceType = objh->mSourceType; + auto hash = objh->mTaskHash; - obj.obj = tm.ReadObjectAny(obj.kind); - auto* named = static_cast(obj.obj); - obj.name = named->GetName(); - auto hpos = std::find_if(tskmap.begin(), tskmap.end(), [&](auto&& x) { return x.id == hash; }); - if (hpos == tskmap.end()) { - LOG(error) << "No task found for hash " << hash; - return; - } - auto taskname = hpos->name; - auto opos = std::find_if(objmap.begin(), objmap.end(), [&](auto&& x) { return x.id == hash; }); - if (opos == objmap.end()) { - LOG(error) << "No object list found for task " << taskname << " (hash=" << hash << ")"; - return; - } - auto objects = opos->bindings; - if (std::find(objects.begin(), objects.end(), obj.name) == objects.end()) { - LOG(error) << "No object " << obj.name << " in map for 
task " << taskname; - return; - } - auto nameHash = runtime_hash(obj.name.c_str()); - InputObjectRoute key{obj.name, nameHash, taskname, hash, policy, sourceType}; - auto existing = std::find_if(inputObjects->begin(), inputObjects->end(), [&](auto&& x) { return (x.first.uniqueId == nameHash) && (x.first.taskHash == hash); }); - // If it's the first one, we just add it to the list. - if (existing == inputObjects->end()) { - obj.count = objh->mPipelineSize; - inputObjects->push_back(std::make_pair(key, obj)); - existing = inputObjects->end() - 1; - } else { - obj.count = existing->second.count; - // Otherwise, we merge it with the existing one. - auto merger = existing->second.kind->GetMerge(); - if (!merger) { - LOG(error) << "Already one unmergeable object found for " << obj.name; + obj.obj = tm.ReadObjectAny(obj.kind); + auto* named = static_cast(obj.obj); + obj.name = named->GetName(); + auto hpos = std::find_if(tskmap.begin(), tskmap.end(), [&](auto&& x) { return x.id == hash; }); + if (hpos == tskmap.end()) { + LOG(error) << "No task found for hash " << hash; return; } - TList coll; - coll.Add(static_cast(obj.obj)); - merger(existing->second.obj, &coll, nullptr); - } - // We expect as many objects as the pipeline size, for - // a given object name and task hash. - existing->second.count -= 1; - - if (existing->second.count != 0) { - return; - } - // Write the object here. 
- auto route = existing->first; - auto entry = existing->second; - auto file = ROOTfileNames.find(route.policy); - if (file == ROOTfileNames.end()) { - return; - } - auto filename = file->second; - if (f[route.policy] == nullptr) { - f[route.policy] = TFile::Open(filename.c_str(), "RECREATE"); - } - auto nextDirectory = route.directory; - if ((nextDirectory != currentDirectory) || (filename != currentFile)) { - if (!f[route.policy]->FindKey(nextDirectory.c_str())) { - f[route.policy]->mkdir(nextDirectory.c_str()); + auto taskname = hpos->name; + auto opos = std::find_if(objmap.begin(), objmap.end(), [&](auto&& x) { return x.id == hash; }); + if (opos == objmap.end()) { + LOG(error) << "No object list found for task " << taskname << " (hash=" << hash << ")"; + return; } - currentDirectory = nextDirectory; - currentFile = filename; - } + auto objects = opos->bindings; + if (std::find(objects.begin(), objects.end(), obj.name) == objects.end()) { + LOG(error) << "No object " << obj.name << " in map for task " << taskname; + return; + } + auto nameHash = runtime_hash(obj.name.c_str()); + InputObjectRoute key{obj.name, nameHash, taskname, hash, policy, sourceType}; + auto existing = std::find_if(inputObjects->begin(), inputObjects->end(), [&](auto&& x) { return (x.first.uniqueId == nameHash) && (x.first.taskHash == hash); }); + // If it's the first one, we just add it to the list. + if (existing == inputObjects->end()) { + obj.count = objh->mPipelineSize; + inputObjects->push_back(std::make_pair(key, obj)); + existing = inputObjects->end() - 1; + } else { + obj.count = existing->second.count; + // Otherwise, we merge it with the existing one. 
+ auto merger = existing->second.kind->GetMerge(); + if (!merger) { + LOG(error) << "Already one unmergeable object found for " << obj.name; + return; + } + TList coll; + coll.Add(static_cast(obj.obj)); + merger(existing->second.obj, &coll, nullptr); + } + // We expect as many objects as the pipeline size, for + // a given object name and task hash. + existing->second.count -= 1; - // translate the list-structure created by the registry into a directory structure within the file - std::function writeListToFile; - writeListToFile = [&](TList* list, TDirectory* parentDir) { - TIter next(list); - TObject* object = nullptr; - while ((object = next())) { - if (object->InheritsFrom(TList::Class())) { - writeListToFile(static_cast(object), parentDir->mkdir(object->GetName(), object->GetName(), true)); - } else { - parentDir->WriteObjectAny(object, object->Class(), object->GetName()); - auto* written = list->Remove(object); - delete written; + if (existing->second.count != 0) { + return; + } + // Write the object here. 
+ auto route = existing->first; + auto entry = existing->second; + auto file = ROOTfileNames.find(route.policy); + if (file == ROOTfileNames.end()) { + return; + } + auto filename = file->second; + if (f[route.policy] == nullptr) { + f[route.policy] = TFile::Open(filename.c_str(), "RECREATE"); + } + auto nextDirectory = route.directory; + if ((nextDirectory != currentDirectory) || (filename != currentFile)) { + if (!f[route.policy]->FindKey(nextDirectory.c_str())) { + f[route.policy]->mkdir(nextDirectory.c_str()); } + currentDirectory = nextDirectory; + currentFile = filename; } - }; - TDirectory* currentDir = f[route.policy]->GetDirectory(currentDirectory.c_str()); - if (route.sourceType == OutputObjSourceType::HistogramRegistrySource) { - auto* outputList = static_cast(entry.obj); - outputList->SetOwner(false); + // translate the list-structure created by the registry into a directory structure within the file + std::function writeListToFile; + writeListToFile = [&](TList* list, TDirectory* parentDir) { + TIter next(list); + TObject* object = nullptr; + while ((object = next())) { + if (object->InheritsFrom(TList::Class())) { + writeListToFile(static_cast(object), parentDir->mkdir(object->GetName(), object->GetName(), true)); + } else { + parentDir->WriteObjectAny(object, object->Class(), object->GetName()); + auto* written = list->Remove(object); + delete written; + } + } + }; + + TDirectory* currentDir = f[route.policy]->GetDirectory(currentDirectory.c_str()); + if (route.sourceType == OutputObjSourceType::HistogramRegistrySource) { + auto* outputList = static_cast(entry.obj); + outputList->SetOwner(false); + + // if registry should live in dedicated folder a TNamed object is appended to the list + if (outputList->Last() && outputList->Last()->IsA() == TNamed::Class()) { + delete outputList->Last(); + outputList->RemoveLast(); + currentDir = currentDir->mkdir(outputList->GetName(), outputList->GetName(), true); + } - // if registry should live in dedicated 
folder a TNamed object is appended to the list - if (outputList->Last() && outputList->Last()->IsA() == TNamed::Class()) { - delete outputList->Last(); - outputList->RemoveLast(); - currentDir = currentDir->mkdir(outputList->GetName(), outputList->GetName(), true); + writeListToFile(outputList, currentDir); + outputList->SetOwner(); + delete outputList; + entry.obj = nullptr; + } else { + currentDir->WriteObjectAny(entry.obj, entry.kind, entry.name.c_str()); + delete (TObject*)entry.obj; + entry.obj = nullptr; } - - writeListToFile(outputList, currentDir); - outputList->SetOwner(); - delete outputList; - entry.obj = nullptr; - } else { - currentDir->WriteObjectAny(entry.obj, entry.kind, entry.name.c_str()); - delete (TObject*)entry.obj; - entry.obj = nullptr; + }; + for (int pi = 0; pi < pc.inputs().getNofParts(0); ++pi) { + mergePart(pc.inputs().get("x", pi)); } }; }}; From 0386f65567a1fcda173c9ae39304bbd284677774 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 19 May 2025 10:49:44 +0200 Subject: [PATCH 0058/1426] Revert "GPU Common: Workaround for removing gpustd::array, temporary alias for O2Physics" This reverts commit a850e9eb3e6a634a1e87a70170c05ad6d8bce3af. 
--- .../ReconstructionDataFormats/TrackParametrization.h | 1 - GPU/Common/GPUCommonArray.h | 6 ------ 2 files changed, 7 deletions(-) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index 1d0a5f1a9f1fd..f240e34861eeb 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -29,7 +29,6 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" #include "GPUCommonMath.h" -#include "GPUCommonArray.h" #include "GPUROOTCartesianFwd.h" #ifndef GPUCA_GPUCODE_DEVICE diff --git a/GPU/Common/GPUCommonArray.h b/GPU/Common/GPUCommonArray.h index fa86d7bb4a021..e83ca8c4a69fc 100644 --- a/GPU/Common/GPUCommonArray.h +++ b/GPU/Common/GPUCommonArray.h @@ -48,10 +48,4 @@ using array = std::array; } // namespace std #endif -namespace o2::gpu::gpustd -{ -template -using array = ::std::array; // temporary alias, to remove dependent types -} // o2::gpu::gpustd - #endif // GPUCOMMONARRAY_H From efff2f780c2b35603cdd6614f868100e94ba41e7 Mon Sep 17 00:00:00 2001 From: shahor02 Date: Wed, 21 May 2025 23:04:18 +0200 Subject: [PATCH 0059/1426] Add TPC cluster selector helper for tracking studies (#14308) --- .../study/CMakeLists.txt | 7 ++ .../GlobalTrackingStudy/TPCClusSelector.h | 92 ++++++++++++++ .../study/src/GlobalTrackingStudyLinkDef.h | 1 + .../study/src/TPCClusSelector.cxx | 117 ++++++++++++++++++ 4 files changed, 217 insertions(+) create mode 100644 Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h create mode 100644 Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx diff --git a/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt b/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt index 398e7eb215f2e..776d3946283c3 100644 --- 
a/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt +++ b/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt @@ -12,6 +12,7 @@ #add_compile_options(-O0 -g -fPIC) o2_add_library(GlobalTrackingStudy + TARGETVARNAME targetName SOURCES src/TPCTrackStudy.cxx src/TrackingStudy.cxx src/SVStudy.cxx @@ -23,6 +24,7 @@ o2_add_library(GlobalTrackingStudy src/TrackInfoExt.cxx src/TrackMCStudyConfig.cxx src/TrackMCStudyTypes.cxx + src/TPCClusSelector.cxx PUBLIC_LINK_LIBRARIES O2::GlobalTracking O2::GlobalTrackingWorkflowReaders O2::GlobalTrackingWorkflowHelpers @@ -73,3 +75,8 @@ o2_add_executable(dump-workfow COMPONENT_NAME bc-tracks SOURCES src/track-dump-workflow.cxx PUBLIC_LINK_LIBRARIES O2::GlobalTrackingStudy) + +if (OpenMP_CXX_FOUND) + target_compile_definitions(${targetName} PRIVATE WITH_OPENMP) + target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX) +endif() diff --git a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h new file mode 100644 index 0000000000000..c1765558458c2 --- /dev/null +++ b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h @@ -0,0 +1,92 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +// helper class for TPC clusters selection + +#ifndef ALICEO2_TPCCLUSSELECTOR_H +#define ALICEO2_TPCCLUSSELECTOR_H + +#include +#include +#include + +namespace o2::tpc +{ +class ClusterNativeAccess; + +class TPCClusSelector +{ + // helper to select TPC cluster matching to certain timebin and optionally pads range + // example of usage: + /* + TPCClusSelector clSel; + o2::tpc::ClusterNativeHelper::Reader tcpClusterReader; + tcpClusterReader.init(native_clusters_file.c_str()); + o2::tpc::ClusterNativeAccess tpcClusterIdxStruct; + std::unique_ptr tpcClusterBuffer; ///< buffer for clusters in tpcClusterIdxStruct + o2::tpc::ClusterNativeHelper::ConstMCLabelContainerViewWithBuffer tpcClusterMCBuffer; ///< buffer for mc labels + + tcpClusterReader.read(iTF); + tcpClusterReader.fillIndex(tpcClusterIdxStruct, tpcClusterBuffer, tpcClusterMCBuffer); + + clSel.fill(tpcClusterIdxStruct); // Create sorted index + // to get i-th cluster in ordered timebins: + const auto& clus = tpcClusterIdxStruct.clusters[sector][row][ clSel.getIndex(sector, row, i)]; + + // to get sorted indices range of clusters in the tbmin:tbmax range + auto rng = clSel.findClustersRange(sector, row, tbmin, tbmax, tpcClusterIdxStruct); + if (rng.first>rng.second) { // nothing is found } + const auto& cln = tpcClusterIdxStruct.clusters[sector][row][clSel.getIndex(sector, row, rng.first )]; /...
+ + // to get number of clusters in tbmin:tbmax, padmin:padmax range (and optionally get the list) + std::vector cllist; // optional list + int nfnd = clSel.findClustersEntries(sector, row, tbmin, tbmax, padmin, padmax, tpcClusterIdxStruct, &cllist); + for (int i=0;i findClustersRange(int sec, int row, float tbmin, float tbmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct); + int findClustersEntries(int sec, int row, float tbmin, float tbmax, float padmin, float padmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct, std::vector* clIDDirect = nullptr); + void fill(const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct); + + int getNThreads() const { return mNThreads; } + void setNThreads(int n); + + private: + struct Sector { + static constexpr int NRows = 152; + std::array, NRows> rows; + void clear() + { + for (auto& r : rows) + r.clear(); + } + }; + + static constexpr int NSectors = 36; + std::array mSectors{}; + int mNThreads = 1; + + ClassDefNV(TPCClusSelector, 1); +}; + +} // namespace o2::tpc + +#endif diff --git a/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h b/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h index f666132c9c1cf..f0d3e7d4d0b4e 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h +++ b/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h @@ -38,5 +38,6 @@ #pragma link C++ class std::vector < o2::trackstudy::ClResTPCCont> + ; #pragma link C++ class o2::trackstudy::TrackPairInfo + ; #pragma link C++ class std::vector < o2::trackstudy::TrackPairInfo> + ; +#pragma link C++ class o2::tpc::TPCClusSelector + ; #endif diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx new file mode 100644 index 0000000000000..e5b28fb0fd62b --- /dev/null +++ b/Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx @@ -0,0 +1,117 @@ +// Copyright
2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+// helper class for TPC clusters selection
+#include "GlobalTrackingStudy/TPCClusSelector.h"
+#include "DataFormatsTPC/ClusterNativeHelper.h"
+#include "Framework/Logger.h"
+#include
+#ifdef WITH_OPENMP
+#include
+#endif
+
+using namespace o2::tpc;
+
+void TPCClusSelector::setNThreads(int n)
+{
+#ifndef WITH_OPENMP
+  if (n > 1) {
+    LOGP(warn, "No OpenMP");
+  }
+  n = 1;
+#endif
+  mNThreads = n;
+}
+
+std::pair TPCClusSelector::findClustersRange(int sec, int row, float tbmin, float tbmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct)
+{
+  // find sorted indices of clusters in the [tbmin:tbmax] range, if not found, return {-1,-2}
+  const auto& vidx = mSectors[sec].rows[row];
+  const auto* clarr = tpcClusterIdxStruct.clusters[sec][row];
+  // use binary search to find 1st cluster with time >= tbmin
+  int ncl = vidx.size(), left = 0, right = ncl;
+  while (left < right) {
+    int mid = left + (right - left) / 2;
+    if (clarr[vidx[mid]].getTime() < tbmin) {
+      left = mid + 1;
+    } else {
+      right = mid;
+    }
+  }
+  if (left == ncl || clarr[vidx[left]].getTime() > tbmax) {
+    return {-1, -2}; // all clusters have time < tbmin or no clusters in the range [tbmin:tbmax]
+  }
+  int idmin = left, idmax = left, idtst = idmin;
+  // extend the range over the following entries (larger times) while their time is <= tbmax
+  while (++idtst < ncl && clarr[vidx[idtst]].getTime() <= tbmax) {
+    idmax = idtst;
+  }
+  return {idmin, idmax};
+}
+
+int TPCClusSelector::findClustersEntries(int sec, int row, float tbmin, float tbmax, float padmin, float padmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct, std::vector* clIDDirect)
+{
+  // find direct cluster indices for tbmin:tbmax / padmin:padmax range, fill clIDDirect vector if provided, return the number of clusters found
+  const auto& vidx = mSectors[sec].rows[row];
+  const auto* clarr = tpcClusterIdxStruct.clusters[sec][row];
+  // use binary search to find 1st cluster with time >= tbmin
+  int ncl = vidx.size(), left = 0, right = ncl;
+  if (clIDDirect) {
+    clIDDirect->clear();
+  }
+  while (left < right) {
+    int mid = left + (right - left) / 2;
+    if (clarr[vidx[mid]].getTime() < tbmin) {
+      left = mid + 1;
+    } else {
+      right = mid;
+    }
+  }
+  if (left == ncl || clarr[vidx[left]].getTime() > tbmax) {
+    return 0; // all clusters have time < tbmin or no clusters in the range [tbmin:tbmax]
+  }
+  int nclf = 0;
+  for (; left < ncl; left++) { // walk the time-sorted entries; without the increment this loop would never terminate
+    const auto& cl = clarr[vidx[left]];
+    if (cl.getTime() > tbmax) {
+      break;
+    }
+    if (cl.getPad() >= padmin && cl.getPad() <= padmax) {
+      nclf++;
+      if (clIDDirect) {
+        clIDDirect->push_back(vidx[left]);
+      }
+    }
+  }
+  return nclf;
+}
+
+void TPCClusSelector::fill(const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct)
+{
+  for (int is = 0; is < NSectors; is++) { // for every sector/row build the list of cluster indices ordered by cluster time
+    auto& sect = mSectors[is];
+#ifdef WITH_OPENMP
+#pragma omp parallel for schedule(dynamic) num_threads(mNThreads)
+#endif
+    for (int ir = 0; ir < Sector::NRows; ir++) {
+      size_t ncl = tpcClusterIdxStruct.nClusters[is][ir];
+      if (ncl >= 0xffff) {
+        LOGP(error, "Row {} of sector {} has {} clusters, truncating to {}", ir, is, ncl, int(0xffff));
+        ncl = 0xffff;
+      }
+      auto& rowidx = sect.rows[ir];
+      rowidx.resize(ncl);
+      std::iota(rowidx.begin(), rowidx.end(), 0); // start from the identity order 0..ncl-1
+      const auto* clus = tpcClusterIdxStruct.clusters[is][ir]; // C array of clusters
+      std::sort(rowidx.begin(), rowidx.end(), [&](size_t a, size_t b) { return clus[a].getTime() < clus[b].getTime(); });
+    }
+  }
+}
From 81b7a64680531129657f5a9eb2a222b3d0c779c1 Mon Sep 17 00:00:00 2001
From: David Rohr
Date: Wed, 21 May 2025
19:06:16 +0200 Subject: [PATCH 0060/1426] GPU: Fix allocator / deallocator mismatch --- GPU/GPUTracking/Base/GPUReconstruction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index e0c866fd9421b..06f1c27fb6c06 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -378,7 +378,7 @@ class GPUReconstruction std::vector res; }; struct alignedDeleter { - void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; + void operator()(void* ptr) { ::operator delete[](ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; }; std::unordered_map mMemoryReuse1to1; std::vector> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCound, tag From b8cacf6b25dbb22b30160821d70992cfac594f8b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 19:16:30 +0200 Subject: [PATCH 0061/1426] GPU TPC Merger: Clarify more variable names --- .../DataCompression/GPUTPCCompression.cxx | 2 +- .../GPUChainTrackingDebugAndProfiling.cxx | 4 +- .../Global/GPUChainTrackingMerger.cxx | 10 ++--- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 40 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 8 ++-- GPU/GPUTracking/SectorTracker/GPUTPCTrack.h | 3 -- 6 files changed, 32 insertions(+), 35 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index ec1636dfe7f59..61f8a614fbe6f 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -124,7 +124,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusters = io.clustersNative->nClustersTotal; mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? 
mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; - mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include + mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NMergedTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index e9721ec9d12bf..173d2fb916239 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -154,7 +154,7 @@ void GPUChainTracking::PrintMemoryStatistics() } addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits)); addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks()); - addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NOutputTrackClusters(), processors()->tpcMerger.NMaxOutputTrackClusters()); + addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NMergedTrackClusters(), processors()->tpcMerger.NMaxMergedTrackClusters()); if (mRec->GetProcessingSettings().createO2Output) { addToMap("TPC O2 Tracks", usageMap, processors()->tpcMerger.NOutputTracksTPCO2(), processors()->tpcMerger.NOutputTracksTPCO2()); @@ -182,7 +182,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits()); } GPUInfo("MEMREL Tracks NCl %d NTrk %d", 
processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks()); - GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); + GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTrackClusters()); } void GPUChainTracking::PrepareKernelDebugOutput() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 2b3d719a27dea..118f0bf73a845 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -261,9 +261,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (param().dodEdxEnabled) { GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0); } - GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { - GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NMergedTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0); } @@ -330,7 +330,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mIOPtrs.nMergedTracks = Merger.NMergedTracks(); 
mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); - mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); + mIOPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment(); mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt(); mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2(); @@ -344,7 +344,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); - processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); + processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment(); processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt(); processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2(); @@ -355,7 +355,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } if (GetProcessingSettings().debugLevel >= 2) { - GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters()); + GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NMergedTrackClusters(), Merger.NClusters()); } return 0; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 1d5a7a0b1df47..16182464c12fe 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -372,9 +372,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks); } } - computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); + computePointerWithAlignment(mem, 
mClusters, mNMaxMergedTrackClusters); if (mRec->GetParam().par.earlyTpcTransform) { - computePointerWithAlignment(mem, mClustersXYZ, mNMaxOutputTrackClusters); + computePointerWithAlignment(mem, mClustersXYZ, mNMaxMergedTrackClusters); } computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters); return mem; @@ -446,7 +446,7 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxSingleSectorTracks = ntrk; } } - mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); + mNMaxMergedTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field } else { @@ -1354,14 +1354,14 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i continue; } - uint32_t newRef = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); - if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxOutputTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxOutputTrackClusters); - for (uint32_t k = newRef; k < mNMaxOutputTrackClusters; k++) { + uint32_t newRef = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); + if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxMergedTrackClusters); + for (uint32_t k = newRef; k < mNMaxMergedTrackClusters; k++) { mClusters[k].num = 0; mClusters[k].state = 0; } - CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); return; } @@ 
-1711,20 +1711,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = nFilteredHits; } - const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); - if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters); - CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); + const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits); + if (iMergedTrackFirstCluster >= mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); continue; } - GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHit* const cl = mClusters + iMergedTrackFirstCluster; for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; - GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1759,13 +1759,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetLooper(leg > 0); mergedTrack.SetLegs(leg); mergedTrack.SetNClusters(nHits); - mergedTrack.SetFirstClusterRef(iOutTrackFirstCluster); + mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster); GPUTPCGMTrackParam& p1 = mergedTrack.Param(); const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX 
= Param().par.earlyTpcTransform ? mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; @@ -1796,13 +1796,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (Param().rec.tpc.mergeCE) { bool CEside; if (Param().par.earlyTpcTransform) { - const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; + const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); } else { auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iMergedTrackFirstCluster)[CEside ? 
(nHits - 1) : 0] : nullptr, iOutputTrack); } } // itr } @@ -1855,7 +1855,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nBlocks * nThreads) { if (mSharedCount[mClusters[i].num] > 1) { mClusters[i].state |= GPUTPCGMMergedTrackHit::flagShared; } @@ -1876,7 +1876,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackSort[mTrackOrderAttach[i]] = i; } - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nThreads * nBlocks) { mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 4487b6d937dc2..54a541ebe0fd6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -70,7 +70,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nLoopData; GPUAtomic(uint32_t) nUnpackedTracks; GPUAtomic(uint32_t) nMergedTracks; - GPUAtomic(uint32_t) nOutputTrackClusters; + GPUAtomic(uint32_t) nMergedTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; const GPUTPCTrack* firstExtrapolatedTracks[NSECTORS]; @@ -113,8 +113,8 @@ class GPUTPCGMMerger : public GPUProcessor GPUhdi() uint32_t NClusters() const { return mNClusters; } GPUhdi() uint32_t NMaxClusters() const { return mNMaxClusters; } GPUhdi() uint32_t 
NMaxTracks() const { return mNMaxTracks; } - GPUhdi() uint32_t NMaxOutputTrackClusters() const { return mNMaxOutputTrackClusters; } - GPUhdi() uint32_t NOutputTrackClusters() const { return mMemory->nOutputTrackClusters; } + GPUhdi() uint32_t NMaxMergedTrackClusters() const { return mNMaxMergedTrackClusters; } + GPUhdi() uint32_t NMergedTrackClusters() const { return mMemory->nMergedTrackClusters; } GPUhdi() const GPUTPCGMMergedTrackHit* Clusters() const { return mClusters; } GPUhdi() GPUTPCGMMergedTrackHit* Clusters() { return (mClusters); } GPUhdi() const GPUTPCGMMergedTrackHitXYZ* ClustersXYZ() const { return mClustersXYZ; } @@ -249,7 +249,7 @@ class GPUTPCGMMerger : public GPUProcessor uint32_t mNTotalSectorTracks = 0; // maximum number of incoming sector tracks uint32_t mNMaxTracks = 0; // maximum number of output tracks uint32_t mNMaxSingleSectorTracks = 0; // max N tracks in one sector - uint32_t mNMaxOutputTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) + uint32_t mNMaxMergedTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) uint32_t mNMaxClusters = 0; // max total unique clusters (in event) uint32_t mNMaxLooperMatches = 0; // Maximum number of candidate pairs for looper matching diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h index 225f5f0e2c7ad..7306c84cf949c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h @@ -53,9 +53,6 @@ class GPUTPCTrack GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSectorOutCluster); } GPUhd() const GPUTPCTrack* GetNextTrack() const { return (const GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } GPUhd() GPUTPCTrack* NextTrack() { return (GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } - GPUhd() void SetOutTrackCluster(int32_t i, const GPUTPCSectorOutCluster& v) { 
((GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)))[i] = v; } - GPUhd() const GPUTPCSectorOutCluster* OutTrackClusters() const { return (const GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)); } - GPUhd() const GPUTPCSectorOutCluster& OutTrackCluster(int32_t i) const { return OutTrackClusters()[i]; } private: int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array From c5498af7e0ed421d066b22fc36d34530bcd6c478 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 19:43:59 +0200 Subject: [PATCH 0062/1426] GPU TPC Merger: Fix out of bounds check --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 16182464c12fe..533e697cc5852 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1513,7 +1513,6 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUTPCGMSectorTrack* trackParts[kMaxParts]; for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; if (track.PrevSegmentNeighbour() >= 0) { @@ -1712,7 +1711,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits); - if (iMergedTrackFirstCluster >= mNMaxMergedTrackClusters) { + if (iMergedTrackFirstCluster + nHits > mNMaxMergedTrackClusters) { raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); continue; From 0a7f3df8bf7e24304a1d2882061f8589e746598f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 21:06:40 +0200 Subject: [PATCH 0063/1426] GPU: Add 
memoryScaling fuzzing debug option --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 8 ++++++ .../DataTypes/GPUMemorySizeScalers.cxx | 26 +++++++++++++++++++ .../DataTypes/GPUMemorySizeScalers.h | 11 +++++--- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 +- 6 files changed, 45 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index a4e5d5e1189f5..ad7a31cbd7470 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -304,7 +304,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings->rtc.optConstexpr = false; } - mMemoryScalers->factor = GetProcessingSettings().memoryScalingFactor; + mMemoryScalers->scalingFactor = GetProcessingSettings().memoryScalingFactor; mMemoryScalers->conservative = GetProcessingSettings().conservativeMemoryEstimate; mMemoryScalers->returnMaxVal = GetProcessingSettings().forceMaxMemScalers != 0; if (GetProcessingSettings().forceMaxMemScalers > 1) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 5f80a56e9e64e..2d1061616d907 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -216,6 +216,14 @@ int32_t GPUReconstructionCPU::ExitDevice() int32_t GPUReconstructionCPU::RunChains() { mMemoryScalers->temporaryFactor = 1.; + if (GetProcessingSettings().memoryScalingFuzz) { + static std::mt19937 rng; + static std::uniform_int_distribution dist(0, 1000000); + uint64_t fuzzFactor = GetProcessingSettings().memoryScalingFuzz == 1 ? 
dist(rng) : GetProcessingSettings().memoryScalingFuzz; + GPUInfo("Fuzzing memory scaling factor with %lu", fuzzFactor); + mMemoryScalers->fuzzScalingFactor(fuzzFactor); + } + mStatNEvents++; mNEventsProcessed++; diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx index 8b8fbc3ecae20..42ac2e8015f45 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx @@ -15,6 +15,8 @@ #include "GPUMemorySizeScalers.h" #include "GPULogging.h" +#include + using namespace o2::gpu; void GPUMemorySizeScalers::rescaleMaxMem(size_t newAvailableMemory) @@ -36,3 +38,27 @@ void GPUMemorySizeScalers::rescaleMaxMem(size_t newAvailableMemory) tpcMaxMergedTrackHits = (double)tmp.tpcMaxMergedTrackHits * scaleFactor; availableMemory = newAvailableMemory; } + +double GPUMemorySizeScalers::getScalingFactor() +{ + if (!doFuzzing) { + return scalingFactor; + } + static std::uniform_int_distribution dist(0, 1000000); + static std::mt19937 rng; + if (fuzzSeed) { + rng = std::mt19937(fuzzSeed); + fuzzLimit = dist(rng) / 10; + fuzzSeed = 0; + } + if (dist(rng) > fuzzLimit) { + return scalingFactor; + } + return scalingFactor * 0.000001 * dist(rng); +} + +void GPUMemorySizeScalers::fuzzScalingFactor(uint64_t seed) +{ + fuzzSeed = seed; + doFuzzing = true; +} diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h index e5012d86742f8..164ecb32c26c7 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h @@ -28,7 +28,9 @@ struct GPUMemorySizeScalers { size_t nITSTracks = 0; // General scaling factor - double factor = 1; + double scalingFactor = 1; + uint64_t fuzzSeed = 0; + uint64_t fuzzLimit = 0; double temporaryFactor = 1; bool conservative = 0; @@ -64,11 +66,14 @@ struct GPUMemorySizeScalers { size_t tpcMaxMergedTrackHits = 200000000; size_t availableMemory = 
20500000000; bool returnMaxVal = false; + bool doFuzzing = false; void rescaleMaxMem(size_t newAvailableMemory); + double getScalingFactor(); + void fuzzScalingFactor(uint64_t seed); inline size_t getValue(size_t maxVal, size_t val) { - return returnMaxVal ? maxVal : (std::min(maxVal, offset + val) * factor * temporaryFactor); + return returnMaxVal ? maxVal : (std::min(maxVal, offset + val) * (doFuzzing == 0 ? scalingFactor : getScalingFactor()) * temporaryFactor); } inline size_t NTPCPeaks(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxPeaks : (GPUCA_NSECTORS * tpcMaxPeaks), hitOffset + tpcDigits * tpcPeaksPerDigit); } @@ -81,7 +86,7 @@ struct GPUMemorySizeScalers { inline size_t NTPCSectorTrackHits(size_t tpcHits, uint8_t withRejection = 0) { return getValue(tpcMaxSectorTrackHits, tpcHits * (withRejection ? tpcSectorTrackHitsPerHitWithRejection : tpcSectorTrackHitsPerHit)); } inline size_t NTPCMergedTracks(size_t tpcSectorTracks) { return getValue(tpcMaxMergedTracks, tpcSectorTracks * (conservative ? 1.0 : tpcMergedTrackPerSectorTrack)); } inline size_t NTPCMergedTrackHits(size_t tpcSectorTrackHitss) { return getValue(tpcMaxMergedTrackHits, tpcSectorTrackHitss * tpcMergedTrackHitPerSectorHit); } - inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 1024 : std::min(1024, tpcCompressedUnattachedHitsBase1024[type] * factor * temporaryFactor); } + inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 1024 : std::min(1024, tpcCompressedUnattachedHitsBase1024[type] * (doFuzzing == 0 ? 
scalingFactor : getScalingFactor()) * temporaryFactor); } }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 12f40cda4c398..238994ee53af5 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -312,6 +312,7 @@ AddOption(memoryAllocationStrategy, int8_t, 0, "", 0, "Memory Allocation Strageg AddOption(forceMemoryPoolSize, uint64_t, 1, "memSize", 0, "Force size of allocated GPU / page locked host memory", min(0ul)) AddOption(forceHostMemoryPoolSize, uint64_t, 0, "hostMemSize", 0, "Force size of allocated host page locked host memory (overriding memSize)", min(0ul)) AddOption(memoryScalingFactor, float, 1.f, "", 0, "Factor to apply to all memory scalers") +AddOption(memoryScalingFuzz, uint64_t, 0, "", 0, "Fuzz the memoryScalingFactor (0 disable, 1 enable, >1 set seed", def(1)) AddOption(conservativeMemoryEstimate, bool, false, "", 0, "Use some more conservative defaults for larger buffers during TPC processing") AddOption(tpcInputWithClusterRejection, uint8_t, 0, "", 0, "Indicate whether the TPC input is CTF data with cluster rejection, to tune buffer estimations") AddOption(forceMaxMemScalers, uint64_t, 0, "", 0, "Force using the maximum values for all buffers, Set a value n > 1 to rescale all maximums to a memory size of n") diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 81eb2c285192b..f7e972315a739 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -110,7 +110,7 @@ int32_t GPUO2Interface::Initialize(const GPUO2InterfaceConfiguration& config) return (1); } if (!mCtx[i].mRec->IsGPU() && mCtx[i].mRec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - mCtx[i].mRec->MemoryScalers()->factor *= 2; + mCtx[i].mRec->MemoryScalers()->scalingFactor *= 2; } } if 
(mConfig->configProcessing.doublePipeline) { From 2673d512ffe9e1e1f658ace2ccf93ac799501b56 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 22 May 2025 11:17:08 +0200 Subject: [PATCH 0064/1426] Drop obsolete documentation (#14309) --- .cmake-format.py | 6 -- Algorithm/CMakeLists.txt | 3 - Algorithm/doc/Algorithm.3.in | 12 --- Algorithm/doc/algorithm_parser.3.in | 135 ---------------------------- CMakeLists.txt | 5 -- Examples/Ex5/CMakeLists.txt | 2 - Examples/Ex5/doc/ex5.7.in | 62 ------------- cmake/O2TargetManPage.cmake | 79 ---------------- doc/CMakeInstructions.md | 18 ---- doc/FairMQDevice.1.in | 64 ------------- doc/ManPages.md | 23 ----- doc/o2-timeframe-file-format.1.in | 27 ------ doc/o2.1.in | 19 ---- 13 files changed, 455 deletions(-) delete mode 100644 Algorithm/doc/Algorithm.3.in delete mode 100644 Algorithm/doc/algorithm_parser.3.in delete mode 100644 Examples/Ex5/doc/ex5.7.in delete mode 100644 cmake/O2TargetManPage.cmake delete mode 100644 doc/FairMQDevice.1.in delete mode 100644 doc/ManPages.md delete mode 100644 doc/o2-timeframe-file-format.1.in delete mode 100644 doc/o2.1.in diff --git a/.cmake-format.py b/.cmake-format.py index 9827eecd329c4..ae092bc09f363 100644 --- a/.cmake-format.py +++ b/.cmake-format.py @@ -66,12 +66,6 @@ "HEADERS": '*', } }, - "o2_target_man_page": { - "kwargs": { - "NAME": '+', - "SECTION": '*', - } - }, "add_root_dictionary": { "kwargs": { "LINKDEF": '+', diff --git a/Algorithm/CMakeLists.txt b/Algorithm/CMakeLists.txt index b245562c7cc93..ed7a42a96e528 100644 --- a/Algorithm/CMakeLists.txt +++ b/Algorithm/CMakeLists.txt @@ -11,9 +11,6 @@ o2_add_header_only_library(Algorithm INTERFACE_LINK_LIBRARIES O2::Headers) -o2_target_man_page(Algorithm NAME Algorithm SECTION 3) -o2_target_man_page(Algorithm NAME algorithm_parser SECTION 3) - o2_add_test(o2formatparser SOURCES test/o2formatparser.cxx COMPONENT_NAME Algorithm diff --git a/Algorithm/doc/Algorithm.3.in 
b/Algorithm/doc/Algorithm.3.in deleted file mode 100644 index eaf618ee68da2..0000000000000 --- a/Algorithm/doc/Algorithm.3.in +++ /dev/null @@ -1,12 +0,0 @@ -.\" Alice O2 manpage for module Algorithm -.TH "AliceO2" 3 "17 Jan 2017" "1.0" "Algorithm man page" - -.SH NAME -AliceO2 - module -.B Algorithm - -.SH DESCRIPTION -A collection of generic algorithms for Alice O2 - -.SH SEE ALSO -algorithm_parser(3) diff --git a/Algorithm/doc/algorithm_parser.3.in b/Algorithm/doc/algorithm_parser.3.in deleted file mode 100644 index 98f45df279669..0000000000000 --- a/Algorithm/doc/algorithm_parser.3.in +++ /dev/null @@ -1,135 +0,0 @@ -.\" Alice O2 manpage for parser algorithms -.TH "AliceO2" 3 "17 Jan 2017" "1.0" "Algorithm Parser man page" - -.SH NAME -AliceO2 - module -.B Algorithm -- data parsers - -.SH SYNOPSIS -.B ForwardParser< -.I SomeHeaderType -, -.I SomeTrailerType -.B > - -.B ReverseParser< -.I SomeHeaderType -, -.I SomeTrailerType -.B > - -.SS Public types -.TP 2 -// a compound of header, data, and trailer -.B struct FrameInfo { - using PtrT = const PayloadType*; - const HeaderType* header = nullptr; - const TrailerType* trailer = nullptr; - PtrT payload = nullptr; - size_t length = 0; - -.B }; - -.TP 2 -.B using CheckHeaderFct = std::function; -alias for callback checking the header, return true if the object is a valid header -.TP 2 -.B using CheckTrailerFct = std::function; -alias for callback checking the trailer -.TP 2 -.B using GetFrameSizeFct = std::function; -alias for callback to get the complete frame size including header, trailer and the data -.TP 2 -.B using InsertFct = std::function; -function callback to insert/handle one frame into, sequentially called for all frames if the whole block has a valid format - -.SS Public member functions -.TP 2 -.B template -.B int parse(const InputType* \fIbuffer\fB, size_t \fIbufferSize\fB, CheckHeaderFct \fIcheckHeader\fB, CheckTrailerFct \fIcheckTrailer\fB, GetFrameSizeFct \fIgetFrameSize\fB, InsertFct \fIinsert\fB) 
- -.SS Public member variables -.TP 2 -.B static const size_t headOffset = typesize::size; -the length offset due to header -.TP 2 -.B static const size_t tailOffset = typesize::size; -the length offset due to trailer -.TP 2 -.B static const size_t totalOffset = headOffset + tailOffset; -total length offset due to header and trailer - -.SH DESCRIPTION -Template utilities for parsing of data sequences. Each entry in the sequence consist of a header, variable payload, and optionally a trailer. The three parts are collected in the FrameInfo structure for every entry. - -Callback functions for checking header and trailer integrity, getting length of the current frame and handling of a frame. - -.SS ForwardParser -The size is expected to be part of the header, parsing starts at beginning of buffer. -Trailer type can be void, which is also the default template parameter. That -allows to define a frame consisting of only header and data. - -.SS ReverseParser -The size is expected to be part of the trailer, the parsing is thus in reverse direction. Also the insert callback is called with the entries starting form the end of the buffer. -An easy extension can be to reverse the order of the inserts, meaning that the entries are read from the beginning. - -.SH EXAMPLES -.SS ReverseParser example -.EX -using SomeParser = ReverseParser; -SomeParser parser; -std::vector frames; -parser.parse(ptr, size, - [] (const typename SomeParser::HeaderType& h) { - // check the header - return true; - }, - [] (const typename SomeParser::TrailerType& t) { - // check the trailer - return true; - }, - [] (const typename SomeParser::TrailerType& t) { - // get the size of the frame including payload - // and header and trailer size, e.g. 
payload size - // from a trailer member - return t.payloadSize + SomeParser::totalOffset; - }, - [&frames] (typename SomeParser::FrameInfo& info) { - frames.emplace_back(info); - return true; - } - ) -.EE - -.SS ForwardParser example with frame consisting of header and payload -.EX -using SomeParser = ForwardParser; -SomeParser parser; -std::vector frames; -parser.parse(ptr, size, - [] (const typename SomeParser::HeaderType& h) { - // check the header - return true; - }, - [] (const typename SomeParser::HeaderType& h) { - // get the size of the frame including payload - // and header and trailer size, e.g. payload size - // from a header member - return h.payloadSize + SomeParser::totalOffset; - }, - [&frames] (typename SomeParser::FrameInfo& info) { - frames.emplace_back(info); - return true; - } - ) -.EE - -.SH BUGS, CONTRIBUTIONS -Please add an issue to -.UR https://github.com/AliceO2Group/AliceO2/issues -.UE - -.SH SEE ALSO -.UR https://github.com/AliceO2Group/AliceO2/blob/dev/Algorithm/include/Algorithm/Parser.h -.UE diff --git a/CMakeLists.txt b/CMakeLists.txt index b71d05175e9e9..adecffc0f4dbf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,7 +86,6 @@ include(O2AddTestRootMacro) include(O2ReportNonTestedMacros) include(O2TargetRootDictionary) include(O2DataFile) -include(O2TargetManPage) include(O2AddWorkflow) include(O2SetROOTPCMDependencies) include(O2AddHipifiedExecutable) @@ -117,10 +116,6 @@ endif() add_subdirectory(config) -add_custom_target(man ALL) -o2_target_man_page(man NAME o2) -o2_target_man_page(man NAME FairMQDevice) - # Testing and packaging only needed if we are the top level directory if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) # Documentation diff --git a/Examples/Ex5/CMakeLists.txt b/Examples/Ex5/CMakeLists.txt index 3951709a8a050..f6216bcebdad2 100644 --- a/Examples/Ex5/CMakeLists.txt +++ b/Examples/Ex5/CMakeLists.txt @@ -12,5 +12,3 @@ o2_add_executable(ex5 SOURCES src/run.cxx COMPONENT_NAME example TARGETVARNAME 
targetName) - -o2_target_man_page(${targetName} NAME ex5 SECTION 7) diff --git a/Examples/Ex5/doc/ex5.7.in b/Examples/Ex5/doc/ex5.7.in deleted file mode 100644 index 5d346f7f4798d..0000000000000 --- a/Examples/Ex5/doc/ex5.7.in +++ /dev/null @@ -1,62 +0,0 @@ -.\" Manpage for ex5. - -.\" this file gives some basic introduction on how to use the -.\" roff format to write man pages -.\" NOTE: all formatting commands start with a dot and must be -.\" at the beginning of the line - -.\" the header section -.TH AliceO2 1 "07 July 2019" "1.0" "ex5 man page" - -.\" .SH starts a new section, NAME is the first section -.SH NAME - -ex5 - A simple example for AliceO2 submodules - -.\" next is the SYNOPSIS section -.SH SYNOPSIS - -.\" some bold formatted text -.B ex5 -.\" alternate between roman and bold font, separated by blank, i.e. the -.\" square backets in roman and the option in bold -.RB [ --someoption ] -.\" same here, in addition, the dots indicating the argument string are in -.\" italic. Note: the quoted " [" makes sure there is a blank -.RB [ --debug " [" --gdb=\fI...\fR ]] -.\" italic formatting (underline in man) -.I mandatory_argument -.\" alternate roman and italic -.RI [ further_arguments... ] - - -.SH DESCRIPTION - -ex5 is an example to demonstrate the AliceO2 cmake setup of -modules. This document illustrates creation of man pages. All options and -arument are pure fictive. - -.SH OPTIONS - -.\" indented paragraph with label, indentation is set to the optional number -.TP 5 -.B --someoption -This is a fancy option of the example. 
- -.TP 5 -.B --debug -Run everything with debugging options - -.TP 5 -.B --gdb=\fI...\fR -Add additional information to run with gdb - -.SH SEE ALSO - -ex5(1) - -http://gnustep.made-it.com/man-groff.html - -.SH BUGS - -No known bugs diff --git a/cmake/O2TargetManPage.cmake b/cmake/O2TargetManPage.cmake deleted file mode 100644 index 5d29447c52536..0000000000000 --- a/cmake/O2TargetManPage.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2019-2020 CERN and copyright holders of ALICE O2. -# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -# All rights not expressly granted are reserved. -# -# This software is distributed under the terms of the GNU General Public -# License v3 (GPL Version 3), copied verbatim in the file "COPYING". -# -# In applying this license CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. - -include_guard() - -# Generate a man page -# -# Make sure we have nroff. If that is not the case we will not generate man -# pages -find_program(NROFF_FOUND nroff) - -function(o2_target_man_page target) - if(NOT NROFF_FOUND) - return() - endif() - cmake_parse_arguments(PARSE_ARGV - 1 - A - "" - "NAME;SECTION" - "") - - # check the target exists - if(NOT TARGET ${target}) - # try with out naming conventions - set(baseTargetName ${target}) - o2_name_target(${baseTargetName} NAME target) - if(NOT TARGET ${target}) - # not a library, maybe an executable ? - o2_name_target(${baseTargetName} NAME target IS_EXE) - if(NOT TARGET ${target}) - message(FATAL_ERROR "Target ${target} does not exist") - endif() - endif() - endif() - - if(NOT A_SECTION) - set(A_SECTION 1) - endif() - if(NOT A_NAME) - message( - FATAL_ERROR - "You must provide the name of the input man file in doc/.
.in" - ) - endif() - if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in) - message( - FATAL_ERROR - "Input file ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in does not exist" - ) - endif() - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in - COMMAND nroff - -Tascii - -man - ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in - > - ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION} - VERBATIM) - # the prefix man. for the target name avoids circular dependencies for the man - # pages added at top level. Simply droping the dependency for those does not - # invoke the custom command on all systems. - set(CUSTOM_TARGET_NAME man.${A_NAME}.${A_SECTION}) - add_custom_target(${CUSTOM_TARGET_NAME} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION}) - add_dependencies(${target} ${CUSTOM_TARGET_NAME}) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION} - DESTINATION ${CMAKE_INSTALL_DATADIR}/man/man${A_SECTION}) -endfunction() diff --git a/doc/CMakeInstructions.md b/doc/CMakeInstructions.md index c4f55140f611c..e0438c985c41d 100644 --- a/doc/CMakeInstructions.md +++ b/doc/CMakeInstructions.md @@ -349,24 +349,6 @@ Note as well that some (very few) tests are ran only for some configurations ctest -C RelWithDebInfo ``` -#### [Ex5](../Examples/Ex5) Adding a man page - -If a module provides one or more executables, it might be of interest for the users of those executables to have access to a man page for them. Ex5 illustates that use case. - - . 
- ├── CMakeLists.txt - ├── README.md - ├── doc - │   └── ex5.7.in - └── src - └── run.cxx - -The [man page](ManPages.md) is created using : - - o2_target_man_page([targetName] NAME ex5 SECTION 7) - -where `NAME xx` refers to a file `doc/xx.[SECTION].in`, and the actual `targetName` can be found from the base target name (ex5 in that case) using the [o2_name_target](../cmake/O2NameTarget.cmake) function. - ## CTest In the build directory of O2, if you launch the `ctest` command, all the O2 tests will be ran, which is not always what you want/need, in particular during development. diff --git a/doc/FairMQDevice.1.in b/doc/FairMQDevice.1.in deleted file mode 100644 index 4e33e3379ae82..0000000000000 --- a/doc/FairMQDevice.1.in +++ /dev/null @@ -1,64 +0,0 @@ -.\" Manpage for general FairMQ device properties and options. -.TH AliceO2 1 "10 Apr 2022" "1.1" "FairMQ device man page" -.SH NAME -fair::mq::Device - The basis for software devices in O2 - -.SH SYNOPSIS -.I deviceapp -.BI --id deviceid -.BI --mq-config " configfile" -| -.BI --channel-config " config" -| -.BI --config-json-string " arg" -| -.BI --config-xml-string " arg" - -.SH DESCRIPTION -All AliceO2 devices derive from fair::mq::Device which provides the -transport functionality. - -.SH OPTIONS -.SS Common FairMQ device options -.TP 2 -.BI --id " id" -A unique identifier of the device -.TP 2 -.BI --verbosity " arg " \fR(=DEBUG) -.RS -Verbosity level : -.B TRACE -.B DEBUG -.B RESULTS -.B INFO -.B WARN -.B ERROR -.B STATE -.B NOLOG -.RE - -.SS FairMQ device channel configuration -.TP 2 -.BI --config-xml-string " arg " -XML input as command line string. -.TP 2 -.BI --config-json-string " arg " -JSON input as command line string. -.TP 2 -.BI --mq-config " arg " -JSON/XML input as file. 
The configuration object will check xml or -json file extention and will call the json or xml parser accordingly -.TP 2 -.BI --channel-config " args " -channel configuration as comma separated key=\fIvalue\fR pairs -.RS -Valid Keys: -.B name -.B type -.B method -.B address -.B property -.RE - -.SH MORE OPTIONS -Use '\fIdeviceapp\fR --help' to get a full list of options. diff --git a/doc/ManPages.md b/doc/ManPages.md deleted file mode 100644 index 8fb7b48b07beb..0000000000000 --- a/doc/ManPages.md +++ /dev/null @@ -1,23 +0,0 @@ - - -# Man Pages - -You can create man pages in nroff format under: - - Subsystem/Module/docs/.
.in - -and it will create a man page for you in: - - ${CMAKE_BINARY_DIR}/stage/share/man/man
- -if you add: - - o2_target_man_page(target NAME SECTION
) - -to your `CMakeLists.txt`. Note the man page is "attached" to a given target. -If `SECTION` is omitted it will default to 1 -(executables). For more informantion about nroff format you can look at: - - http://www.linuxjournal.com/article/1158 diff --git a/doc/o2-timeframe-file-format.1.in b/doc/o2-timeframe-file-format.1.in deleted file mode 100644 index df36ff7256e33..0000000000000 --- a/doc/o2-timeframe-file-format.1.in +++ /dev/null @@ -1,27 +0,0 @@ -.\" Manpage for O2. -.TH man 1 "19 May 2017" "1.0" "Alice O2 Timeframe Format" - -.SH DESCRIPTION - -O2 is Alice next generation software framework to be used for RUN3. This is a -quick desctiption of the timeframe file format as dumped by -o2-timeframe-writer-device and read by the o2-timeframe-reader-device. - -The file format is simply a dump of the timeframe on disk. Multiple timeframes -can be concatenated resulting in a valid file. The format is as follow: - -o2tf: Timeframe [Timeframe [..]] -Timeframe: Subtimeframe [Subtimeframe [...]] TimeframeIndex -Subtimeframe: Header Payload -Header: DataHeader derived header stack -Payload: binary blob -TimeframeIndex: IndexElement [IndexElement [..]] -IndexElement: DataHeader Payload -Position in timeframe: int (4 bytes) -DataHeader: only the DataHeader part -Payload: binary blob - -.SH DISCLAIMER - -Notice that this file format is a work in progress and cannot be used for -anything but debugging purposes. diff --git a/doc/o2.1.in b/doc/o2.1.in deleted file mode 100644 index 57d74acf1640d..0000000000000 --- a/doc/o2.1.in +++ /dev/null @@ -1,19 +0,0 @@ -.\" Manpage for O2. -.TH man 1 "19 May 2017" "1.0" "Alice O2 man page" - -.SH NAME - -O2 is Alice next generation software framework to be used for RUN3. 
- -.SH DEVICES - -o2-alicehlt-wrapper-device(1), o2-subframebuilder-device(1) - -.\.SH TOOLS - -.SH SEE ALSO -FairMQDevice(1) - -.SH BUGS - -No bugs whatsoever From b41a2a1e13cd537654c05d5dedb25b9c63e473da Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 14:44:22 +0200 Subject: [PATCH 0065/1426] Update GPU documentation build-standalone.md --- GPU/documentation/build-standalone.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/GPU/documentation/build-standalone.md b/GPU/documentation/build-standalone.md index 891d16b4dc2c4..bf84fd3edf0fb 100644 --- a/GPU/documentation/build-standalone.md +++ b/GPU/documentation/build-standalone.md @@ -55,9 +55,20 @@ An example line would .e.g. be ``` Some other noteworthy options are `--display` to run the GPU event display, `--qa` to run a QA task on MC data, `--runs` and `--runs2` to run multiple iterations of the benchmark, `--printSettings` to print all the settings that were used, `--memoryStat` to print memory statistics, `--sync` to run with settings for online reco, `--syncAsync` to run online reco first, and then offline reco on the produced TPC CTF data, `--setO2Settings` to use some defaults as they are in O2 not in the standalone version, `--PROCdoublePipeline` to enable the double-threaded pipeline for best performance (works only with multiple iterations, and not in async mode), and `--RTCenable` to enable the run time compilation improvements (check also `--RTCcacheOutput`). -An example for a benchmark in online mode would be: +With `--memSize` you can control the amount of GPU memory to use, and with `--inputMemory` and `--outputMemory` GPU-registered input/output buffers can be preallocated (as is the SHM memory when running in O2). 
+An example for a benchmark that runs with the same settings as in online data taking would be: ``` -./ca -e o2-pbpb-100 -g --sync --setO2Settings --PROCdoublePipeline --RTCenable --runs 10 +./ca -e o2-pbpb-100 -g --gpuType HIP --sync --setO2Settings --PROCdoublePipeline --RTCenable --runs 10 --memSize 15000000000 --inputMemory 6000000000 --outputMemory 10000000000 +``` + +For setting a GPU device, you can use the `--gpuDevice` option with the GPU index. +For ROCm with many GPUs, however, like on the EPNs with 8 GPUs, it is better to set the `ROCR_VISIBLE_DEVICES` env variable to the GPU you want to use. +MAKE SURE TO CHECK IF IT IS ALREADY SET BY SLURM WHEN YOU GET THE NODE!!! IN THAT CASE, USE ONLY THE GPUS ASSIGNED TO YOU BY SLURM! + +Finally, also NUMA pinning can play a role. On the EPN, you should use memory and GPUs and CPU cores from the same NUMA domain. +For a realistic benchmark using GPU 0 on the EPNs, please use: +``` +ROCR_VISIBLE_DEVICES=0 numactl --membind 0 --cpunodebind 0 ./ca -e o2-pbpb-100 --gpuType HIP --memSize 15000000000 --inputMemory 6000000000 --outputMemory 10000000000 --sync --runs 10 --RTCenable --setO2Settings --PROCdoublePipeline ``` # Generating a dataset @@ -84,3 +95,5 @@ To dump standalone data from CTF raw data in `myctf.root`, you can use the same ``` CTFINPUT=1 INPUT_FILE_LIST=myctf.root CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow="GPU_global.dump=1;" WORKFLOW_DETECTORS=TPC SHMSIZE=16000000000 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh ``` + +On the EPNs, you can find some reference data sets at `/home/drohr/standalone/events`. 
From 010f8676d1c1e4849281bc18405192dea0c9bc58 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Wed, 21 May 2025 09:47:17 +0200 Subject: [PATCH 0066/1426] dpl-workflow.sh: using MI100 serialization workaround by default again, with option to disable it with env var --- prodtests/full-system-test/dpl-workflow.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 4e6cbbebe7db7..dd575099857f4 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -270,6 +270,7 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? ($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi + [[ $EPNSYNCMODE == 1 || ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && [[ ${DISABLE_MI100_SERIALIZATION:-0} != 1 ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;" From b6f15f87e212a896e8c56f3cf475d1ef2c677889 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 13:53:13 +0200 Subject: [PATCH 0067/1426] GPU RTC: Add keepTempFiles option --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 6 ++++-- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index c8e5420a8bcf3..907bd08779ec2 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -420,8 +420,10 @@ void GPUReconstructionCUDA::genAndLoadRTC() mInternals->kernelModules.emplace_back(std::make_unique()); GPUChkErr(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); } - 
remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); - remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); + if (!GetProcessingSettings().rtctech.keepTempFiles) { + remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); + remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); + } } if (GetProcessingSettings().rtctech.runTest == 2) { return; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 238994ee53af5..8dabd17f95d23 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -233,6 +233,7 @@ AddOption(cacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the c AddOption(prependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string") AddOption(overrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") // Part of cmdLine, so checked against the cache AddOption(loadLaunchBoundsFromFile, std::string, "", "", 0, "Load a parameter object containing the launch bounds from a file") +AddOption(keepTempFiles, bool, false, "", 0, "Keep temporary source and object files") AddHelp("help", 'h') EndConfig() From e060099977dac22c9eea41bc27234e45ee590a27 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 10:16:48 +0200 Subject: [PATCH 0068/1426] GPU: Fix CUDA GetMemInfo must use correct device and simplify context creation / cleanup --- .../Base/cuda/GPUReconstructionCUDA.cu | 35 +++++++------------ 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 907bd08779ec2..0d5666b8ee790 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -125,34 +125,25 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } 
std::vector devicesOK(count, false); std::vector devMemory(count, 0); - bool contextCreated = false; + std::vector contextCreated(count, false); for (int32_t i = 0; i < count; i++) { if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Examining device %d", i); } size_t free, total; -#ifndef __HIPCC__ // CUDA - if (GPUChkErrI(cudaInitDevice(i, 0, 0))) { -#else // HIP - if (GPUChkErrI(hipSetDevice(i))) { -#endif + if (GPUChkErrI(cudaSetDevice(i))) { if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); } continue; } - contextCreated = true; + contextCreated[i] = true; if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! Skipping it.", i); } - GPUChkErr(cudaDeviceReset()); continue; } - if (count > 1) { - GPUChkErr(cudaDeviceReset()); - contextCreated = false; - } if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } @@ -212,13 +203,20 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() bestDevice = GetProcessingSettings().deviceNum; } } - if (noDevice) { - if (contextCreated) { + for (int32_t i = 0; i < count; i++) { + if (contextCreated[i] && (noDevice || i != bestDevice)) { + GPUChkErrI(cudaSetDevice(i)); GPUChkErrI(cudaDeviceReset()); } + } + if (noDevice) { return (1); } mDeviceId = bestDevice; + if (GPUChkErrI(cudaSetDevice(mDeviceId))) { + GPUError("Could not set CUDA Device!"); + return (1); + } GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); @@ -262,15 +260,6 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } #endif -#ifndef __HIPCC__ // CUDA - if (contextCreated == 0 && GPUChkErrI(cudaInitDevice(mDeviceId, 0, 0))) { -#else // HIP - if (contextCreated == 0 && GPUChkErrI(hipSetDevice(mDeviceId))) { -#endif - GPUError("Could not set CUDA Device!"); - return (1); - } - #ifndef __HIPCC__ // CUDA if 
(GPUChkErrI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { GPUError("Error setting CUDA stack size"); From 52937edaf56f75f1e347141e4105a31a95c55fc9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 13:43:20 +0200 Subject: [PATCH 0069/1426] GPU Display: Store pointer to GPUSettingsProcessing, so we do not need to copy debugLevel to GPUParam --- .../Base/GPUReconstructionTimeframe.cxx | 3 +-- .../Interface/GPUO2InterfaceDisplay.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- GPU/GPUTracking/display/GPUDisplay.cxx | 18 ++++++++++-------- GPU/GPUTracking/display/GPUDisplay.h | 8 +++++++- .../display/GPUDisplayInterface.cxx | 4 ++-- GPU/GPUTracking/display/GPUDisplayInterface.h | 14 ++++++++++++-- .../backend/GPUDisplayBackendOpenGL.cxx | 2 +- .../backend/GPUDisplayBackendVulkan.cxx | 8 ++++---- .../frontend/GPUDisplayFrontendWayland.cxx | 6 +++--- .../display/helpers/GPUDisplayLoader.cxx | 15 +++++++++++++-- .../display/render/GPUDisplayDraw.cxx | 2 +- GPU/GPUTracking/qa/GPUQA.cxx | 2 +- GPU/GPUTracking/qa/genEvents.cxx | 3 +-- 14 files changed, 58 insertions(+), 31 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx index b25b93e957b15..fefcd0ac925fe 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx @@ -25,14 +25,13 @@ #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" #include "GPUO2DataTypes.h" +#include "GPUSettings.h" #include #include #include #include -#include "utils/qconfig.h" - using namespace o2::gpu; namespace o2::gpu diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx index f84f29d826f1d..60d5eaf9ae162 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx @@ -35,7 +35,7 @@ 
GPUO2InterfaceDisplay::GPUO2InterfaceDisplay(const GPUO2InterfaceConfiguration* mQA.reset(new GPUQA(nullptr, &config->configQA, mParam.get())); mQA->InitO2MCData(); } - mDisplay.reset(GPUDisplayInterface::getDisplay(mFrontend.get(), nullptr, mQA.get(), mParam.get(), &mConfig->configCalib, &mConfig->configDisplay)); + mDisplay.reset(GPUDisplayInterface::getDisplay(mFrontend.get(), nullptr, mQA.get(), mParam.get(), &mConfig->configCalib, &mConfig->configDisplay, &mConfig->configProcessing)); } GPUO2InterfaceDisplay::~GPUO2InterfaceDisplay() = default; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index aed42e4f98f0c..9fb12432e763a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -42,7 +42,7 @@ #include "GPUGetConstexpr.h" #ifdef GPUCA_CADEBUG_ENABLED -#include "../utils/qconfig.h" +#include "GPUSettings.h" #include "AliHLTTPCClusterMCData.h" #endif diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 5b0960919da15..136b1947f60ee 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -34,7 +34,7 @@ #include "GPUTPCTracker.h" #include "GPUTPCGMMergedTrack.h" #include "GPUO2DataTypes.h" -#include "utils/qconfig.h" +#include "GPUSettings.h" #include "frontend/GPUDisplayFrontend.h" #include "backend/GPUDisplayBackend.h" @@ -44,17 +44,19 @@ constexpr hmm_mat4 MY_HMM_IDENTITY = {{{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, using namespace o2::gpu; -static const GPUSettingsDisplay& GPUDisplay_GetConfig(GPUChainTracking* chain) +const GPUSettingsDisplay& GPUDisplay::GetConfig(GPUChainTracking* chain) { static GPUSettingsDisplay defaultConfig; - if (chain && chain->mConfigDisplay) { - return *chain->mConfigDisplay; - } else { - return defaultConfig; - } + return (chain && chain->mConfigDisplay) ? 
*chain->mConfigDisplay : defaultConfig; +} + +const GPUSettingsProcessing& GPUDisplay::GetProcessingConfig(GPUChainTracking* chain) +{ + static GPUSettingsProcessing defaultConfig; + return chain ? chain->GetProcessingSettings() : defaultConfig; } -GPUDisplay::GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* config) : GPUDisplayInterface(), mFrontend(frontend), mChain(chain), mConfig(config ? *config : GPUDisplay_GetConfig(chain)), mQA(qa) +GPUDisplay::GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* config, const GPUSettingsProcessing* proc) : GPUDisplayInterface(), mFrontend(frontend), mChain(chain), mConfig(config ? *config : GetConfig(chain)), mProcessingSettings(proc ? *proc : GetProcessingConfig(chain)), mQA(qa) { mParam = param ? param : &mChain->GetParam(); mCalib = calib; diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index dbd90020698b2..06977c26e0b63 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -18,6 +18,7 @@ #include "frontend/GPUDisplayFrontend.h" #include "backend/GPUDisplayBackend.h" #include "GPUDisplayInterface.h" +#include "GPUSettings.h" #include "../utils/vecpod.h" #include "../utils/qsem.h" @@ -37,7 +38,7 @@ class GPUTRDGeometry; class GPUDisplay : public GPUDisplayInterface { public: - GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param = nullptr, const GPUCalibObjectsConst* calib = nullptr, const GPUSettingsDisplay* config = nullptr); + GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param = nullptr, const GPUCalibObjectsConst* calib = nullptr, const GPUSettingsDisplay* config = nullptr, const GPUSettingsProcessing* proc = nullptr); 
GPUDisplay(const GPUDisplay&) = delete; ~GPUDisplay() override = default; @@ -71,6 +72,7 @@ class GPUDisplay : public GPUDisplayInterface }; vecpod* vertexBuffer() { return mVertexBuffer; } const GPUParam* param() { return mParam; } + const GPUSettingsProcessing& GetProcessingSettings() const { return mProcessingSettings; } GPUDisplayFrontend* frontend() { return mFrontend; } bool drawTextInCompatMode() const { return mDrawTextInCompatMode; } int32_t& drawTextFontSize() { return mDrawTextFontSize; } @@ -140,6 +142,9 @@ class GPUDisplay : public GPUDisplayInterface bool mVerbose = false; }; + static const GPUSettingsDisplay& GetConfig(GPUChainTracking* chain); + static const GPUSettingsProcessing& GetProcessingConfig(GPUChainTracking* chain); + void DrawGLScene_internal(float animateTime = -1.f, bool renderToMixBuffer = false); void DrawGLScene_updateEventData(); void DrawGLScene_cameraAndAnimation(float animateTime, float& mixSlaveImage, hmm_mat4& nextViewMatrix); @@ -214,6 +219,7 @@ class GPUDisplay : public GPUDisplayInterface GPUSettingsDisplayLight mCfgL; GPUSettingsDisplayHeavy mCfgH; GPUSettingsDisplayRenderer mCfgR; + const GPUSettingsProcessing& mProcessingSettings; GPUQA* mQA; qSem mSemLockDisplay; diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.cxx b/GPU/GPUTracking/display/GPUDisplayInterface.cxx index 2f5cc9cbb5dd5..2eddef998fa8b 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.cxx +++ b/GPU/GPUTracking/display/GPUDisplayInterface.cxx @@ -65,9 +65,9 @@ static void* loadUnloadLib(bool load) return nullptr; } -GPUDisplayInterface* GPUDisplayInterface::getDisplay(GPUDisplayFrontendInterface* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* config) +GPUDisplayInterface* GPUDisplayInterface::getDisplay(GPUDisplayFrontendInterface* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* 
config, const GPUSettingsProcessing* proc) { - std::tuple args = {frontend, chain, qa, param, calib, config}; + std::tuple args = {frontend, chain, qa, param, calib, config, proc}; auto func = (GPUDisplayInterface * (*)(const char*, void*)) loadUnloadLib(true); return func ? func("display", &args) : nullptr; } diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.h b/GPU/GPUTracking/display/GPUDisplayInterface.h index 3c6928c78e5a1..574a8cffc71f0 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.h +++ b/GPU/GPUTracking/display/GPUDisplayInterface.h @@ -15,7 +15,7 @@ #ifndef GPUDISPLAYINTERFACE_H #define GPUDISPLAYINTERFACE_H -#include "GPUSettings.h" +#include namespace o2::gpu { @@ -23,6 +23,16 @@ namespace o2::gpu class GPUChainTracking; class GPUQA; struct GPUParam; +struct GPUTrackingInOutPointers; +template +struct ConstPtr; +template